From 1b56f8afb1b2226df7716059050762b37dd84a95 Mon Sep 17 00:00:00 2001 From: justschen Date: Fri, 5 Jun 2026 20:54:34 -0700 Subject: [PATCH 1/4] use risk assessment in general confirmations --- .../chat/browser/chat.shared.contribution.ts | 2 +- .../tools/chatToolRiskAssessmentService.ts | 112 +++++++++++++++--- .../chatElicitationContentPart.ts | 31 +---- .../abstractToolConfirmationSubPart.ts | 23 ++++ ...atMissingSandboxDepsConfirmationSubPart.ts | 4 +- .../chatModifiedFilesConfirmationSubPart.ts | 5 +- .../chatTerminalToolConfirmationSubPart.ts | 45 +------ .../chatToolConfirmationSubPart.ts | 4 +- .../chatToolPostExecuteConfirmationPart.ts | 4 +- .../toolRiskBadgeHelper.ts | 71 +++++++++++ 10 files changed, 213 insertions(+), 88 deletions(-) create mode 100644 src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts diff --git a/src/vs/workbench/contrib/chat/browser/chat.shared.contribution.ts b/src/vs/workbench/contrib/chat/browser/chat.shared.contribution.ts index 3f9c08bb6f1c7..c5d1cb20e0124 100644 --- a/src/vs/workbench/contrib/chat/browser/chat.shared.contribution.ts +++ b/src/vs/workbench/contrib/chat/browser/chat.shared.contribution.ts @@ -1140,7 +1140,7 @@ configurationRegistry.registerConfiguration({ }, [ChatConfiguration.ToolRiskAssessmentEnabled]: { type: 'boolean', - description: nls.localize('chat.tools.riskAssessment.enabled', "When enabled, terminal tool confirmations show an LLM-generated risk level (Safe / Caution / Review carefully) and a short explanation."), + description: nls.localize('chat.tools.riskAssessment.enabled', "When enabled, tool confirmations show an LLM-generated risk level (Safe / Caution / Review carefully) and a short explanation."), default: true, experiment: { mode: 'auto' diff --git a/src/vs/workbench/contrib/chat/browser/tools/chatToolRiskAssessmentService.ts b/src/vs/workbench/contrib/chat/browser/tools/chatToolRiskAssessmentService.ts index a8183c09e88d4..934dd0234e4b9 
100644 --- a/src/vs/workbench/contrib/chat/browser/tools/chatToolRiskAssessmentService.ts +++ b/src/vs/workbench/contrib/chat/browser/tools/chatToolRiskAssessmentService.ts @@ -28,18 +28,25 @@ export interface IToolRiskAssessment { export const IChatToolRiskAssessmentService = createDecorator('chatToolRiskAssessmentService'); +/** + * Which rubric the model uses to assess a tool call. `terminal` evaluates a + * shell command; `generic` evaluates file edits, reads, fetches, and other + * tool calls. When omitted, the kind is auto-detected from the tool id. + */ +export type ToolRiskPromptKind = 'terminal' | 'generic'; + export interface IChatToolRiskAssessmentService { readonly _serviceBrand: undefined; /** Returns whether the feature is enabled by configuration. */ isEnabled(): boolean; /** Synchronously read a previously cached assessment, or undefined if none. */ - getCached(tool: IToolData, parameters: unknown): IToolRiskAssessment | undefined; + getCached(tool: IToolData, parameters: unknown, kind?: ToolRiskPromptKind): IToolRiskAssessment | undefined; /** * Get a cached or freshly-computed risk assessment for a tool call. * Returns `undefined` when the feature is disabled, no model is available, * or the assessment cannot be parsed. 
*/ - assess(tool: IToolData, parameters: unknown, token: CancellationToken): Promise; + assess(tool: IToolData, parameters: unknown, token: CancellationToken, kind?: ToolRiskPromptKind): Promise; } const MAX_PARAM_BYTES = 2000; @@ -64,16 +71,17 @@ export class ChatToolRiskAssessmentService implements IChatToolRiskAssessmentSer return this._configurationService.getValue(ChatConfiguration.ToolRiskAssessmentEnabled) !== false; } - getCached(tool: IToolData, parameters: unknown): IToolRiskAssessment | undefined { - return this._cache.get(this._cacheKey(tool, parameters))?.assessment; + getCached(tool: IToolData, parameters: unknown, kind?: ToolRiskPromptKind): IToolRiskAssessment | undefined { + return this._cache.get(this._cacheKey(tool, parameters, resolveRiskPromptKind(tool, kind)))?.assessment; } - async assess(tool: IToolData, parameters: unknown, token: CancellationToken): Promise { + async assess(tool: IToolData, parameters: unknown, token: CancellationToken, kind?: ToolRiskPromptKind): Promise { if (!this.isEnabled()) { return undefined; } - const key = this._cacheKey(tool, parameters); + const resolvedKind = resolveRiskPromptKind(tool, kind); + const key = this._cacheKey(tool, parameters, resolvedKind); const cached = this._cache.get(key); if (cached) { @@ -87,7 +95,7 @@ export class ChatToolRiskAssessmentService implements IChatToolRiskAssessmentSer const promise = (async () => { try { - const assessment = await this._invokeModel(tool, parameters, token); + const assessment = await this._invokeModel(tool, parameters, resolvedKind, token); if (token.isCancellationRequested) { return undefined; } @@ -104,11 +112,11 @@ export class ChatToolRiskAssessmentService implements IChatToolRiskAssessmentSer return promise; } - private _cacheKey(tool: IToolData, parameters: unknown): string { - return tool.id + '::' + stableStringify(normalizeRiskCacheParameters(tool, parameters)); + private _cacheKey(tool: IToolData, parameters: unknown, kind: ToolRiskPromptKind): string 
{ + return kind + '::' + tool.id + '::' + stableStringify(normalizeRiskCacheParameters(parameters, kind)); } - private async _invokeModel(tool: IToolData, parameters: unknown, token: CancellationToken): Promise { + private async _invokeModel(tool: IToolData, parameters: unknown, kind: ToolRiskPromptKind, token: CancellationToken): Promise { const modelId = this._configurationService.getValue(ChatConfiguration.ToolRiskAssessmentModel) || 'copilot-utility-small'; const models = await this._languageModelsService.selectLanguageModels({ vendor: 'copilot', id: modelId }); @@ -116,7 +124,7 @@ export class ChatToolRiskAssessmentService implements IChatToolRiskAssessmentSer return undefined; } - const prompt = buildPrompt(tool, parameters); + const prompt = buildPrompt(tool, parameters, kind); const response = await this._languageModelsService.sendChatRequest( models[0], undefined, @@ -149,20 +157,37 @@ export class ChatToolRiskAssessmentService implements IChatToolRiskAssessmentSer } } +/** + * Resolve which rubric to assess a tool call under. Callers that know the + * surface (e.g. the terminal confirmation) pass an explicit kind; otherwise it + * is auto-detected from the tool id so the built-in `run_in_terminal` tool + * keeps the terminal rubric. + */ +function resolveRiskPromptKind(tool: IToolData, kind: ToolRiskPromptKind | undefined): ToolRiskPromptKind { + return kind ?? (tool.id === TerminalToolId.RunInTerminal ? 'terminal' : 'generic'); +} + /** * Compute the subset of tool parameters that are relevant to the risk * assessment, used as the cache key so re-invocations of the same tool call * hit the cache even when model-generated descriptive fields differ. 
*/ -function normalizeRiskCacheParameters(tool: IToolData, parameters: unknown): unknown { - if (tool.id === TerminalToolId.RunInTerminal && parameters && typeof parameters === 'object') { +function normalizeRiskCacheParameters(parameters: unknown, kind: ToolRiskPromptKind): unknown { + if (kind === 'terminal' && parameters && typeof parameters === 'object') { const p = parameters as Record; return { command: p.command }; } return parameters; } -function buildPrompt(tool: IToolData, parameters: unknown): string { +function buildPrompt(tool: IToolData, parameters: unknown, kind: ToolRiskPromptKind): string { + const argsJson = serializeParameters(parameters); + return kind === 'terminal' + ? buildTerminalPrompt(tool, argsJson) + : buildGenericToolPrompt(tool, argsJson); +} + +function serializeParameters(parameters: unknown): string { let argsJson: string; try { argsJson = JSON.stringify(parameters ?? {}); @@ -172,6 +197,10 @@ function buildPrompt(tool: IToolData, parameters: unknown): string { if (argsJson.length > MAX_PARAM_BYTES) { argsJson = argsJson.slice(0, MAX_PARAM_BYTES) + '...[truncated]'; } + return argsJson; +} + +function buildTerminalPrompt(tool: IToolData, argsJson: string): string { return [ `You assess what one terminal command does for a code-editing AI agent, and how risky it is.`, `Reply with STRICT JSON only (no prose, no markdown fences):`, @@ -232,6 +261,61 @@ function buildPrompt(tool: IToolData, parameters: unknown): string { ].join('\n'); } +function buildGenericToolPrompt(tool: IToolData, argsJson: string): string { + return [ + `You assess what one tool call does for a code-editing AI agent, and how risky it is.`, + `The tool may edit files, read files, fetch data, or perform some other action.`, + `Reply with STRICT JSON only (no prose, no markdown fences):`, + `{`, + ` "risk": "green" | "orange" | "red",`, + ` "explanation": ""`, + `}`, + ``, + `Rules for "risk" — apply in order; take the FIRST match:`, + ` 1. 
permanently destroys source code or user data with no recovery`, + ` (irrecoverable deletion, wiping a database, unrecoverable overwrite) -> red`, + ` 2. executes code downloaded on the fly from an arbitrary or untrusted URL -> red`, + ` 3. sends data to a remote server or changes remote state (POST/PUT, upload, deploy) -> orange`, + ` 4. modifies local files or workspace state (edits, creates, reversible deletes)`, + ` or installs packages from a standard registry -> orange`, + ` 5. otherwise (reads files, lists, searches, fetches public read-only data) -> green`, + ``, + `Read-only operations are always GREEN. Editing or creating a workspace file is`, + `ORANGE (reversible via undo or version control), never red. RED is reserved for`, + `actions whose effects cannot be undone. Installing a package from a normal`, + `registry is ORANGE; only running code piped straight from an arbitrary URL is RED.`, + ``, + `Examples:`, + ` read a file's contents -> green`, + ` list files in a directory -> green`, + ` search the workspace for a symbol -> green`, + ` fetch a public web page (GET) -> green`, + ` edit an existing source file -> orange`, + ` create a new file in the workspace -> orange`, + ` install a package -> orange`, + ` POST data to an external API -> orange`, + ` wipe a database table -> red`, + ` run code from an untrusted URL -> red`, + ``, + `Write "explanation" in this exact shape:`, + // allow-any-unicode-next-line + ` - green : " ." e.g. "Reads the contents of package.json."`, + // allow-any-unicode-next-line + ` - orange: " ." e.g. "Edits src/app.ts — changes workspace source."`, + // allow-any-unicode-next-line + ` - red : " ." e.g. "Deletes src/app.ts — permanently removes source."`, + ``, + `Strict explanation rules:`, + ` - Cite the ACTUAL files, paths, URLs, or values from the arguments below.`, + ` - Never use generic phrases like "may have side effects". Always name WHAT is read or changed.`, + ` - Plain prose. No quotes around the sentence. 
No markdown fences.`, + ``, + `Tool: ${tool.displayName} (id: ${tool.id})`, + `Description: ${tool.modelDescription || tool.userDescription || ''}`, + `Arguments (JSON): ${argsJson}`, + ].join('\n'); +} + function parseAssessment(rawText: string, tool: IToolData): IToolRiskAssessment | undefined { let text = rawText.trim(); if (text.startsWith('```')) { diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/chatElicitationContentPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/chatElicitationContentPart.ts index bb3ebb096eafd..7326303897d56 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/chatElicitationContentPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/chatElicitationContentPart.ts @@ -4,7 +4,6 @@ *--------------------------------------------------------------------------------------------*/ import { IMarkdownString, isMarkdownString, MarkdownString } from '../../../../../../base/common/htmlContent.js'; -import { CancellationTokenSource } from '../../../../../../base/common/cancellation.js'; import { Disposable, IDisposable, toDisposable } from '../../../../../../base/common/lifecycle.js'; import { autorun } from '../../../../../../base/common/observable.js'; import { IInstantiationService } from '../../../../../../platform/instantiation/common/instantiation.js'; @@ -19,7 +18,7 @@ import { AcceptElicitationRequestActionId } from '../../actions/chatElicitationA import { IChatToolRiskAssessmentService } from '../../tools/chatToolRiskAssessmentService.js'; import { ChatConfirmationWidget, IChatConfirmationButton } from './chatConfirmationWidget.js'; import { IChatContentPart, IChatContentPartRenderContext } from './chatContentParts.js'; -import { ToolRiskBadgeWidget } from './toolInvocationParts/toolRiskBadgeWidget.js'; +import { createToolRiskBadge } from './toolInvocationParts/toolRiskBadgeHelper.js'; import { IAction } from 
'../../../../../../base/common/actions.js'; export class ChatElicitationContentPart extends Disposable implements IChatContentPart { @@ -134,38 +133,12 @@ export class ChatElicitationContentPart extends Disposable implements IChatConte if (elicitation.kind !== 'elicitation2' || !elicitation.riskAssessment) { return undefined; } - if (!this.riskAssessmentService.isEnabled()) { - return undefined; - } const { toolId, parameters } = elicitation.riskAssessment; const tool = this.languageModelToolsService.getTool(toolId); if (!tool) { return undefined; } - const widget = this._register(this.instantiationService.createInstance(ToolRiskBadgeWidget)); - const cached = this.riskAssessmentService.getCached(tool, parameters); - if (cached) { - widget.setAssessment(cached); - } else { - widget.setLoading(); - const cts = this._register(new CancellationTokenSource()); - (async () => { - try { - const result = await this.riskAssessmentService.assess(tool, parameters, cts.token); - if (cts.token.isCancellationRequested) { - return; - } - if (!result) { - widget.setHidden(); - return; - } - widget.setAssessment(result); - } catch { - widget.setHidden(); - } - })(); - } - return widget.domNode; + return createToolRiskBadge(this._store, this.instantiationService, this.riskAssessmentService, tool, parameters)?.domNode; } hasSameContent(other: IChatProgressRenderableResponseContent): boolean { diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts index 94c5130b60cc0..f573ea1459d39 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts @@ -15,9 +15,11 @@ import { ChatContextKeys } from 
'../../../../common/actions/chatContextKeys.js'; import { ConfirmedReason, IChatToolInvocation, ToolConfirmKind } from '../../../../common/chatService/chatService.js'; import { ILanguageModelToolsService } from '../../../../common/tools/languageModelToolsService.js'; import { IChatWidgetService } from '../../../chat.js'; +import { IChatToolRiskAssessmentService } from '../../../tools/chatToolRiskAssessmentService.js'; import { ChatCustomConfirmationWidget, IChatConfirmationButton } from '../chatConfirmationWidget.js'; import { IChatContentPartRenderContext } from '../chatContentParts.js'; import { BaseChatToolInvocationSubPart } from './chatToolInvocationSubPart.js'; +import { createToolRiskBadge } from './toolRiskBadgeHelper.js'; export interface IToolConfirmationConfig { allowActionId: string; @@ -50,6 +52,7 @@ export abstract class AbstractToolConfirmationSubPart extends BaseChatToolInvoca @IContextKeyService protected readonly contextKeyService: IContextKeyService, @IChatWidgetService protected readonly chatWidgetService: IChatWidgetService, @ILanguageModelToolsService protected readonly languageModelToolsService: ILanguageModelToolsService, + @IChatToolRiskAssessmentService protected readonly riskAssessmentService: IChatToolRiskAssessmentService, ) { super(toolInvocation); @@ -114,6 +117,12 @@ export abstract class AbstractToolConfirmationSubPart extends BaseChatToolInvoca const contentElement = this.createContentElement(); const tool = languageModelToolsService.getTool(toolInvocation.toolId); + // Risk badges describe a pending action, so they are only shown for the + // pre-execution confirmation — not post-approval, where the tool has + // already run. + const riskBadge = state.type === IChatToolInvocation.StateKind.WaitingForConfirmation + ? 
this.createRiskBadgeDomNode(state.parameters) + : undefined; const confirmWidget = this._register(this.instantiationService.createInstance( ChatCustomConfirmationWidget<(() => void)>, this.context, @@ -123,6 +132,7 @@ export abstract class AbstractToolConfirmationSubPart extends BaseChatToolInvoca subtitle: config.subtitle, buttons, message: contentElement, + footerBanner: riskBadge, toolbarData: { arg: toolInvocation, partType: config.partType, @@ -200,6 +210,19 @@ export abstract class AbstractToolConfirmationSubPart extends BaseChatToolInvoca return []; } + /** + * Create the risk-assessment badge DOM node for this confirmation, or + * `undefined` when the feature is disabled or the tool is unknown. Returned + * as a `footerBanner` for the confirmation widget. + */ + protected createRiskBadgeDomNode(parameters: unknown): HTMLElement | undefined { + const tool = this.languageModelToolsService.getTool(this.toolInvocation.toolId); + if (!tool) { + return undefined; + } + return createToolRiskBadge(this._store, this.instantiationService, this.riskAssessmentService, tool, parameters)?.domNode; + } + /** * When true, "Allow Once" stays the primary button even when a * session-scoped action is available. Subclasses override this diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatMissingSandboxDepsConfirmationSubPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatMissingSandboxDepsConfirmationSubPart.ts index e31d3daf423d5..70ac5dcf41ed3 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatMissingSandboxDepsConfirmationSubPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatMissingSandboxDepsConfirmationSubPart.ts @@ -14,6 +14,7 @@ import { IChatToolInvocation, type IChatTerminalToolInvocationData } from '../.. 
import { ILanguageModelToolsService } from '../../../../common/tools/languageModelToolsService.js'; import { AcceptToolConfirmationActionId, SkipToolConfirmationActionId } from '../../../actions/chatToolActions.js'; import { IChatCodeBlockInfo, IChatWidgetService } from '../../../chat.js'; +import { IChatToolRiskAssessmentService } from '../../../tools/chatToolRiskAssessmentService.js'; import { IChatContentPartRenderContext } from '../chatContentParts.js'; import { AbstractToolConfirmationSubPart } from './abstractToolConfirmationSubPart.js'; @@ -30,8 +31,9 @@ export class ChatMissingSandboxDepsConfirmationSubPart extends AbstractToolConfi @IContextKeyService contextKeyService: IContextKeyService, @IChatWidgetService chatWidgetService: IChatWidgetService, @ILanguageModelToolsService languageModelToolsService: ILanguageModelToolsService, + @IChatToolRiskAssessmentService riskAssessmentService: IChatToolRiskAssessmentService, ) { - super(toolInvocation, context, instantiationService, keybindingService, contextKeyService, chatWidgetService, languageModelToolsService); + super(toolInvocation, context, instantiationService, keybindingService, contextKeyService, chatWidgetService, languageModelToolsService, riskAssessmentService); this.render({ allowActionId: AcceptToolConfirmationActionId, diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatModifiedFilesConfirmationSubPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatModifiedFilesConfirmationSubPart.ts index d71f6508afe7a..4c3e91c09eb30 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatModifiedFilesConfirmationSubPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatModifiedFilesConfirmationSubPart.ts @@ -22,6 +22,7 @@ import { ILanguageModelToolsService } from '../../../../common/tools/languageMod import { ModifiedFileEntryState } 
from '../../../../common/editing/chatEditingService.js'; import { ChatContextKeys } from '../../../../common/actions/chatContextKeys.js'; import { IChatCodeBlockInfo, IChatWidgetService } from '../../../chat.js'; +import { IChatToolRiskAssessmentService } from '../../../tools/chatToolRiskAssessmentService.js'; import { IChatContentPartRenderContext } from '../chatContentParts.js'; import { ChatCustomConfirmationWidget, IChatConfirmationButton } from '../chatConfirmationWidget.js'; import { CollapsibleListPool, IChatCollapsibleListItem } from '../chatReferencesContentPart.js'; @@ -44,8 +45,9 @@ export class ChatModifiedFilesConfirmationSubPart extends AbstractToolConfirmati @IMarkdownRendererService private readonly markdownRendererService: IMarkdownRendererService, @IEditorService private readonly editorService: IEditorService, @ICommandService private readonly commandService: ICommandService, + @IChatToolRiskAssessmentService riskAssessmentService: IChatToolRiskAssessmentService, ) { - super(toolInvocation, context, instantiationService, keybindingService, contextKeyService, chatWidgetService, languageModelToolsService); + super(toolInvocation, context, instantiationService, keybindingService, contextKeyService, chatWidgetService, languageModelToolsService, riskAssessmentService); const state = toolInvocation.state.get(); if (state.type !== IChatToolInvocation.StateKind.WaitingForConfirmation || !state.confirmationMessages?.title) { @@ -67,6 +69,7 @@ export class ChatModifiedFilesConfirmationSubPart extends AbstractToolConfirmati subtitle: typeof toolInvocation.originMessage === 'string' ? 
toolInvocation.originMessage : toolInvocation.originMessage?.value, buttons: this.createButtons(data.options), message: this.createWidgetContentElement(state.confirmationMessages.message, data), + footerBanner: this.createRiskBadgeDomNode(state.parameters), } )); diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatTerminalToolConfirmationSubPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatTerminalToolConfirmationSubPart.ts index 518aa7c2bd35a..12ec75f688788 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatTerminalToolConfirmationSubPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatTerminalToolConfirmationSubPart.ts @@ -8,7 +8,6 @@ import { HoverStyle } from '../../../../../../../base/browser/ui/hover/hover.js' import { HoverPosition } from '../../../../../../../base/browser/ui/hover/hoverWidget.js'; import { Separator } from '../../../../../../../base/common/actions.js'; import { asArray } from '../../../../../../../base/common/arrays.js'; -import { CancellationTokenSource } from '../../../../../../../base/common/cancellation.js'; import { Codicon } from '../../../../../../../base/common/codicons.js'; import { ErrorNoTelemetry } from '../../../../../../../base/common/errors.js'; import { createCommandUri, escapeMarkdownSyntaxTokens, MarkdownString, type IMarkdownString } from '../../../../../../../base/common/htmlContent.js'; @@ -41,7 +40,7 @@ import { IChatContentPartRenderContext } from '../chatContentParts.js'; import { ChatMarkdownContentPart } from '../chatMarkdownContentPart.js'; import { CodeBlockPart, ICodeBlockRenderOptions } from '../codeBlockPart.js'; import { BaseChatToolInvocationSubPart } from './chatToolInvocationSubPart.js'; -import { ToolRiskBadgeWidget } from './toolRiskBadgeWidget.js'; +import { createToolRiskBadge } from './toolRiskBadgeHelper.js'; export 
const enum TerminalToolConfirmationStorageKeys { TerminalAutoApproveWarningAccepted = 'chat.tools.terminal.autoApprove.warningAccepted' @@ -191,7 +190,10 @@ export class ChatTerminalToolConfirmationSubPart extends BaseChatToolInvocationS position: { hoverPosition: HoverPosition.LEFT }, })); - const riskBadge = this._createRiskBadge(state.parameters); + const tool = this.languageModelToolsService.getTool(this.toolInvocation.toolId); + const riskBadge = tool + ? createToolRiskBadge(this._store, this.instantiationService, this.riskAssessmentService, tool, state.parameters, 'terminal') + : undefined; const confirmWidget = this._register(this.instantiationService.createInstance( ChatCustomConfirmationWidget, @@ -507,43 +509,6 @@ export class ChatTerminalToolConfirmationSubPart extends BaseChatToolInvocationS return promptResult.result === true; } - private _createRiskBadge(parameters: unknown): ToolRiskBadgeWidget | undefined { - if (!this.riskAssessmentService.isEnabled()) { - return undefined; - } - const tool = this.languageModelToolsService.getTool(this.toolInvocation.toolId); - if (!tool) { - return undefined; - } - const widget = this._register(this.instantiationService.createInstance(ToolRiskBadgeWidget)); - const cached = this.riskAssessmentService.getCached(tool, parameters); - if (cached) { - widget.setAssessment(cached); - } else { - widget.setLoading(); - const cts = new CancellationTokenSource(); - this._register(toDisposable(() => cts.dispose(true))); - (async () => { - try { - const result = await this.riskAssessmentService.assess(tool, parameters, cts.token); - if (cts.token.isCancellationRequested || widget.isDisposed) { - return; - } - if (!result) { - widget.setHidden(); - return; - } - widget.setAssessment(result); - } catch { - if (!widget.isDisposed) { - widget.setHidden(); - } - } - })(); - } - return widget; - } - private _appendMarkdownPart(container: HTMLElement, message: string | IMarkdownString, codeBlockRenderOptions: 
ICodeBlockRenderOptions) { const part = this._register(this.instantiationService.createInstance(ChatMarkdownContentPart, { diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatToolConfirmationSubPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatToolConfirmationSubPart.ts index 2ada03f701601..1cf66bb3a5718 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatToolConfirmationSubPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatToolConfirmationSubPart.ts @@ -25,6 +25,7 @@ import { createToolSchemaUri, ILanguageModelToolsService, IToolConfirmationMessa import { ILanguageModelToolsConfirmationService } from '../../../../common/tools/languageModelToolsConfirmationService.js'; import { AcceptToolConfirmationActionId, SkipToolConfirmationActionId } from '../../../actions/chatToolActions.js'; import { IChatCodeBlockInfo, IChatWidgetService } from '../../../chat.js'; +import { IChatToolRiskAssessmentService } from '../../../tools/chatToolRiskAssessmentService.js'; import { renderFileWidgets } from '../chatInlineAnchorWidget.js'; import { CodeBlockPart, ICodeBlockRenderOptions } from '../codeBlockPart.js'; import { IChatContentPartRenderContext } from '../chatContentParts.js'; @@ -58,13 +59,14 @@ export class ToolConfirmationSubPart extends AbstractToolConfirmationSubPart { @ILanguageModelToolsService languageModelToolsService: ILanguageModelToolsService, @IChatMarkdownAnchorService private readonly chatMarkdownAnchorService: IChatMarkdownAnchorService, @ILanguageModelToolsConfirmationService private readonly confirmationService: ILanguageModelToolsConfirmationService, + @IChatToolRiskAssessmentService riskAssessmentService: IChatToolRiskAssessmentService, ) { const state = toolInvocation.state.get(); if (state.type !== IChatToolInvocation.StateKind.WaitingForConfirmation || 
!state.confirmationMessages?.title) { throw new Error('Confirmation messages are missing'); } - super(toolInvocation, context, instantiationService, keybindingService, contextKeyService, chatWidgetService, languageModelToolsService); + super(toolInvocation, context, instantiationService, keybindingService, contextKeyService, chatWidgetService, languageModelToolsService, riskAssessmentService); this.render({ allowActionId: AcceptToolConfirmationActionId, diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatToolPostExecuteConfirmationPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatToolPostExecuteConfirmationPart.ts index da2b868f353e9..654129d02122e 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatToolPostExecuteConfirmationPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatToolPostExecuteConfirmationPart.ts @@ -16,6 +16,7 @@ import { ILanguageModelToolsConfirmationService } from '../../../../common/tools import { ILanguageModelToolsService, IToolResultDataPart, IToolResultPromptTsxPart, IToolResultTextPart, stringifyPromptTsxPart } from '../../../../common/tools/languageModelToolsService.js'; import { AcceptToolPostConfirmationActionId, SkipToolPostConfirmationActionId } from '../../../actions/chatToolActions.js'; import { IChatCodeBlockInfo, IChatWidgetService } from '../../../chat.js'; +import { IChatToolRiskAssessmentService } from '../../../tools/chatToolRiskAssessmentService.js'; import { IChatContentPartRenderContext } from '../chatContentParts.js'; import { ChatCollapsibleIOPart } from '../chatToolInputOutputContentPart.js'; import { ChatToolOutputContentSubPart } from '../chatToolOutputContentSubPart.js'; @@ -36,8 +37,9 @@ export class ChatToolPostExecuteConfirmationPart extends AbstractToolConfirmatio @IChatWidgetService chatWidgetService: IChatWidgetService, 
@ILanguageModelToolsService languageModelToolsService: ILanguageModelToolsService, @ILanguageModelToolsConfirmationService private readonly confirmationService: ILanguageModelToolsConfirmationService, + @IChatToolRiskAssessmentService riskAssessmentService: IChatToolRiskAssessmentService, ) { - super(toolInvocation, context, instantiationService, keybindingService, contextKeyService, chatWidgetService, languageModelToolsService); + super(toolInvocation, context, instantiationService, keybindingService, contextKeyService, chatWidgetService, languageModelToolsService, riskAssessmentService); const subtitle = toolInvocation.pastTenseMessage || toolInvocation.invocationMessage; this.render({ allowActionId: AcceptToolPostConfirmationActionId, diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts new file mode 100644 index 0000000000000..e50b2daa31c9b --- /dev/null +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts @@ -0,0 +1,71 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import { CancellationTokenSource } from '../../../../../../../base/common/cancellation.js'; +import { DisposableStore, toDisposable } from '../../../../../../../base/common/lifecycle.js'; +import { IInstantiationService } from '../../../../../../../platform/instantiation/common/instantiation.js'; +import { IChatToolRiskAssessmentService, ToolRiskPromptKind } from '../../../tools/chatToolRiskAssessmentService.js'; +import { IToolData } from '../../../../common/tools/languageModelToolsService.js'; +import { ToolRiskBadgeWidget } from './toolRiskBadgeWidget.js'; + +/** + * Creates a {@link ToolRiskBadgeWidget} for a tool confirmation surface. + * + * Returns `undefined` when the risk-assessment feature is disabled. Otherwise + * returns a widget that either renders a cached assessment synchronously, or + * shows the loading state and assesses asynchronously, hiding itself on failure + * or when no assessment can be produced. + * + * The widget (and the {@link CancellationTokenSource} used for the asynchronous + * assessment) are registered on the provided {@link DisposableStore}, so callers + * do not need to manage their lifetime separately; disposing the store cancels + * any in-flight assessment. The widget is returned so terminal confirmations can + * still attach `setDetails` / `onDidHide`; most callers only need the widget's + * `domNode` to pass as a `footerBanner`. + * + * `kind` selects the assessment rubric (terminal vs. generic); when omitted it + * is auto-detected from the tool id. 
+ */ +export function createToolRiskBadge( + store: DisposableStore, + instantiationService: IInstantiationService, + riskAssessmentService: IChatToolRiskAssessmentService, + tool: IToolData, + parameters: unknown, + kind?: ToolRiskPromptKind, +): ToolRiskBadgeWidget | undefined { + if (!riskAssessmentService.isEnabled()) { + return undefined; + } + + const widget = store.add(instantiationService.createInstance(ToolRiskBadgeWidget)); + const cached = riskAssessmentService.getCached(tool, parameters, kind); + if (cached) { + widget.setAssessment(cached); + return widget; + } + + widget.setLoading(); + const cts = new CancellationTokenSource(); + store.add(toDisposable(() => cts.dispose(true))); + (async () => { + try { + const result = await riskAssessmentService.assess(tool, parameters, cts.token, kind); + if (cts.token.isCancellationRequested || widget.isDisposed) { + return; + } + if (!result) { + widget.setHidden(); + return; + } + widget.setAssessment(result); + } catch { + if (!widget.isDisposed) { + widget.setHidden(); + } + } + })(); + return widget; +} From 6beaf5d15ca30f197fdc86684c58a23960b3d3d6 Mon Sep 17 00:00:00 2001 From: justschen Date: Fri, 5 Jun 2026 22:01:29 -0700 Subject: [PATCH 2/4] Fix disposable leaks: check isEnabled() before resolving tool in risk badge The risk-badge factory's isEnabled() guard ran after the call sites' languageModelToolsService.getTool() lookup. With the feature disabled, getTool() was still invoked; under the shared component-fixture mock (which omits getTool), this threw inside the confirmation subpart constructor, leaving partially-constructed parts unregistered and reported as leaked disposables. Move the getTool() lookup into createToolRiskBadge() after the isEnabled() check, so it is only reached when the feature is on. The factory now takes languageModelToolsService + toolId, removing the duplicated lookup and null-check from the terminal, generic, and elicitation confirmation surfaces. 
--- .../chatElicitationContentPart.ts | 6 +----- .../abstractToolConfirmationSubPart.ts | 6 +----- .../chatTerminalToolConfirmationSubPart.ts | 5 +---- .../toolRiskBadgeHelper.ts | 21 +++++++++++++------ 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/chatElicitationContentPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/chatElicitationContentPart.ts index 7326303897d56..e964961986115 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/chatElicitationContentPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/chatElicitationContentPart.ts @@ -134,11 +134,7 @@ export class ChatElicitationContentPart extends Disposable implements IChatConte return undefined; } const { toolId, parameters } = elicitation.riskAssessment; - const tool = this.languageModelToolsService.getTool(toolId); - if (!tool) { - return undefined; - } - return createToolRiskBadge(this._store, this.instantiationService, this.riskAssessmentService, tool, parameters)?.domNode; + return createToolRiskBadge(this._store, this.instantiationService, this.riskAssessmentService, this.languageModelToolsService, toolId, parameters)?.domNode; } hasSameContent(other: IChatProgressRenderableResponseContent): boolean { diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts index f573ea1459d39..e4d7215efc932 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts @@ -216,11 +216,7 @@ export abstract class AbstractToolConfirmationSubPart extends BaseChatToolInvoca * as a `footerBanner` for 
the confirmation widget. */ protected createRiskBadgeDomNode(parameters: unknown): HTMLElement | undefined { - const tool = this.languageModelToolsService.getTool(this.toolInvocation.toolId); - if (!tool) { - return undefined; - } - return createToolRiskBadge(this._store, this.instantiationService, this.riskAssessmentService, tool, parameters)?.domNode; + return createToolRiskBadge(this._store, this.instantiationService, this.riskAssessmentService, this.languageModelToolsService, this.toolInvocation.toolId, parameters)?.domNode; } /** diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatTerminalToolConfirmationSubPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatTerminalToolConfirmationSubPart.ts index 12ec75f688788..9b4b9c61927fa 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatTerminalToolConfirmationSubPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/chatTerminalToolConfirmationSubPart.ts @@ -190,10 +190,7 @@ export class ChatTerminalToolConfirmationSubPart extends BaseChatToolInvocationS position: { hoverPosition: HoverPosition.LEFT }, })); - const tool = this.languageModelToolsService.getTool(this.toolInvocation.toolId); - const riskBadge = tool - ? 
createToolRiskBadge(this._store, this.instantiationService, this.riskAssessmentService, tool, state.parameters, 'terminal') - : undefined; + const riskBadge = createToolRiskBadge(this._store, this.instantiationService, this.riskAssessmentService, this.languageModelToolsService, this.toolInvocation.toolId, state.parameters, 'terminal'); const confirmWidget = this._register(this.instantiationService.createInstance( ChatCustomConfirmationWidget, diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts index e50b2daa31c9b..f4af8a2ab5dc3 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts @@ -7,16 +7,16 @@ import { CancellationTokenSource } from '../../../../../../../base/common/cancel import { DisposableStore, toDisposable } from '../../../../../../../base/common/lifecycle.js'; import { IInstantiationService } from '../../../../../../../platform/instantiation/common/instantiation.js'; import { IChatToolRiskAssessmentService, ToolRiskPromptKind } from '../../../tools/chatToolRiskAssessmentService.js'; -import { IToolData } from '../../../../common/tools/languageModelToolsService.js'; +import { ILanguageModelToolsService } from '../../../../common/tools/languageModelToolsService.js'; import { ToolRiskBadgeWidget } from './toolRiskBadgeWidget.js'; /** * Creates a {@link ToolRiskBadgeWidget} for a tool confirmation surface. * - * Returns `undefined` when the risk-assessment feature is disabled. Otherwise - * returns a widget that either renders a cached assessment synchronously, or - * shows the loading state and assesses asynchronously, hiding itself on failure - * or when no assessment can be produced. 
+ * Returns `undefined` when the risk-assessment feature is disabled or the tool + * is unknown. Otherwise returns a widget that either renders a cached assessment + * synchronously, or shows the loading state and assesses asynchronously, hiding + * itself on failure or when no assessment can be produced. * * The widget (and the {@link CancellationTokenSource} used for the asynchronous * assessment) are registered on the provided {@link DisposableStore}, so callers @@ -32,14 +32,23 @@ export function createToolRiskBadge( store: DisposableStore, instantiationService: IInstantiationService, riskAssessmentService: IChatToolRiskAssessmentService, - tool: IToolData, + languageModelToolsService: ILanguageModelToolsService, + toolId: string, parameters: unknown, kind?: ToolRiskPromptKind, ): ToolRiskBadgeWidget | undefined { + // Check the feature flag before resolving the tool so the (potentially + // expensive or unavailable) tool lookup is skipped when risk assessment is + // turned off. if (!riskAssessmentService.isEnabled()) { return undefined; } + const tool = languageModelToolsService.getTool(toolId); + if (!tool) { + return undefined; + } + const widget = store.add(instantiationService.createInstance(ToolRiskBadgeWidget)); const cached = riskAssessmentService.getCached(tool, parameters, kind); if (cached) { From dce519c7a969236008814938d7c7a7a4b0a4e6af Mon Sep 17 00:00:00 2001 From: justschen Date: Sat, 6 Jun 2026 17:31:00 -0700 Subject: [PATCH 3/4] autopilot gated risk assessment --- .../tools/chatToolRiskAssessmentService.ts | 23 +- .../tools/languageModelToolsService.ts | 165 +++++++- .../abstractToolConfirmationSubPart.ts | 5 +- .../toolRiskBadgeHelper.ts | 25 +- .../contrib/chat/common/constants.ts | 9 + .../tools/languageModelToolsService.test.ts | 352 +++++++++++++++++- 6 files changed, 538 insertions(+), 41 deletions(-) diff --git a/src/vs/workbench/contrib/chat/browser/tools/chatToolRiskAssessmentService.ts
b/src/vs/workbench/contrib/chat/browser/tools/chatToolRiskAssessmentService.ts index 934dd0234e4b9..71d99b6cdac2c 100644 --- a/src/vs/workbench/contrib/chat/browser/tools/chatToolRiskAssessmentService.ts +++ b/src/vs/workbench/contrib/chat/browser/tools/chatToolRiskAssessmentService.ts @@ -29,9 +29,8 @@ export interface IToolRiskAssessment { export const IChatToolRiskAssessmentService = createDecorator('chatToolRiskAssessmentService'); /** - * Which rubric the model uses to assess a tool call. `terminal` evaluates a - * shell command; `generic` evaluates file edits, reads, fetches, and other - * tool calls. When omitted, the kind is auto-detected from the tool id. + * Which rubric the model uses to assess a tool call: `terminal` for a shell command, `generic` + * for file edits, reads, fetches, and everything else. When omitted, auto-detected from the tool id. */ export type ToolRiskPromptKind = 'terminal' | 'generic'; @@ -42,11 +41,11 @@ export interface IChatToolRiskAssessmentService { /** Synchronously read a previously cached assessment, or undefined if none. */ getCached(tool: IToolData, parameters: unknown, kind?: ToolRiskPromptKind): IToolRiskAssessment | undefined; /** - * Get a cached or freshly-computed risk assessment for a tool call. - * Returns `undefined` when the feature is disabled, no model is available, - * or the assessment cannot be parsed. + * Get a cached or freshly-computed risk assessment for a tool call. Returns `undefined` when no + * model is available or the assessment cannot be parsed, or when the feature is disabled unless + * `options.ignoreEnablement` is set (used by the Autopilot risk gate). 
*/ - assess(tool: IToolData, parameters: unknown, token: CancellationToken, kind?: ToolRiskPromptKind): Promise; + assess(tool: IToolData, parameters: unknown, token: CancellationToken, kind?: ToolRiskPromptKind, options?: { ignoreEnablement?: boolean }): Promise; } const MAX_PARAM_BYTES = 2000; @@ -75,8 +74,8 @@ export class ChatToolRiskAssessmentService implements IChatToolRiskAssessmentSer return this._cache.get(this._cacheKey(tool, parameters, resolveRiskPromptKind(tool, kind)))?.assessment; } - async assess(tool: IToolData, parameters: unknown, token: CancellationToken, kind?: ToolRiskPromptKind): Promise { - if (!this.isEnabled()) { + async assess(tool: IToolData, parameters: unknown, token: CancellationToken, kind?: ToolRiskPromptKind, options?: { ignoreEnablement?: boolean }): Promise { + if (!options?.ignoreEnablement && !this.isEnabled()) { return undefined; } @@ -158,10 +157,8 @@ export class ChatToolRiskAssessmentService implements IChatToolRiskAssessmentSer } /** - * Resolve which rubric to assess a tool call under. Callers that know the - * surface (e.g. the terminal confirmation) pass an explicit kind; otherwise it - * is auto-detected from the tool id so the built-in `run_in_terminal` tool - * keeps the terminal rubric. + * Resolve which rubric to assess a tool call under. An explicit kind wins; otherwise it is + * auto-detected from the tool id so `run_in_terminal` keeps the terminal rubric. */ function resolveRiskPromptKind(tool: IToolData, kind: ToolRiskPromptKind | undefined): ToolRiskPromptKind { return kind ?? (tool.id === TerminalToolId.RunInTerminal ? 
'terminal' : 'generic'); diff --git a/src/vs/workbench/contrib/chat/browser/tools/languageModelToolsService.ts b/src/vs/workbench/contrib/chat/browser/tools/languageModelToolsService.ts index b0f882aba191f..ed0793ebf1470 100644 --- a/src/vs/workbench/contrib/chat/browser/tools/languageModelToolsService.ts +++ b/src/vs/workbench/contrib/chat/browser/tools/languageModelToolsService.ts @@ -42,7 +42,7 @@ import { ChatContextKeys } from '../../common/actions/chatContextKeys.js'; import { ChatRequestToolReferenceEntry, toToolSetVariableEntry, toToolVariableEntry } from '../../common/attachments/chatVariableEntries.js'; import { IVariableReference } from '../../common/chatModes.js'; import { ConfirmedReason, IChatService, IChatToolInvocation, ToolConfirmKind } from '../../common/chatService/chatService.js'; -import { ChatConfiguration, isAutoApproveLevel } from '../../common/constants.js'; +import { ChatConfiguration, isAutoApproveLevel, isAutopilotLevel } from '../../common/constants.js'; import { localChatSessionType } from '../../common/chatSessionsService.js'; import { ILanguageModelChatMetadata } from '../../common/languageModels.js'; import { IChatModel, IChatRequestModel } from '../../common/model/chatModel.js'; @@ -51,10 +51,12 @@ import { chatSessionResourceToId, getChatSessionType } from '../../common/model/ import { HookType } from '../../common/promptSyntax/hookTypes.js'; import { CopilotChatSettingId, CopilotToolId } from '../../common/tools/copilotToolIds.js'; import { ILanguageModelToolsConfirmationService } from '../../common/tools/languageModelToolsConfirmationService.js'; +import { TerminalToolId } from '../../common/tools/terminalToolIds.js'; import { CountTokensCallback, createToolSchemaUri, IBeginToolCallOptions, IExternalPreToolUseHookResult, ILanguageModelToolsService, IPreparedToolInvocation, isToolSet, IToolAndToolSetEnablementMap, IToolData, IToolImpl, IToolInvocation, IToolInvokedEvent, IToolResult, IToolResultInputOutputDetails, IToolSet, 
SpecedToolAliases, stringifyPromptTsxPart, ToolDataSource, ToolInvocationPresentation, toolMatchesModel, ToolSet, ToolSetForModel, VSCodeToolReference } from '../../common/tools/languageModelToolsService.js'; import { IToolResultCompressor } from '../../common/tools/toolResultCompressor.js'; import { getToolConfirmationAlert } from '../accessibility/chatAccessibilityProvider.js'; import { IChatWidgetService } from '../chat.js'; +import { IChatToolRiskAssessmentService, ToolRiskLevel } from './chatToolRiskAssessmentService.js'; const jsonSchemaRegistry = Registry.as(JSONContributionRegistry.Extensions.JSONContribution); @@ -73,6 +75,13 @@ export const enum AutoApproveStorageKeys { const SkipAutoApproveConfirmationKey = 'vscode.chat.tools.global.autoApprove.testMode'; +/** + * Marks a {@link ToolConfirmKind.ConfirmationNotNeeded} decision that came from the session + * auto-approving everything, rather than a per-tool setting or an explicit user action. Shared so + * `shouldAutoConfirm`, the Autopilot risk gate, and approval telemetry use the same string. + */ +const autoApproveAllReason = 'auto-approve-all'; + // This tool will always require user confirmation even in auto approval mode. // Users cannot auto approve this tool via settings either, as this is a tool used before the agentic loop. const toolIdsThatCannotBeAutoApproved = new Set([ @@ -80,6 +89,15 @@ const toolIdsThatCannotBeAutoApproved = new Set([ 'vscode_get_modified_files_confirmation', ]); +// Fetch uses two tools: the model-facing 'copilot_fetchWebPage' and the internal +// 'vscode_fetchWebPage_internal' it delegates to. Both auto-approve themselves, so the Autopilot +// risk gate classifies them to catch dangerous fetches (leaking secrets to an attacker URL, +// hitting internal hosts). 
+const fetchWebPageToolIds = new Set([ + 'copilot_fetchWebPage', + 'vscode_fetchWebPage_internal', +]); + export const globalAutoApproveDescription = localize2( { key: 'autoApprove3.markdown', @@ -140,6 +158,7 @@ export class LanguageModelToolsService extends Disposable implements ILanguageMo @ICommandService private readonly _commandService: ICommandService, @IChatWidgetService private readonly _chatWidgetService: IChatWidgetService, @IToolResultCompressor private readonly _toolResultCompressor: IToolResultCompressor, + @IChatToolRiskAssessmentService private readonly _riskAssessmentService: IChatToolRiskAssessmentService, ) { super(); @@ -591,9 +610,13 @@ export class LanguageModelToolsService extends Disposable implements ILanguageMo preparedInvocation = await this.prepareToolInvocationWithHookResult(tool, dto, preToolUseHookResult, token); prepareTimeWatch.stop(); - const { autoConfirmed, preparedInvocation: updatedPreparedInvocation } = await this.resolveAutoConfirmFromHook(preToolUseHookResult, tool, dto, preparedInvocation, dto.context?.sessionResource); + const { autoConfirmed: resolvedAutoConfirmed, preparedInvocation: updatedPreparedInvocation } = await this.resolveAutoConfirmFromHook(preToolUseHookResult, tool, dto, preparedInvocation, dto.context?.sessionResource); preparedInvocation = updatedPreparedInvocation; + // In Autopilot, run the risk classifier on an auto-approved call that would + // otherwise show a confirmation. A "red" rating skips the call; anything else + // (including a classifier failure) keeps the original auto-confirmation. 
+ const { autoConfirmed, skipExplanation: riskSkipExplanation } = await this._maybeApplyAutopilotRiskGate(tool, dto, preparedInvocation, resolvedAutoConfirmed, token); // Important: a tool invocation that will be autoconfirmed should never // be in the chat response in the `NeedsConfirmation` state, even briefly, @@ -612,6 +635,27 @@ export class LanguageModelToolsService extends Disposable implements ILanguageMo } dto.toolSpecificData = toolInvocation?.toolSpecificData; + + // Enforce a risk skip here, before the confirmation flow below: run_in_terminal + // suppresses its own confirmation under Autopilot and never reaches it. The tool + // is not run, and an info note explains why. + if (riskSkipExplanation) { + this._logToolApprovalTelemetry(tool, dto, { type: ToolConfirmKind.Skipped }); + // Terminal and edit tools hide their invocation part once complete, so show the + // reason as a separate info note. + this._chatService.appendProgress(request, { + kind: 'info', + content: new MarkdownString(localize('autopilotRiskSkipped', "Autopilot skipped \"{0}\" because it was assessed as high-risk: {1}", tool.data.displayName, riskSkipExplanation)), + }); + toolResult = { + content: [{ + kind: 'text', + value: `Autopilot skipped this tool call because it was automatically assessed as high-risk: ${riskSkipExplanation} The action was not performed. 
Do not retry it as-is — choose a safer approach or leave it for the user to run manually.` + }] + }; + return toolResult; + } + if (preparedInvocation?.confirmationMessages?.title) { if (!IChatToolInvocation.executionConfirmedOrDenied(toolInvocation) && !autoConfirmed) { this.playAccessibilitySignal([toolInvocation], dto.context?.sessionResource); @@ -760,7 +804,7 @@ export class LanguageModelToolsService extends Disposable implements ILanguageMo [ToolConfirmKind.UserAction]: 'userAction', [ToolConfirmKind.Skipped]: 'skipped', }; - const allowedConfirmationNotNeededReasons = new Set(['auto-approve-all', 'inlineChat']); + const allowedConfirmationNotNeededReasons = new Set([autoApproveAllReason, 'inlineChat']); let confirmationNotNeededReason: string | undefined; if (reason.type === ToolConfirmKind.ConfirmationNotNeeded && reason.reason) { const raw = typeof reason.reason === 'string' ? reason.reason : reason.reason.value; @@ -875,6 +919,94 @@ export class LanguageModelToolsService extends Disposable implements ILanguageMo return { autoConfirmed, preparedInvocation }; } + /** + * In Autopilot, runs the risk classifier on an auto-approved call and skips it when the rating + * is {@link ToolRiskLevel.Red}. Any other result returns the original auto-confirmation + * unchanged. + * + * To keep the classifier off the hot path, it only runs when all of these hold: + * - the call was auto-approved by the session approving everything, or is a `run_in_terminal` / + * fetch call that self-approved (these can run risky commands or prompt-injected URLs without + * ever showing a confirmation); + * - it would otherwise show a confirmation (the self-approving tools above are the exception); + * - the session is a local panel session at the Autopilot level with Advanced Autopilot on. + * + * This is independent of `chat.tools.riskAssessment.enabled`, which only controls the + * confirmation risk badge. 
CLI and agent-host sessions handle their own confirmations and are + * excluded. + * + * Fails open: a cancelled, unavailable, or failed assessment keeps the original + * auto-confirmation so Autopilot keeps moving. + */ + private async _maybeApplyAutopilotRiskGate( + tool: IToolEntry, + dto: IToolInvocation, + preparedInvocation: IPreparedToolInvocation | undefined, + autoConfirmed: ConfirmedReason | undefined, + token: CancellationToken, + ): Promise<{ autoConfirmed: ConfirmedReason | undefined; skipExplanation?: string }> { + const isTerminalTool = tool.data.id === TerminalToolId.RunInTerminal; + const isFetchTool = fetchWebPageToolIds.has(tool.data.id); + const isAlwaysClassifyTool = isTerminalTool || isFetchTool; + + // Normally only gate calls the session auto-approved wholesale (the `autoApproveAllReason` + // sentinel). A per-tool setting, user action, or hook carries a concrete reason and is + // respected as-is. + // + // Exception: run_in_terminal and fetch self-approve without a confirmation, so a risky command + // or a prompt-injected URL would run unclassified. Gate them when they arrive self-approved + // (no reason and no confirmation of their own); an explicit allow carries a concrete reason + // instead of `undefined`, so it stays respected. + const isBlanketSessionApprove = autoConfirmed?.type === ToolConfirmKind.ConfirmationNotNeeded + && autoConfirmed.reason === autoApproveAllReason; + const isSelfApprovedAlwaysClassify = isAlwaysClassifyTool + && autoConfirmed === undefined + && !preparedInvocation?.confirmationMessages?.title; + if (!isBlanketSessionApprove && !isSelfApprovedAlwaysClassify) { + return { autoConfirmed }; + } + + // Only gate calls that would otherwise show a confirmation, plus the self-approving tools above. 
+ if (!isAlwaysClassifyTool && !preparedInvocation?.confirmationMessages?.title) { + return { autoConfirmed }; + } + + // Check the Advanced Autopilot flag first: it is default-off, so the common case bails before + // the session lookups below. This does not consult `chat.tools.riskAssessment.enabled`, which + // only controls the confirmation risk badge. + if (this._configurationService.getValue(ChatConfiguration.AutopilotAdvancedEnabled) !== true) { + return { autoConfirmed }; + } + + // Scope to local panel sessions at the Autopilot level. CLI and agent-host sessions handle + // their own confirmations. + const sessionResource = dto.context?.sessionResource; + if (!sessionResource || getChatSessionType(sessionResource) !== localChatSessionType) { + return { autoConfirmed }; + } + if (!this._isSessionInAutopilotLevel(sessionResource)) { + return { autoConfirmed }; + } + + try { + // ignoreEnablement: assess even when the risk-badge setting is off. + const assessment = await this._riskAssessmentService.assess(tool.data, dto.parameters, token, undefined, { ignoreEnablement: true }); + if (token.isCancellationRequested) { + return { autoConfirmed }; + } + if (assessment?.risk === ToolRiskLevel.Red) { + const explanation = assessment.explanation.trim() || 'The action was assessed as potentially destructive or irreversible.'; + this._logService.info(`[LanguageModelToolsService#invokeTool] Autopilot skipping high-risk tool ${tool.data.id}: ${explanation}`); + return { autoConfirmed: { type: ToolConfirmKind.Skipped }, skipExplanation: explanation }; + } + } catch (err) { + this._logService.warn(`[LanguageModelToolsService#invokeTool] Autopilot risk assessment failed for tool ${tool.data.id}, allowing: ${toErrorMessage(err)}`); + } + + // Green/orange, no assessment, or a failure: keep the original auto-confirmation (fail open). 
+ return { autoConfirmed }; + } + private async prepareToolInvocation(tool: IToolEntry, dto: IToolInvocation, forceConfirmationReason: string | undefined, token: CancellationToken): Promise { let prepared: IPreparedToolInvocation | undefined; if (tool.impl!.prepareToolInvocation) { @@ -1155,6 +1287,29 @@ export class LanguageModelToolsService extends Disposable implements ILanguageMo return isAutoApproveLevel(request?.modeInfo?.permissionLevel) || this._isSessionLiveAutoApproveLevel(chatSessionResource); } + /** + * True if the session's live permission picker level is Autopilot. Like + * {@link _isSessionLiveAutoApproveLevel}, but excludes plain Auto-Approve. + */ + private _isSessionLiveAutopilotLevel(chatSessionResource: URI): boolean { + const widget = this._chatWidgetService.getWidgetBySessionResource(chatSessionResource) + ?? this._chatWidgetService.lastFocusedWidget; + return !!widget && isAutopilotLevel(widget.input.currentModeInfo.permissionLevel); + } + + /** + * True if the session is at the Autopilot level (not plain Auto-Approve), via either the last + * request's stamped level or the live picker level. + */ + private _isSessionInAutopilotLevel(chatSessionResource: URI | undefined): boolean { + if (!chatSessionResource) { + return false; + } + const model = this._chatService.getSession(chatSessionResource); + const request = model?.getRequests().at(-1); + return isAutopilotLevel(request?.modeInfo?.permissionLevel) || this._isSessionLiveAutopilotLevel(chatSessionResource); + } + private getEligibleForAutoApprovalSpecialCase(toolData: IToolData): string | undefined { if (toolData.id === 'vscode_fetchWebPage_internal') { return 'fetch'; @@ -1210,7 +1365,7 @@ export class LanguageModelToolsService extends Disposable implements ILanguageMo if (chatSessionResource && !this._isAutoApprovePolicyRestricted() && this._isSessionInAutoApproveLevel(chatSessionResource)) { // CLI sessions still need their multi-option dialogs (e.g. uncommitted changes). 
if (!(toolIdsThatCannotBeAutoApproved.has(tool.data.id) && getChatSessionType(chatSessionResource) !== localChatSessionType)) { - return { type: ToolConfirmKind.ConfirmationNotNeeded, reason: 'auto-approve-all' }; + return { type: ToolConfirmKind.ConfirmationNotNeeded, reason: autoApproveAllReason }; } } @@ -1251,7 +1406,7 @@ export class LanguageModelToolsService extends Disposable implements ILanguageMo const sessionAutoApprove = chatSessionResource && !this._isAutoApprovePolicyRestricted() && this._isSessionInAutoApproveLevel(chatSessionResource); if (sessionAutoApprove) { if (!(toolIdsThatCannotBeAutoApproved.has(toolId) && getChatSessionType(chatSessionResource!) !== localChatSessionType)) { - return { type: ToolConfirmKind.ConfirmationNotNeeded, reason: 'auto-approve-all' }; + return { type: ToolConfirmKind.ConfirmationNotNeeded, reason: autoApproveAllReason }; } } diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts index e4d7215efc932..4555ca617cd62 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/abstractToolConfirmationSubPart.ts @@ -117,9 +117,8 @@ export abstract class AbstractToolConfirmationSubPart extends BaseChatToolInvoca const contentElement = this.createContentElement(); const tool = languageModelToolsService.getTool(toolInvocation.toolId); - // Risk badges describe a pending action, so they are only shown for the - // pre-execution confirmation — not post-approval, where the tool has - // already run. + // Risk badges describe a pending action, so they only show on the pre-execution + // confirmation, not after the tool has run. 
const riskBadge = state.type === IChatToolInvocation.StateKind.WaitingForConfirmation ? this.createRiskBadgeDomNode(state.parameters) : undefined; diff --git a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts index f4af8a2ab5dc3..e1dedd48d2084 100644 --- a/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts +++ b/src/vs/workbench/contrib/chat/browser/widget/chatContentParts/toolInvocationParts/toolRiskBadgeHelper.ts @@ -11,22 +11,15 @@ import { ILanguageModelToolsService } from '../../../../common/tools/languageMod import { ToolRiskBadgeWidget } from './toolRiskBadgeWidget.js'; /** - * Creates a {@link ToolRiskBadgeWidget} for a tool confirmation surface. + * Creates a {@link ToolRiskBadgeWidget} for a tool confirmation surface, or `undefined` when the + * feature is disabled or the tool is unknown. A cached assessment renders synchronously; otherwise + * the badge shows a loading state and assesses asynchronously, hiding itself on failure. * - * Returns `undefined` when the risk-assessment feature is disabled or the tool - * is unknown. Otherwise returns a widget that either renders a cached assessment - * synchronously, or shows the loading state and assesses asynchronously, hiding - * itself on failure or when no assessment can be produced. + * The widget and its assessment token are registered on `store`, so disposing the store cancels + * any in-flight assessment. The widget is returned so terminal confirmations can attach + * `setDetails` / `onDidHide`; most callers only need its `domNode` as a `footerBanner`. 
* - * The widget (and the {@link CancellationTokenSource} used for the asynchronous - * assessment) are registered on the provided {@link DisposableStore}, so callers - * do not need to manage their lifetime separately; disposing the store cancels - * any in-flight assessment. The widget is returned so terminal confirmations can - * still attach `setDetails` / `onDidHide`; most callers only need the widget's - * `domNode` to pass as a `footerBanner`. - * - * `kind` selects the assessment rubric (terminal vs. generic); when omitted it - * is auto-detected from the tool id. + * `kind` selects the rubric (terminal vs. generic); when omitted it is auto-detected from the tool id. */ export function createToolRiskBadge( store: DisposableStore, @@ -37,9 +30,7 @@ export function createToolRiskBadge( parameters: unknown, kind?: ToolRiskPromptKind, ): ToolRiskBadgeWidget | undefined { - // Check the feature flag before resolving the tool so the (potentially - // expensive or unavailable) tool lookup is skipped when risk assessment is - // turned off. + // Check the feature flag before the tool lookup so it is skipped when disabled. if (!riskAssessmentService.isEnabled()) { return undefined; } diff --git a/src/vs/workbench/contrib/chat/common/constants.ts b/src/vs/workbench/contrib/chat/common/constants.ts index 2e073a826c561..7a715331fe856 100644 --- a/src/vs/workbench/contrib/chat/common/constants.ts +++ b/src/vs/workbench/contrib/chat/common/constants.ts @@ -130,6 +130,15 @@ export function isAutoApproveLevel(level: ChatPermissionLevel | undefined): bool return level === ChatPermissionLevel.AutoApprove || level === ChatPermissionLevel.Autopilot; } +/** + * True for {@link ChatPermissionLevel.Autopilot} only. Unlike {@link isAutoApproveLevel}, this + * excludes {@link ChatPermissionLevel.AutoApprove}, so it can gate Autopilot-only behavior such as + * risk-based skipping of tool calls. 
+ */ +export function isAutopilotLevel(level: ChatPermissionLevel | undefined): boolean { + return level === ChatPermissionLevel.Autopilot; +} + // Thinking display modes for pinned content export enum ThinkingDisplayMode { Collapsed = 'collapsed', diff --git a/src/vs/workbench/contrib/chat/test/browser/tools/languageModelToolsService.test.ts b/src/vs/workbench/contrib/chat/test/browser/tools/languageModelToolsService.test.ts index 0ec94c6fe35af..2eb04ea38e088 100644 --- a/src/vs/workbench/contrib/chat/test/browser/tools/languageModelToolsService.test.ts +++ b/src/vs/workbench/contrib/chat/test/browser/tools/languageModelToolsService.test.ts @@ -6,7 +6,7 @@ import * as assert from 'assert'; import { Barrier } from '../../../../../../base/common/async.js'; import { VSBuffer } from '../../../../../../base/common/buffer.js'; -import { CancellationToken } from '../../../../../../base/common/cancellation.js'; +import { CancellationToken, CancellationTokenSource } from '../../../../../../base/common/cancellation.js'; import { CancellationError, isCancellationError } from '../../../../../../base/common/errors.js'; import { URI } from '../../../../../../base/common/uri.js'; import { ensureNoDisposablesAreLeakedInTestSuite } from '../../../../../../base/test/common/utils.js'; @@ -23,8 +23,9 @@ import { ConfirmationOptionKind } from '../../../../../../platform/agentHost/com import { ITelemetryService } from '../../../../../../platform/telemetry/common/telemetry.js'; import { workbenchInstantiationService } from '../../../../../test/browser/workbenchTestServices.js'; import { LanguageModelToolsService } from '../../../browser/tools/languageModelToolsService.js'; +import { IChatToolRiskAssessmentService, IToolRiskAssessment, ToolRiskLevel, ToolRiskPromptKind } from '../../../browser/tools/chatToolRiskAssessmentService.js'; import { ChatModel, IChatModel } from '../../../common/model/chatModel.js'; -import { IChatService, IChatToolInputInvocationData, IChatToolInvocation, 
ToolConfirmKind } from '../../../common/chatService/chatService.js'; +import { IChatService, IChatProgress, IChatInfoMessage, IChatToolInputInvocationData, IChatToolInvocation, ToolConfirmKind } from '../../../common/chatService/chatService.js'; import { ChatConfiguration, ChatPermissionLevel } from '../../../common/constants.js'; import { SpecedToolAliases, isToolResultInputOutputDetails, IToolData, IToolImpl, IToolInvocation, ToolDataSource, ToolSet, IToolResultTextPart } from '../../../common/tools/languageModelToolsService.js'; import { MockChatService } from '../../common/chatService/mockChatService.js'; @@ -70,6 +71,45 @@ class TestTelemetryService implements Partial { } } +/** + * Configurable stub for {@link IChatToolRiskAssessmentService}. `enabled` models the + * `chat.tools.riskAssessment.enabled` confirmation-badge setting; tests that exercise the + * gate set `assessment`, `assessError`, or `onAssess` and inspect `assessCalls`. Note the + * Autopilot gate is independent of `enabled` (it passes `ignoreEnablement`), so the gate's + * opt-in is driven by Advanced Autopilot, not this field. + */ +class TestChatToolRiskAssessmentService implements IChatToolRiskAssessmentService { + declare readonly _serviceBrand: undefined; + + public enabled = false; + public assessment: IToolRiskAssessment | undefined = undefined; + public assessError: Error | undefined = undefined; + /** Invoked synchronously at the start of {@link assess} so tests can cancel mid-flight. 
*/ + public onAssess: (() => void) | undefined = undefined; + public readonly assessCalls: { toolId: string; parameters: unknown; kind?: ToolRiskPromptKind }[] = []; + + isEnabled(): boolean { + return this.enabled; + } + + getCached(): IToolRiskAssessment | undefined { + return undefined; + } + + async assess(tool: IToolData, parameters: unknown, _token: CancellationToken, kind?: ToolRiskPromptKind, options?: { ignoreEnablement?: boolean }): Promise { + this.assessCalls.push({ toolId: tool.id, parameters, kind }); + this.onAssess?.(); + // Mirror the real service: honor the badge setting unless the caller opts out. + if (!options?.ignoreEnablement && !this.enabled) { + return undefined; + } + if (this.assessError) { + throw this.assessError; + } + return this.assessment; + } +} + function registerToolForTest(service: LanguageModelToolsService, store: any, id: string, impl: IToolImpl, data?: Partial) { const toolData: IToolData = { id, @@ -121,6 +161,7 @@ interface TestToolsServiceSetup { chatService: MockChatService; service: LanguageModelToolsService; contextKeyService: IContextKeyService; + riskAssessmentService: TestChatToolRiskAssessmentService; } interface TestToolsServiceOptions { @@ -152,6 +193,8 @@ function createTestToolsService(store: ReturnType Promise<{ content: { value: string }[] }>; wasInvoked: () => boolean } { + const withConfirmation = opts?.withConfirmation ?? true; + const permissionLevel = opts?.permissionLevel ?? ChatPermissionLevel.Autopilot; + const advancedEnabled = opts?.advancedEnabled ?? true; + const toolId = opts?.toolId ?? 'riskGateTool'; + + setup.configurationService.setUserConfiguration(ChatConfiguration.AutopilotAdvancedEnabled, advancedEnabled); + setup.configurationService.setUserConfiguration('chat.tools.global.autoApprove', false); + + let invoked = false; + const tool = registerToolForTest(setup.service, store, toolId, { + prepareToolInvocation: async () => (withConfirmation ? 
{ confirmationMessages: { title: 'Confirm?', message: 'Proceed?' } } : {}), + invoke: async () => { invoked = true; return { content: [{ kind: 'text', value: 'ran' }] }; }, + }); + + const sessionId = 'riskGateSession'; + stubGetSession(setup.chatService, sessionId, { requestId: 'req-risk', modeInfo: { permissionLevel } }); + + return { + invoke: (token: CancellationToken = CancellationToken.None) => setup.service.invokeTool(tool.makeDto({ x: 1 }, { sessionId }), async () => 0, token) as Promise<{ content: { value: string }[] }>, + wasInvoked: () => invoked, + }; } suite('LanguageModelToolsService', () => { @@ -1612,6 +1689,275 @@ suite('LanguageModelToolsService', () => { assert.strictEqual(result.content[0].value, 'terminal executed'); }); + test('autopilot risk gate skips a tool assessed as high-risk (red)', async () => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Red, explanation: 'Deletes source files irreversibly.' }; + const t = setupRiskGateTool(setup, store); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { + invoked: t.wasInvoked(), + assessCalls: setup.riskAssessmentService.assessCalls.length, + mentionsRisk: String(result.content[0].value).includes('Deletes source files irreversibly.'), + }, + { invoked: false, assessCalls: 1, mentionsRisk: true }, + ); + }); + + test('autopilot risk gate allows a low-risk (green) tool call', async () => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Green, explanation: 'Reads a file.' 
}; + const t = setupRiskGateTool(setup, store); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { invoked: t.wasInvoked(), assessCalls: setup.riskAssessmentService.assessCalls.length, value: result.content[0].value }, + { invoked: true, assessCalls: 1, value: 'ran' }, + ); + }); + + test('autopilot risk gate allows a medium-risk (orange) tool call (red-only threshold)', async () => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Orange, explanation: 'Edits a file.' }; + const t = setupRiskGateTool(setup, store); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { invoked: t.wasInvoked(), assessCalls: setup.riskAssessmentService.assessCalls.length, value: result.content[0].value }, + { invoked: true, assessCalls: 1, value: 'ran' }, + ); + }); + + test('autopilot risk gate fails open when the classifier returns no assessment', async () => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = undefined; + const t = setupRiskGateTool(setup, store); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { invoked: t.wasInvoked(), assessCalls: setup.riskAssessmentService.assessCalls.length, value: result.content[0].value }, + { invoked: true, assessCalls: 1, value: 'ran' }, + ); + }); + + test('autopilot risk gate fails open when the classifier throws', async () => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessError = new Error('network down'); + const t = setupRiskGateTool(setup, store); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { invoked: t.wasInvoked(), value: result.content[0].value }, + { invoked: true, value: 'ran' }, + ); + }); + + test('autopilot risk gate does not assess tool calls that have no confirmation', async 
() => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Red, explanation: 'should not matter' }; + const t = setupRiskGateTool(setup, store, { withConfirmation: false }); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { invoked: t.wasInvoked(), assessCalls: setup.riskAssessmentService.assessCalls.length, value: result.content[0].value }, + { invoked: true, assessCalls: 0, value: 'ran' }, + ); + }); + + test('autopilot risk gate classifies a terminal command even when it has no confirmation', async () => { + // run_in_terminal suppresses its own confirmation under auto-approve sessions, so the + // gate must classify it anyway; a red command is skipped despite the missing confirmation. + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Red, explanation: 'Force-pushes main, overwriting history.' }; + const t = setupRiskGateTool(setup, store, { withConfirmation: false, toolId: 'run_in_terminal' }); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { + invoked: t.wasInvoked(), + assessCalls: setup.riskAssessmentService.assessCalls.length, + isRiskMessage: String(result.content[0].value).startsWith('Autopilot skipped this tool call'), + }, + { invoked: false, assessCalls: 1, isRiskMessage: true }, + ); + }); + + test('autopilot risk gate runs a non-red terminal command that has no confirmation', async () => { + // A terminal command is always classified in Autopilot, but a non-red verdict still runs. + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Orange, explanation: 'Installs a package.' 
}; + const t = setupRiskGateTool(setup, store, { withConfirmation: false, toolId: 'run_in_terminal' }); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { invoked: t.wasInvoked(), assessCalls: setup.riskAssessmentService.assessCalls.length, value: result.content[0].value }, + { invoked: true, assessCalls: 1, value: 'ran' }, + ); + }); + + test('autopilot risk gate classifies a fetch web page call even when it has no confirmation', async () => { + // Fetch web page tools auto-approve themselves (URL in the prompt / trusted domain) and so + // surface no confirmation; the gate must classify them anyway so a dangerous URL (e.g. one + // injected into the prompt to exfiltrate secrets) is still skipped when assessed red. + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Red, explanation: 'Sends workspace secrets to an untrusted host.' }; + const t = setupRiskGateTool(setup, store, { withConfirmation: false, toolId: 'vscode_fetchWebPage_internal' }); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { + invoked: t.wasInvoked(), + assessCalls: setup.riskAssessmentService.assessCalls.length, + isRiskMessage: String(result.content[0].value).startsWith('Autopilot skipped this tool call'), + }, + { invoked: false, assessCalls: 1, isRiskMessage: true }, + ); + }); + + test('autopilot risk gate runs a non-red fetch web page call that has no confirmation', async () => { + // A fetch is always classified in Autopilot, but a non-red verdict still runs. + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Green, explanation: 'Fetches public documentation.' 
}; + const t = setupRiskGateTool(setup, store, { withConfirmation: false, toolId: 'copilot_fetchWebPage' }); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { invoked: t.wasInvoked(), assessCalls: setup.riskAssessmentService.assessCalls.length, value: result.content[0].value }, + { invoked: true, assessCalls: 1, value: 'ran' }, + ); + }); + + test('autopilot risk gate is inert when Advanced Autopilot is disabled', async () => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Red, explanation: 'should not matter' }; + const t = setupRiskGateTool(setup, store, { advancedEnabled: false }); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { invoked: t.wasInvoked(), assessCalls: setup.riskAssessmentService.assessCalls.length, value: result.content[0].value }, + { invoked: true, assessCalls: 0, value: 'ran' }, + ); + }); + + test('autopilot risk gate does not apply at the plain Auto-Approve level', async () => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Red, explanation: 'should not matter' }; + const t = setupRiskGateTool(setup, store, { permissionLevel: ChatPermissionLevel.AutoApprove }); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { invoked: t.wasInvoked(), assessCalls: setup.riskAssessmentService.assessCalls.length, value: result.content[0].value }, + { invoked: true, assessCalls: 0, value: 'ran' }, + ); + }); + + test('autopilot risk gate runs even when the risk assessment badge setting is disabled', async () => { + // The gate is independent of chat.tools.riskAssessment.enabled (which only controls the + // confirmation risk badge): a red verdict still skips the call. 
Also verifies the gate + // passes ignoreEnablement — without it the stub would return undefined and the tool would run. + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = false; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Red, explanation: 'Deletes source files irreversibly.' }; + const t = setupRiskGateTool(setup, store); + + const result = await t.invoke(); + + assert.deepStrictEqual( + { + invoked: t.wasInvoked(), + assessCalls: setup.riskAssessmentService.assessCalls.length, + isRiskMessage: String(result.content[0].value).startsWith('Autopilot skipped this tool call'), + }, + { invoked: false, assessCalls: 1, isRiskMessage: true }, + ); + }); + + test('autopilot risk gate skips on red even when the classifier explanation is empty', async () => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Red, explanation: '' }; + const t = setupRiskGateTool(setup, store); + + const result = await t.invoke(); + + // The skip must still read as an automated risk-skip, never the user-skip fallback message. + assert.deepStrictEqual( + { + invoked: t.wasInvoked(), + assessCalls: setup.riskAssessmentService.assessCalls.length, + isRiskMessage: String(result.content[0].value).startsWith('Autopilot skipped this tool call'), + isUserSkipMessage: String(result.content[0].value).includes('The user chose to skip'), + }, + { invoked: false, assessCalls: 1, isRiskMessage: true, isUserSkipMessage: false }, + ); + }); + + test('autopilot risk gate does not skip when cancelled during assessment', async () => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Red, explanation: 'Deletes source files irreversibly.' 
}; + const t = setupRiskGateTool(setup, store); + + // Cancel synchronously while the classifier is running: the gate must abandon the + // assessment and propagate cancellation rather than mask it as a risk-skip result. + const cts = store.add(new CancellationTokenSource()); + setup.riskAssessmentService.onAssess = () => cts.cancel(); + + await assert.rejects(() => t.invoke(cts.token), err => isCancellationError(err)); + assert.deepStrictEqual( + { invoked: t.wasInvoked(), assessCalls: setup.riskAssessmentService.assessCalls.length }, + { invoked: false, assessCalls: 1 }, + ); + }); + + test('autopilot risk gate surfaces an info note to the user when it skips a high-risk tool', async () => { + const setup = createTestToolsService(store); + setup.riskAssessmentService.enabled = true; + setup.riskAssessmentService.assessment = { risk: ToolRiskLevel.Red, explanation: 'Deletes source files irreversibly.' }; + const t = setupRiskGateTool(setup, store); + + // The tool invocation part hides itself after completion, so the reason is surfaced + // as a separate info note appended to the response stream. 
+ const progresses: IChatProgress[] = []; + setup.chatService.appendProgress = (_request, progress) => { progresses.push(progress); }; + + await t.invoke(); + + const info = progresses.find((p): p is IChatInfoMessage => p.kind === 'info'); + assert.deepStrictEqual( + { + hasInfo: !!info, + mentionsRisk: !!info && info.content.value.includes('Deletes source files irreversibly.'), + }, + { hasInfo: true, mentionsRisk: true }, + ); + }); + test('bypass approvals auto-approves terminal tool with confirmation messages', async () => { const { service: testService, chatService: testChatService } = createTestToolsService(store, { configureServices: config => { From 60274d9bfdb369d0aace0bfb49cd74d8eedbe43f Mon Sep 17 00:00:00 2001 From: Justin Chen <54879025+justschen@users.noreply.github.com> Date: Sat, 6 Jun 2026 17:51:36 -0700 Subject: [PATCH 4/4] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../contrib/chat/browser/tools/languageModelToolsService.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vs/workbench/contrib/chat/browser/tools/languageModelToolsService.ts b/src/vs/workbench/contrib/chat/browser/tools/languageModelToolsService.ts index ed0793ebf1470..90485807192f1 100644 --- a/src/vs/workbench/contrib/chat/browser/tools/languageModelToolsService.ts +++ b/src/vs/workbench/contrib/chat/browser/tools/languageModelToolsService.ts @@ -995,7 +995,8 @@ export class LanguageModelToolsService extends Disposable implements ILanguageMo return { autoConfirmed }; } if (assessment?.risk === ToolRiskLevel.Red) { - const explanation = assessment.explanation.trim() || 'The action was assessed as potentially destructive or irreversible.'; + const fallbackExplanation = localize('autopilotRiskSkipFallback', "The action was assessed as potentially destructive or irreversible."); + const explanation = assessment.explanation.trim() || fallbackExplanation; 
this._logService.info(`[LanguageModelToolsService#invokeTool] Autopilot skipping high-risk tool ${tool.data.id}: ${explanation}`); return { autoConfirmed: { type: ToolConfirmKind.Skipped }, skipExplanation: explanation }; }