From 96ce95c7c09dcde6e794bffd2a844d0936839e21 Mon Sep 17 00:00:00 2001
From: weigang <guweigang@bullsoft.org>
Date: Wed, 6 May 2026 09:58:45 +0800
Subject: [PATCH 01/10] config: add OpenAI gateway settings

---
 examples/paseo-relay/paseo-relay.toml |   5 +
 src/config.v                          | 595 +++++++++++++++++++++-----
 src/multi_server_runtime_config.v     |  46 +-
 src/provider_spec.v                   |   1 +
 src/server_logic_test.v               |   4 +-
 5 files changed, 546 insertions(+), 105 deletions(-)

diff --git a/examples/paseo-relay/paseo-relay.toml b/examples/paseo-relay/paseo-relay.toml
index ddb2ebf..320cd61 100644
--- a/examples/paseo-relay/paseo-relay.toml
+++ b/examples/paseo-relay/paseo-relay.toml
@@ -24,6 +24,11 @@ port = 19901
 root = "${paths.root}/examples/paseo-relay"
 executor = "vjsx"
 app = "${paths.vjsx_app}"
+websocket_affinity.enabled = false
+websocket_affinity.source = "app"
+websocket_affinity.key = "serverId"
+websocket_affinity.scope = "lane"
+websocket_affinity.fallback = "reject"
 websocket_actor.enabled = true
 websocket_actor.fallback = "unkeyed"
 websocket_actor.queue_timeout_ms = 30000
diff --git a/src/config.v b/src/config.v
index 99b09c5..4ef59ed 100644
--- a/src/config.v
+++ b/src/config.v
@@ -70,6 +70,24 @@ mut:
 	enable_network    bool     @[toml: 'enable_network']
 }
 
+struct PluginConfig { // settings of one named entry of a `plugins` table (see decode_plugins_config_map)
+mut:
+	kind              string = 'vjsx' // kept when the table has no `kind` key
+	entry             string // copied into app_entry by decode_plugin_config_map when app_entry is blank
+	app_entry         string   @[toml: 'app_entry'] // passed through resolve_config_path against paths.root
+	module_root       string   @[toml: 'module_root'] // passed through resolve_config_path against paths.root
+	build_root        string   @[toml: 'build_root'] // passed through resolve_config_path against paths.root
+	signature_root    string   @[toml: 'signature_root'] // passed through resolve_config_path against paths.root
+	signature_include []string @[toml: 'signature_include'] // each item is run through expand_config_string
+	signature_exclude []string @[toml: 'signature_exclude'] // each item is run through expand_config_string
+	runtime_profile   string = 'script'   @[toml: 'runtime_profile'] // 'script' unless the table overrides it
+	thread_count      int    = 1      @[toml: 'thread_count'] // 1 unless the table overrides it
+	max_requests      int      @[toml: 'max_requests'] // 0 unless set; NOTE(review): meaning of 0 is decided by the consumer — confirm
+	enable_fs         bool     @[toml: 'enable_fs'] // false unless set
+	enable_process    bool     @[toml: 'enable_process'] // false unless set
+	enable_network    bool     @[toml: 'enable_network'] // false unless set
+}
+
 struct WebSocketAffinityConfig {
 mut:
 	enabled  bool
@@ -88,12 +106,12 @@ mut:
 
 struct WebSocketActorConfig {
 mut:
-	enabled          bool
-	sources          []WebSocketActorSourceConfig
-	fallback         string
-	queue_timeout_ms int      @[toml: 'queue_timeout_ms']
-	max_queue_per_key int     @[toml: 'max_queue_per_key']
-	events           []string
+	enabled           bool
+	sources           []WebSocketActorSourceConfig
+	fallback          string
+	queue_timeout_ms  int @[toml: 'queue_timeout_ms']
+	max_queue_per_key int @[toml: 'max_queue_per_key']
+	events            []string
 }
 
 struct AdminConfig {
@@ -157,6 +175,43 @@ mut:
 	flush_interval_ms  int    = 400    @[toml: 'flush_interval_ms']
 }
 
+struct OpenAIEndpointsConfig { // boolean switches decoded from the `endpoints` table of an `openai` section
+mut:
+	models           bool = true @[toml: 'models'] // true unless the table overrides it
+	chat_completions bool = true @[toml: 'chat_completions'] // true unless the table overrides it
+	responses        bool = true @[toml: 'responses'] // true unless the table overrides it
+	embeddings       bool @[toml: 'embeddings'] // no initializer: false unless the table sets it
+}
+
+struct OpenAIBackendConfig { // one named entry of the `backends` table of an `openai` section
+mut:
+	kind        string = 'openai_http' // kept when the table has no `kind` key
+	base_url    string @[toml: 'base_url'] // run through expand_config_string in resolve_config_variables
+	executor    string // run through expand_config_string in resolve_config_variables
+	api_key     string @[toml: 'api_key'] // expanded like the other strings; not exported by build_config_variable_map
+	api_key_env string = 'OPENAI_API_KEY' @[toml: 'api_key_env'] // NOTE(review): presumably the env var that holds the key — confirm with the consumer
+	timeout_ms  int    = 60000    @[toml: 'timeout_ms'] // 60000 unless the table overrides it
+}
+
+struct OpenAIRouteConfig { // one named entry of the `routes` table of an `openai` section
+mut:
+	model          string // decode_openai_config_map fills a blank model with the route's table name
+	models         []string // decode_openai_config_map fills an empty list with [model]
+	backend        string // NOTE(review): presumably a key of OpenAIConfig.backends — confirm
+	upstream_model string @[toml: 'upstream_model'] // empty unless the table sets it
+}
+
+struct OpenAIConfig { // decoded form of an `openai` TOML section (root level or per site)
+mut:
+	enabled         bool // false unless the section sets it
+	base_path       string = '/v1' @[toml: 'base_path'] // '/v1' unless the section overrides it
+	default_backend string @[toml: 'default_backend'] // NOTE(review): presumably a key of `backends` — confirm
+	plugin          string // NOTE(review): presumably the name of a `plugins` entry — confirm
+	endpoints       OpenAIEndpointsConfig // decoded from the `endpoints` sub-table
+	backends        map[string]OpenAIBackendConfig // keyed by the sub-table name under `backends`
+	routes          map[string]OpenAIRouteConfig // keyed by the sub-table name under `routes`
+}
+
 struct BridgeConfig {
 mut:
 	enabled   bool
@@ -204,47 +259,51 @@ mut:
 
 struct SiteConfig {
 mut:
-	project_root string @[toml: 'project_root']
-	host         string = '127.0.0.1'
-	port         int
-	app          string
-	worker_entry string
-	paths        PathsConfig
-	worker       WorkerConfig
-	executor     ExecutorConfig
-	php          PhpConfig
-	vjsx         VjsxConfig
+	project_root       string @[toml: 'project_root']
+	host               string = '127.0.0.1'
+	port               int
+	app                string
+	worker_entry       string
+	paths              PathsConfig
+	worker             WorkerConfig
+	executor           ExecutorConfig
+	php                PhpConfig
+	vjsx               VjsxConfig
+	plugins            map[string]PluginConfig
 	websocket_affinity WebSocketAffinityConfig @[toml: 'websocket_affinity']
-	websocket_actor WebSocketActorConfig @[toml: 'websocket_actor']
-	assets       AssetsConfig
-	runtime      RuntimeConfig
-	mcp          McpConfig
-	feishu       FeishuConfig
-	codex        CodexConfig
-	db           DbConfig
+	websocket_actor    WebSocketActorConfig    @[toml: 'websocket_actor']
+	assets             AssetsConfig
+	runtime            RuntimeConfig
+	mcp                McpConfig
+	feishu             FeishuConfig
+	codex              CodexConfig
+	openai             OpenAIConfig
+	db                 DbConfig
 }
 
 struct VhttpdConfig {
 mut:
-	server      ServerConfig
-	files       FilesConfig
-	paths       PathsConfig
-	worker      WorkerConfig
-	executor    ExecutorConfig
-	php         PhpConfig
-	vjsx        VjsxConfig
+	server             ServerConfig
+	files              FilesConfig
+	paths              PathsConfig
+	worker             WorkerConfig
+	executor           ExecutorConfig
+	php                PhpConfig
+	vjsx               VjsxConfig
+	plugins            map[string]PluginConfig
 	websocket_affinity WebSocketAffinityConfig @[toml: 'websocket_affinity']
-	websocket_actor WebSocketActorConfig @[toml: 'websocket_actor']
-	admin       AdminConfig
-	assets      AssetsConfig
-	runtime     RuntimeConfig
-	mcp         McpConfig
-	feishu      FeishuConfig
-	codex       CodexConfig
-	db          DbConfig
-	listeners   map[string]ListenerConfig
-	sites       map[string]SiteConfig
-	config_path string
+	websocket_actor    WebSocketActorConfig    @[toml: 'websocket_actor']
+	admin              AdminConfig
+	assets             AssetsConfig
+	runtime            RuntimeConfig
+	mcp                McpConfig
+	feishu             FeishuConfig
+	codex              CodexConfig
+	openai             OpenAIConfig
+	db                 DbConfig
+	listeners          map[string]ListenerConfig
+	sites              map[string]SiteConfig
+	config_path        string
 }
 
 fn default_vhttpd_config() VhttpdConfig {
@@ -346,11 +405,14 @@ fn load_vhttpd_config(args []string) !VhttpdConfig {
 	doc := toml.parse_text(text)!
 	decode_paths_config(doc, mut cfg)!
 	decode_feishu_config(doc, mut cfg)!
+	decode_openai_root_config(doc, mut cfg)!
+	decode_plugins_root_config(doc, mut cfg)!
 	if root_any := doc.value_opt('bridge') {
 		root := root_any.as_map()
-		if cfg.feishu.bridge.ws_url.trim_space() == '' && cfg.feishu.bridge.client_id.trim_space() == ''
-			&& cfg.feishu.bridge.token.trim_space() == '' && cfg.feishu.bridge.target_id.trim_space() == ''
-			&& !cfg.feishu.bridge.enabled {
+		if cfg.feishu.bridge.ws_url.trim_space() == ''
+			&& cfg.feishu.bridge.client_id.trim_space() == ''
+			&& cfg.feishu.bridge.token.trim_space() == ''
+			&& cfg.feishu.bridge.target_id.trim_space() == '' && !cfg.feishu.bridge.enabled {
 			cfg.feishu.bridge = decode_bridge_config_map(root)
 		}
 	}
@@ -424,6 +486,20 @@ fn decode_feishu_config(doc toml.Doc, mut cfg VhttpdConfig) ! {
 	cfg.feishu.apps = apps.clone()
 }
 
+// decode_openai_root_config decodes the top-level `openai` value, when present, into cfg.openai.
+fn decode_openai_root_config(doc toml.Doc, mut cfg VhttpdConfig) ! {
+	if openai_any := doc.value_opt('openai') {
+		cfg.openai = decode_openai_config_map(openai_any.as_map())
+	}
+}
+
+// decode_plugins_root_config decodes the top-level `plugins` value, when present, into cfg.plugins.
+fn decode_plugins_root_config(doc toml.Doc, mut cfg VhttpdConfig) ! {
+	if plugins_any := doc.value_opt('plugins') {
+		cfg.plugins = decode_plugins_config_map(plugins_any.as_map())
+	}
+}
+
 fn toml_string_from_map(entry map[string]toml.Any, key string, default_val string) string {
 	return (entry[key] or { toml.Any(default_val) }).string()
 }
@@ -595,6 +671,62 @@ fn decode_vjsx_config_map(entry map[string]toml.Any) VjsxConfig {
 	return cfg
 }
 
+// decode_plugin_config_map builds one PluginConfig from a single plugin TOML table.
+//
+// Keys that are absent from `entry` keep the PluginConfig field defaults.
+// The two signature lists are always taken from toml_string_list_from_map.
+// A blank app_entry is filled from entry.
+//
+// Returns the decoded PluginConfig; the function itself never fails.
+fn decode_plugin_config_map(entry map[string]toml.Any) PluginConfig {
+	defaults := PluginConfig{}
+	// toml_string_from_map returns the supplied default when the key is missing,
+	// so the string fields can be read straight into the struct literal.
+	mut cfg := PluginConfig{
+		kind:              toml_string_from_map(entry, 'kind', defaults.kind)
+		entry:             toml_string_from_map(entry, 'entry', defaults.entry)
+		app_entry:         toml_string_from_map(entry, 'app_entry', defaults.app_entry)
+		module_root:       toml_string_from_map(entry, 'module_root', defaults.module_root)
+		build_root:        toml_string_from_map(entry, 'build_root', defaults.build_root)
+		signature_root:    toml_string_from_map(entry, 'signature_root', defaults.signature_root)
+		signature_include: toml_string_list_from_map(entry, 'signature_include')
+		signature_exclude: toml_string_list_from_map(entry, 'signature_exclude')
+		runtime_profile:   toml_string_from_map(entry, 'runtime_profile', defaults.runtime_profile)
+	}
+	// app_entry falls back to entry when it is blank.
+	if cfg.app_entry.trim_space() == '' && cfg.entry.trim_space() != '' {
+		cfg.app_entry = cfg.entry
+	}
+	// The int/bool helpers are only called for keys that the table actually sets;
+	// every other numeric or boolean field keeps its struct default.
+	if 'thread_count' in entry {
+		cfg.thread_count = toml_int_from_map(entry, 'thread_count', cfg.thread_count)
+	}
+	if 'max_requests' in entry {
+		cfg.max_requests = toml_int_from_map(entry, 'max_requests', cfg.max_requests)
+	}
+	if 'enable_fs' in entry {
+		cfg.enable_fs = toml_bool_from_map(entry, 'enable_fs', cfg.enable_fs)
+	}
+	if 'enable_process' in entry {
+		cfg.enable_process = toml_bool_from_map(entry, 'enable_process', cfg.enable_process)
+	}
+	if 'enable_network' in entry {
+		cfg.enable_network = toml_bool_from_map(entry, 'enable_network', cfg.enable_network)
+	}
+	return cfg
+}
+
+fn decode_plugins_config_map(entry map[string]toml.Any) map[string]PluginConfig {
+	mut decoded := map[string]PluginConfig{} // one PluginConfig per sub-table, keyed by its name
+	for plugin_name, table_any in entry {
+		if table_any is map[string]toml.Any { // values that are not tables are skipped
+			decoded[plugin_name] = decode_plugin_config_map(table_any)
+		}
+	}
+	return decoded
+}
+
 fn decode_websocket_affinity_config_map(entry map[string]toml.Any) WebSocketAffinityConfig {
 	mut cfg := WebSocketAffinityConfig{}
 	if 'enabled' in entry {
@@ -793,6 +925,112 @@ fn decode_codex_config_map(entry map[string]toml.Any) CodexConfig {
 	return cfg
 }
 
+fn decode_openai_endpoints_config_map(entry map[string]toml.Any) OpenAIEndpointsConfig {
+	mut cfg := OpenAIEndpointsConfig{} // decodes an `endpoints` table, starting from the struct defaults
+	if 'models' in entry { // a key missing from `entry` leaves the default untouched
+		cfg.models = toml_bool_from_map(entry, 'models', cfg.models)
+	}
+	if 'chat_completions' in entry {
+		cfg.chat_completions = toml_bool_from_map(entry, 'chat_completions', cfg.chat_completions)
+	}
+	if 'responses' in entry {
+		cfg.responses = toml_bool_from_map(entry, 'responses', cfg.responses)
+	}
+	if 'embeddings' in entry { // the only flag whose struct default is false
+		cfg.embeddings = toml_bool_from_map(entry, 'embeddings', cfg.embeddings)
+	}
+	return cfg
+}
+
+// decode_openai_backend_config_map builds one OpenAIBackendConfig from a backend
+// TOML table. Keys that are absent from `entry` keep the OpenAIBackendConfig
+// field defaults ('openai_http', 'OPENAI_API_KEY', 60000, empty strings).
+// Returns the decoded backend; the function itself never fails.
+fn decode_openai_backend_config_map(entry map[string]toml.Any) OpenAIBackendConfig {
+	defaults := OpenAIBackendConfig{}
+	// toml_string_from_map returns the supplied default when the key is missing,
+	// so the string fields can be read straight into the struct literal.
+	mut cfg := OpenAIBackendConfig{
+		kind:        toml_string_from_map(entry, 'kind', defaults.kind)
+		base_url:    toml_string_from_map(entry, 'base_url', defaults.base_url)
+		executor:    toml_string_from_map(entry, 'executor', defaults.executor)
+		api_key:     toml_string_from_map(entry, 'api_key', defaults.api_key)
+		api_key_env: toml_string_from_map(entry, 'api_key_env', defaults.api_key_env)
+	}
+	// The int helper is only called when the table sets the key; otherwise
+	// timeout_ms keeps its struct default.
+	if 'timeout_ms' in entry {
+		cfg.timeout_ms = toml_int_from_map(entry, 'timeout_ms', cfg.timeout_ms)
+	}
+	return cfg
+}
+
+// decode_openai_route_config_map builds one OpenAIRouteConfig from a route TOML
+// table. String keys that are absent from `entry` keep the struct defaults,
+// because toml_string_from_map returns the default it is given for a missing
+// key. `models` is always taken from toml_string_list_from_map.
+// Returns the decoded route; the function itself never fails.
+fn decode_openai_route_config_map(entry map[string]toml.Any) OpenAIRouteConfig {
+	defaults := OpenAIRouteConfig{}
+	return OpenAIRouteConfig{
+		model:          toml_string_from_map(entry, 'model', defaults.model)
+		models:         toml_string_list_from_map(entry, 'models')
+		backend:        toml_string_from_map(entry, 'backend', defaults.backend)
+		upstream_model: toml_string_from_map(entry, 'upstream_model', defaults.upstream_model)
+	}
+}
+
+// decode_openai_config_map builds an OpenAIConfig from an `openai` TOML table.
+// Keys that are absent from `entry` keep the OpenAIConfig field defaults.
+// Values under `endpoints`, `backends` and `routes` that are not tables are
+// skipped. A route with a blank `model` takes its table name as model, and a
+// route without a `models` list gets `[model]`.
+fn decode_openai_config_map(entry map[string]toml.Any) OpenAIConfig {
+	defaults := OpenAIConfig{}
+	// toml_string_from_map returns the supplied default when the key is missing.
+	mut cfg := OpenAIConfig{
+		base_path:       toml_string_from_map(entry, 'base_path', defaults.base_path)
+		default_backend: toml_string_from_map(entry, 'default_backend', defaults.default_backend)
+		plugin:          toml_string_from_map(entry, 'plugin', defaults.plugin)
+	}
+	if 'enabled' in entry {
+		cfg.enabled = toml_bool_from_map(entry, 'enabled', cfg.enabled)
+	}
+	if endpoints_any := entry['endpoints'] {
+		if endpoints_any is map[string]toml.Any {
+			cfg.endpoints = decode_openai_endpoints_config_map(endpoints_any)
+		}
+	}
+	// cfg.backends starts out empty, so it can be filled in place.
+	if backends_any := entry['backends'] {
+		if backends_any is map[string]toml.Any {
+			for backend_name, backend_any in backends_any {
+				if backend_any is map[string]toml.Any {
+					cfg.backends[backend_name] = decode_openai_backend_config_map(backend_any)
+				}
+			}
+		}
+	}
+	// cfg.routes starts out empty as well; blank route fields are filled per entry.
+	if routes_any := entry['routes'] {
+		if routes_any is map[string]toml.Any {
+			for route_name, route_any in routes_any {
+				if route_any is map[string]toml.Any {
+					mut route := decode_openai_route_config_map(route_any)
+					if route.model.trim_space() == '' {
+						route.model = route_name
+					}
+					if route.models.len == 0 && route.model.trim_space() != '' {
+						route.models = [route.model]
+					}
+					cfg.routes[route_name] = route
+				}
+			}
+		}
+	}
+	return cfg
+}
+
 fn decode_bridge_config_map(entry map[string]toml.Any) BridgeConfig {
 	mut cfg := BridgeConfig{}
 	if 'enabled' in entry {
@@ -874,6 +1112,11 @@ fn decode_site_config_map(entry map[string]toml.Any) SiteConfig {
 			cfg.vjsx = decode_vjsx_config_map(vjsx_any)
 		}
 	}
+	if plugins_any := entry['plugins'] {
+		if plugins_any is map[string]toml.Any {
+			cfg.plugins = decode_plugins_config_map(plugins_any)
+		}
+	}
 	if websocket_affinity_any := entry['websocket_affinity'] {
 		if websocket_affinity_any is map[string]toml.Any {
 			cfg.websocket_affinity = decode_websocket_affinity_config_map(websocket_affinity_any)
@@ -909,6 +1152,11 @@ fn decode_site_config_map(entry map[string]toml.Any) SiteConfig {
 			cfg.codex = decode_codex_config_map(codex_any)
 		}
 	}
+	if openai_any := entry['openai'] {
+		if openai_any is map[string]toml.Any {
+			cfg.openai = decode_openai_config_map(openai_any)
+		}
+	}
 	return cfg
 }
 
@@ -942,8 +1190,8 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! {
 	for _ in 0 .. max_passes {
 		mut changed := false
 		mut vars := build_config_variable_map(cfg)
-		cfg.paths.root, changed = expand_config_string(cfg.paths.root, 'paths', vars, env_map,
-			changed)!
+		cfg.paths.root, changed = expand_config_string(cfg.paths.root, 'paths', vars,
+			env_map, changed)!
 		vars['paths.root'] = resolve_config_path(base_dir, cfg.paths.root)
 		mut next_paths := map[string]string{}
 		for key, value in cfg.paths.values {
@@ -954,30 +1202,30 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! {
 			}
 		}
 		cfg.paths.values = next_paths.clone()
-		cfg.server.host, changed = expand_config_string(cfg.server.host, 'server', vars, env_map,
-			changed)!
-		cfg.files.event_log, changed = expand_config_string(cfg.files.event_log, 'files', vars,
+		cfg.server.host, changed = expand_config_string(cfg.server.host, 'server', vars,
 			env_map, changed)!
-		cfg.files.pid_file, changed = expand_config_string(cfg.files.pid_file, 'files', vars, env_map,
-			changed)!
-		cfg.worker.cmd, changed = expand_config_string(cfg.worker.cmd, 'worker', vars, env_map,
-			changed)!
-		cfg.worker.socket, changed = expand_config_string(cfg.worker.socket, 'worker', vars, env_map,
-			changed)!
-		cfg.worker.socket_prefix, changed = expand_config_string(cfg.worker.socket_prefix, 'worker',
+		cfg.files.event_log, changed = expand_config_string(cfg.files.event_log, 'files',
 			vars, env_map, changed)!
-		cfg.executor.kind, changed = expand_config_string(cfg.executor.kind, 'executor', vars, env_map,
-			changed)!
-		cfg.vjsx.app_entry, changed = expand_config_string(cfg.vjsx.app_entry, 'vjsx', vars, env_map,
-			changed)!
-		cfg.vjsx.module_root, changed = expand_config_string(cfg.vjsx.module_root, 'vjsx', vars,
-			env_map, changed)!
-		cfg.vjsx.build_root, changed = expand_config_string(cfg.vjsx.build_root, 'vjsx', vars,
+		cfg.files.pid_file, changed = expand_config_string(cfg.files.pid_file, 'files',
+			vars, env_map, changed)!
+		cfg.worker.cmd, changed = expand_config_string(cfg.worker.cmd, 'worker', vars,
 			env_map, changed)!
-		cfg.vjsx.signature_root, changed = expand_config_string(cfg.vjsx.signature_root, 'vjsx',
+		cfg.worker.socket, changed = expand_config_string(cfg.worker.socket, 'worker',
+			vars, env_map, changed)!
+		cfg.worker.socket_prefix, changed = expand_config_string(cfg.worker.socket_prefix,
+			'worker', vars, env_map, changed)!
+		cfg.executor.kind, changed = expand_config_string(cfg.executor.kind, 'executor',
+			vars, env_map, changed)!
+		cfg.vjsx.app_entry, changed = expand_config_string(cfg.vjsx.app_entry, 'vjsx',
 			vars, env_map, changed)!
-		cfg.vjsx.runtime_profile, changed = expand_config_string(cfg.vjsx.runtime_profile, 'vjsx',
+		cfg.vjsx.module_root, changed = expand_config_string(cfg.vjsx.module_root, 'vjsx',
 			vars, env_map, changed)!
+		cfg.vjsx.build_root, changed = expand_config_string(cfg.vjsx.build_root, 'vjsx',
+			vars, env_map, changed)!
+		cfg.vjsx.signature_root, changed = expand_config_string(cfg.vjsx.signature_root,
+			'vjsx', vars, env_map, changed)!
+		cfg.vjsx.runtime_profile, changed = expand_config_string(cfg.vjsx.runtime_profile,
+			'vjsx', vars, env_map, changed)!
 		for i, raw in cfg.vjsx.signature_include {
 			next, c := expand_config_string(raw, 'vjsx', vars, env_map, false)!
 			if c {
@@ -992,6 +1240,60 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! {
 				changed = true
 			}
 		}
+		mut next_plugins := map[string]PluginConfig{}
+		for name, plugin in cfg.plugins {
+			entry, entry_changed := expand_config_string(plugin.entry, 'plugins.${name}',
+				vars, env_map, false)!
+			app_entry, app_entry_changed := expand_config_string(plugin.app_entry, 'plugins.${name}',
+				vars, env_map, false)!
+			module_root, module_root_changed := expand_config_string(plugin.module_root,
+				'plugins.${name}', vars, env_map, false)!
+			build_root, build_root_changed := expand_config_string(plugin.build_root,
+				'plugins.${name}', vars, env_map, false)!
+			signature_root, signature_root_changed := expand_config_string(plugin.signature_root,
+				'plugins.${name}', vars, env_map, false)!
+			runtime_profile, runtime_profile_changed := expand_config_string(plugin.runtime_profile,
+				'plugins.${name}', vars, env_map, false)!
+			mut signature_include := plugin.signature_include.clone()
+			for i, raw in signature_include {
+				next, c := expand_config_string(raw, 'plugins.${name}', vars, env_map,
+					false)!
+				if c {
+					signature_include[i] = next
+					changed = true
+				}
+			}
+			mut signature_exclude := plugin.signature_exclude.clone()
+			for i, raw in signature_exclude {
+				next, c := expand_config_string(raw, 'plugins.${name}', vars, env_map,
+					false)!
+				if c {
+					signature_exclude[i] = next
+					changed = true
+				}
+			}
+			next_plugins[name] = PluginConfig{
+				kind:              plugin.kind
+				entry:             entry
+				app_entry:         app_entry
+				module_root:       module_root
+				build_root:        build_root
+				signature_root:    signature_root
+				signature_include: signature_include
+				signature_exclude: signature_exclude
+				runtime_profile:   runtime_profile
+				thread_count:      plugin.thread_count
+				max_requests:      plugin.max_requests
+				enable_fs:         plugin.enable_fs
+				enable_process:    plugin.enable_process
+				enable_network:    plugin.enable_network
+			}
+			if entry_changed || app_entry_changed || module_root_changed || build_root_changed
+				|| signature_root_changed || runtime_profile_changed {
+				changed = true
+			}
+		}
+		cfg.plugins = next_plugins.clone()
 		for i, raw in cfg.worker.sockets {
 			next, c := expand_config_string(raw, 'worker', vars, env_map, false)!
 			if c {
@@ -999,11 +1301,12 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! {
 				changed = true
 			}
 		}
-		cfg.php.bin, changed = expand_config_string(cfg.php.bin, 'php', vars, env_map, changed)!
-		cfg.php.worker_entry, changed = expand_config_string(cfg.php.worker_entry, 'php', vars,
-			env_map, changed)!
-		cfg.php.app_entry, changed = expand_config_string(cfg.php.app_entry, 'php', vars, env_map,
+		cfg.php.bin, changed = expand_config_string(cfg.php.bin, 'php', vars, env_map,
 			changed)!
+		cfg.php.worker_entry, changed = expand_config_string(cfg.php.worker_entry, 'php',
+			vars, env_map, changed)!
+		cfg.php.app_entry, changed = expand_config_string(cfg.php.app_entry, 'php', vars,
+			env_map, changed)!
 		for i, raw in cfg.php.extensions {
 			next, c := expand_config_string(raw, 'php', vars, env_map, false)!
 			if c {
@@ -1027,26 +1330,26 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! {
 			}
 		}
 		cfg.worker.env = next_env.clone()
-		cfg.admin.host, changed = expand_config_string(cfg.admin.host, 'admin', vars, env_map,
-			changed)!
-		cfg.admin.token, changed = expand_config_string(cfg.admin.token, 'admin', vars, env_map,
-			changed)!
-		cfg.assets.prefix, changed = expand_config_string(cfg.assets.prefix, 'assets', vars, env_map,
-			changed)!
-		cfg.assets.root, changed = expand_config_string(cfg.assets.root, 'assets', vars, env_map,
-			changed)!
-		cfg.assets.cache_control, changed = expand_config_string(cfg.assets.cache_control, 'assets',
+		cfg.admin.host, changed = expand_config_string(cfg.admin.host, 'admin', vars,
+			env_map, changed)!
+		cfg.admin.token, changed = expand_config_string(cfg.admin.token, 'admin', vars,
+			env_map, changed)!
+		cfg.assets.prefix, changed = expand_config_string(cfg.assets.prefix, 'assets',
 			vars, env_map, changed)!
-		cfg.runtime.timezone, changed = expand_config_string(cfg.runtime.timezone, 'runtime', vars,
+		cfg.assets.root, changed = expand_config_string(cfg.assets.root, 'assets', vars,
 			env_map, changed)!
-		cfg.feishu.open_base_url, changed = expand_config_string(cfg.feishu.open_base_url, 'feishu',
+		cfg.assets.cache_control, changed = expand_config_string(cfg.assets.cache_control,
+			'assets', vars, env_map, changed)!
+		cfg.runtime.timezone, changed = expand_config_string(cfg.runtime.timezone, 'runtime',
 			vars, env_map, changed)!
+		cfg.feishu.open_base_url, changed = expand_config_string(cfg.feishu.open_base_url,
+			'feishu', vars, env_map, changed)!
 		mut next_apps := map[string]FeishuAppConfig{}
 		for name, app_cfg in cfg.feishu.apps {
-			app_id, app_id_changed := expand_config_string(app_cfg.app_id, 'feishu.${name}', vars, env_map,
-				false)!
-			app_secret, app_secret_changed := expand_config_string(app_cfg.app_secret, 'feishu.${name}',
+			app_id, app_id_changed := expand_config_string(app_cfg.app_id, 'feishu.${name}',
 				vars, env_map, false)!
+			app_secret, app_secret_changed := expand_config_string(app_cfg.app_secret,
+				'feishu.${name}', vars, env_map, false)!
 			next_apps[name] = FeishuAppConfig{
 				app_id:     app_id
 				app_secret: app_secret
@@ -1058,27 +1361,86 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! {
 		cfg.feishu.apps = next_apps.clone()
 
 		// codex
-		cfg.codex.url, changed = expand_config_string(cfg.codex.url, 'codex', vars, env_map, changed)!
-		cfg.codex.model, changed = expand_config_string(cfg.codex.model, 'codex', vars, env_map,
-			changed)!
-		cfg.codex.effort, changed = expand_config_string(cfg.codex.effort, 'codex', vars, env_map,
-			changed)!
-		cfg.codex.cwd, changed = expand_config_string(cfg.codex.cwd, 'codex', vars, env_map, changed)!
-		cfg.codex.approval_policy, changed = expand_config_string(cfg.codex.approval_policy, 'codex',
-			vars, env_map, changed)!
-		cfg.codex.sandbox, changed = expand_config_string(cfg.codex.sandbox, 'codex', vars, env_map,
-			changed)!
-		cfg.feishu.bridge.ws_url, changed = expand_config_string(cfg.feishu.bridge.ws_url, 'feishu.bridge', vars, env_map,
+		cfg.codex.url, changed = expand_config_string(cfg.codex.url, 'codex', vars, env_map,
 			changed)!
-		cfg.feishu.bridge.client_id, changed = expand_config_string(cfg.feishu.bridge.client_id, 'feishu.bridge', vars,
+		cfg.codex.model, changed = expand_config_string(cfg.codex.model, 'codex', vars,
 			env_map, changed)!
-		cfg.feishu.bridge.token, changed = expand_config_string(cfg.feishu.bridge.token, 'feishu.bridge', vars, env_map,
-			changed)!
-		cfg.feishu.bridge.target_id, changed = expand_config_string(cfg.feishu.bridge.target_id, 'feishu.bridge', vars,
-			env_map, changed)!
-
-		cfg.feishu.bridge.target_id, changed = expand_config_string(cfg.feishu.bridge.target_id, 'feishu.bridge', vars,
+		cfg.codex.effort, changed = expand_config_string(cfg.codex.effort, 'codex', vars,
 			env_map, changed)!
+		cfg.codex.cwd, changed = expand_config_string(cfg.codex.cwd, 'codex', vars, env_map,
+			changed)!
+		cfg.codex.approval_policy, changed = expand_config_string(cfg.codex.approval_policy,
+			'codex', vars, env_map, changed)!
+		cfg.codex.sandbox, changed = expand_config_string(cfg.codex.sandbox, 'codex',
+			vars, env_map, changed)!
+		cfg.openai.base_path, changed = expand_config_string(cfg.openai.base_path, 'openai',
+			vars, env_map, changed)!
+		cfg.openai.default_backend, changed = expand_config_string(cfg.openai.default_backend,
+			'openai', vars, env_map, changed)!
+		cfg.openai.plugin, changed = expand_config_string(cfg.openai.plugin, 'openai',
+			vars, env_map, changed)!
+		mut next_openai_backends := map[string]OpenAIBackendConfig{}
+		for name, backend in cfg.openai.backends {
+			base_url, base_url_changed := expand_config_string(backend.base_url, 'openai.backends.${name}',
+				vars, env_map, false)!
+			api_key, api_key_changed := expand_config_string(backend.api_key, 'openai.backends.${name}',
+				vars, env_map, false)!
+			executor, executor_changed := expand_config_string(backend.executor, 'openai.backends.${name}',
+				vars, env_map, false)!
+			api_key_env, api_key_env_changed := expand_config_string(backend.api_key_env,
+				'openai.backends.${name}', vars, env_map, false)!
+			next_openai_backends[name] = OpenAIBackendConfig{
+				kind:        backend.kind
+				base_url:    base_url
+				executor:    executor
+				api_key:     api_key
+				api_key_env: api_key_env
+				timeout_ms:  backend.timeout_ms
+			}
+			if base_url_changed || executor_changed || api_key_changed || api_key_env_changed {
+				changed = true
+			}
+		}
+		cfg.openai.backends = next_openai_backends.clone()
+		mut next_openai_routes := map[string]OpenAIRouteConfig{}
+		for name, route in cfg.openai.routes {
+			model, model_changed := expand_config_string(route.model, 'openai.routes.${name}',
+				vars, env_map, false)!
+			backend, backend_changed := expand_config_string(route.backend, 'openai.routes.${name}',
+				vars, env_map, false)!
+			upstream_model, upstream_model_changed := expand_config_string(route.upstream_model,
+				'openai.routes.${name}', vars, env_map, false)!
+			mut models := route.models.clone()
+			for i, raw in models {
+				next, c := expand_config_string(raw, 'openai.routes.${name}', vars, env_map,
+					false)!
+				if c {
+					models[i] = next
+					changed = true
+				}
+			}
+			next_openai_routes[name] = OpenAIRouteConfig{
+				model:          model
+				models:         models
+				backend:        backend
+				upstream_model: upstream_model
+			}
+			if model_changed || backend_changed || upstream_model_changed {
+				changed = true
+			}
+		}
+		cfg.openai.routes = next_openai_routes.clone()
+		cfg.feishu.bridge.ws_url, changed = expand_config_string(cfg.feishu.bridge.ws_url,
+			'feishu.bridge', vars, env_map, changed)!
+		cfg.feishu.bridge.client_id, changed = expand_config_string(cfg.feishu.bridge.client_id,
+			'feishu.bridge', vars, env_map, changed)!
+		cfg.feishu.bridge.token, changed = expand_config_string(cfg.feishu.bridge.token,
+			'feishu.bridge', vars, env_map, changed)!
+		cfg.feishu.bridge.target_id, changed = expand_config_string(cfg.feishu.bridge.target_id,
+			'feishu.bridge', vars, env_map, changed)!
+
+		cfg.feishu.bridge.target_id, changed = expand_config_string(cfg.feishu.bridge.target_id,
+			'feishu.bridge', vars, env_map, changed)!
 
 		if !changed {
 			resolve_config_paths(mut cfg, config_path)
@@ -1149,6 +1511,14 @@ fn resolve_config_paths(mut cfg VhttpdConfig, config_path string) {
 	cfg.vjsx.module_root = resolve_config_path(cfg.paths.root, cfg.vjsx.module_root)
 	cfg.vjsx.build_root = resolve_config_path(cfg.paths.root, cfg.vjsx.build_root)
 	cfg.vjsx.signature_root = resolve_config_path(cfg.paths.root, cfg.vjsx.signature_root)
+	for name, mut plugin in cfg.plugins {
+		plugin.entry = resolve_config_path(cfg.paths.root, plugin.entry)
+		plugin.app_entry = resolve_config_path(cfg.paths.root, plugin.app_entry)
+		plugin.module_root = resolve_config_path(cfg.paths.root, plugin.module_root)
+		plugin.build_root = resolve_config_path(cfg.paths.root, plugin.build_root)
+		plugin.signature_root = resolve_config_path(cfg.paths.root, plugin.signature_root)
+		cfg.plugins[name] = plugin
+	}
 	cfg.assets.root = resolve_config_path(cfg.paths.root, cfg.assets.root)
 	cfg.codex.cwd = resolve_config_path(cfg.paths.root, cfg.codex.cwd)
 }
@@ -1199,6 +1569,10 @@ fn build_config_variable_map(cfg VhttpdConfig) map[string]string {
 		'feishu.bridge.client_id':        cfg.feishu.bridge.client_id
 		'feishu.bridge.token':            cfg.feishu.bridge.token
 		'feishu.bridge.target_id':        cfg.feishu.bridge.target_id
+		'openai.enabled':                 '${cfg.openai.enabled}'
+		'openai.base_path':               cfg.openai.base_path
+		'openai.default_backend':         cfg.openai.default_backend
+		'openai.plugin':                  cfg.openai.plugin
 	}
 	for key, value in cfg.paths.values {
 		vars['paths.${key}'] = value
@@ -1213,6 +1587,25 @@ fn build_config_variable_map(cfg VhttpdConfig) map[string]string {
 		vars['feishu.${name}.app_id'] = app_cfg.app_id
 		vars['feishu.${name}.app_secret'] = app_cfg.app_secret
 	}
+	for name, backend in cfg.openai.backends {
+		vars['openai.backends.${name}.kind'] = backend.kind
+		vars['openai.backends.${name}.base_url'] = backend.base_url
+		vars['openai.backends.${name}.api_key_env'] = backend.api_key_env
+	}
+	for name, route in cfg.openai.routes {
+		vars['openai.routes.${name}.model'] = route.model
+		vars['openai.routes.${name}.backend'] = route.backend
+		vars['openai.routes.${name}.upstream_model'] = route.upstream_model
+	}
+	for name, plugin in cfg.plugins {
+		vars['plugins.${name}.kind'] = plugin.kind
+		vars['plugins.${name}.entry'] = plugin.entry
+		vars['plugins.${name}.app_entry'] = plugin.app_entry
+		vars['plugins.${name}.module_root'] = plugin.module_root
+		vars['plugins.${name}.build_root'] = plugin.build_root
+		vars['plugins.${name}.signature_root'] = plugin.signature_root
+		vars['plugins.${name}.runtime_profile'] = plugin.runtime_profile
+	}
 	return vars
 }
 
diff --git a/src/multi_server_runtime_config.v b/src/multi_server_runtime_config.v
index 8bbc1fb..ed783d6 100644
--- a/src/multi_server_runtime_config.v
+++ b/src/multi_server_runtime_config.v
@@ -166,6 +166,13 @@ fn merge_vjsx_config(base VjsxConfig, override VjsxConfig) VjsxConfig {
 	return cfg
 }
 
+// merge_plugins_config picks the plugin table a site runs with: the `override`
+// map wins as a whole when it has at least one entry, otherwise `base` is
+// inherited. Either way the caller receives a clone of the chosen map.
+fn merge_plugins_config(base map[string]PluginConfig, override map[string]PluginConfig) map[string]PluginConfig {
+	return if override.len > 0 { override.clone() } else { base.clone() }
+}
+
 fn merge_websocket_affinity_config(base WebSocketAffinityConfig, override WebSocketAffinityConfig) WebSocketAffinityConfig {
 	defaults := default_vhttpd_config().websocket_affinity
 	mut cfg := base
@@ -337,6 +344,49 @@ fn merge_bridge_config(base BridgeConfig, override BridgeConfig) BridgeConfig {
 	return cfg
 }
 
+// merge_openai_config overlays the `override` OpenAI gateway config on `base`.
+// A scalar field is taken from `override` only when it differs from the value in
+// default_vhttpd_config().openai, so an override equal to that default leaves the
+// `base` value in place. Non-empty `backends` / `routes` maps replace the
+// inherited map wholesale (cloned); they are not merged key by key.
+fn merge_openai_config(base OpenAIConfig, override OpenAIConfig) OpenAIConfig {
+	defaults := default_vhttpd_config().openai
+	mut cfg := base
+	if override.enabled != defaults.enabled {
+		cfg.enabled = override.enabled
+	}
+	if override.base_path != defaults.base_path {
+		cfg.base_path = override.base_path
+	}
+	if override.default_backend != defaults.default_backend {
+		cfg.default_backend = override.default_backend
+	}
+	// `plugin` is read by build_config_variable_map and build_app_runtime, so an
+	// overriding value has to survive the merge like the other scalar fields.
+	if override.plugin != defaults.plugin {
+		cfg.plugin = override.plugin
+	}
+	if override.endpoints.models != defaults.endpoints.models {
+		cfg.endpoints.models = override.endpoints.models
+	}
+	if override.endpoints.chat_completions != defaults.endpoints.chat_completions {
+		cfg.endpoints.chat_completions = override.endpoints.chat_completions
+	}
+	if override.endpoints.responses != defaults.endpoints.responses {
+		cfg.endpoints.responses = override.endpoints.responses
+	}
+	if override.endpoints.embeddings != defaults.endpoints.embeddings {
+		cfg.endpoints.embeddings = override.endpoints.embeddings
+	}
+	if override.backends.len > 0 {
+		cfg.backends = override.backends.clone()
+	}
+	if override.routes.len > 0 {
+		cfg.routes = override.routes.clone()
+	}
+	return cfg
+}
+
 fn site_config_as_vhttpd_config(global_cfg VhttpdConfig, site_cfg SiteConfig) VhttpdConfig {
 	mut cfg := global_cfg
 	cfg.listeners = map[string]ListenerConfig{}
@@ -348,6 +388,7 @@ fn site_config_as_vhttpd_config(global_cfg VhttpdConfig, site_cfg SiteConfig) Vh
 		env_map := map[string]string{}
 		project_root, _ = expand_config_string(project_root, '', global_vars, env_map,
 			false) or { site_cfg.project_root, false }
+		project_root = resolve_config_path(global_cfg.paths.root, project_root)
 		cfg.paths = PathsConfig{
 			root:   project_root
 			values: cfg.paths.values.clone()
@@ -357,10 +398,10 @@ fn site_config_as_vhttpd_config(global_cfg VhttpdConfig, site_cfg SiteConfig) Vh
 	cfg.executor = merge_executor_config(global_cfg.executor, site_cfg.executor, site_cfg)
 	cfg.php = merge_php_config(global_cfg.php, site_cfg.php)
 	cfg.vjsx = merge_vjsx_config(global_cfg.vjsx, site_cfg.vjsx)
+	cfg.plugins = merge_plugins_config(global_cfg.plugins, site_cfg.plugins)
 	cfg.websocket_affinity = merge_websocket_affinity_config(global_cfg.websocket_affinity,
 		site_cfg.websocket_affinity)
-	cfg.websocket_actor = merge_websocket_actor_config(global_cfg.websocket_actor,
-		site_cfg.websocket_actor)
+	cfg.websocket_actor = merge_websocket_actor_config(global_cfg.websocket_actor, site_cfg.websocket_actor)
 	if site_cfg.worker_entry.trim_space() != '' && cfg.executor.kind == 'php'
 		&& cfg.php.worker_entry.trim_space() == '' {
 		cfg.php.worker_entry = site_cfg.worker_entry
@@ -381,6 +422,7 @@ fn site_config_as_vhttpd_config(global_cfg VhttpdConfig, site_cfg SiteConfig) Vh
 	cfg.mcp = merge_mcp_config(global_cfg.mcp, site_cfg.mcp)
 	cfg.feishu = merge_feishu_config(global_cfg.feishu, site_cfg.feishu)
 	cfg.codex = merge_codex_config(global_cfg.codex, site_cfg.codex)
+	cfg.openai = merge_openai_config(global_cfg.openai, site_cfg.openai)
 	cfg.feishu.bridge = merge_bridge_config(global_cfg.feishu.bridge, site_cfg.feishu.bridge)
 	cfg.config_path = global_cfg.config_path
 	return cfg
diff --git a/src/provider_spec.v b/src/provider_spec.v
index eb966eb..d60d113 100644
--- a/src/provider_spec.v
+++ b/src/provider_spec.v
@@ -3,6 +3,7 @@ module main
 pub enum ProviderRouteKind {
 	codex
 	feishu
+	openai // OpenAI gateway route kind; execute_routed_command answers `false, ''` for it
 	ollama
 	generic
 }
diff --git a/src/server_logic_test.v b/src/server_logic_test.v
index be70e3c..18db287 100644
--- a/src/server_logic_test.v
+++ b/src/server_logic_test.v
@@ -1940,7 +1940,7 @@ fn test_paseo_relay_example_config_enables_websocket_dispatch() {
 	config_path := os.join_path(os.dir(@FILE), '..', 'examples', 'paseo-relay', 'paseo-relay.toml')
 	cfg := load_vhttpd_config(['--config', config_path]) or { panic(err) }
 	assert cfg.worker.websocket_dispatch
-	assert cfg.sites['paseo_relay'].websocket_affinity.enabled
+	assert !cfg.sites['paseo_relay'].websocket_affinity.enabled
 	assert cfg.sites['paseo_relay'].websocket_affinity.source == 'app'
 	assert cfg.sites['paseo_relay'].websocket_affinity.key == 'serverId'
 	assert cfg.sites['paseo_relay'].websocket_affinity.fallback == 'reject'
@@ -1954,7 +1954,7 @@ fn test_paseo_relay_example_config_enables_websocket_dispatch() {
 	}
 	assert runtime.listeners.len == 1
 	assert runtime.listeners[0].runtime_cfg.executor_plan.bootstrap.websocket_dispatch_mode
-	assert runtime.listeners[0].site_cfg.websocket_affinity.enabled
+	assert !runtime.listeners[0].site_cfg.websocket_affinity.enabled
 	assert runtime.listeners[0].site_cfg.websocket_affinity.key == 'serverId'
 	assert runtime.listeners[0].site_cfg.websocket_actor.enabled
 	assert runtime.listeners[0].site_cfg.websocket_actor.sources.len == 3

From 406ff943021d893c26dae24439399f0c8301e8da Mon Sep 17 00:00:00 2001
From: weigang <guweigang@bullsoft.org>
Date: Wed, 6 May 2026 09:58:57 +0800
Subject: [PATCH 02/10] openai: add aggregation gateway runtime

---
 src/app_runtime_builder.v      |   12 +-
 src/command_executor.v         |   20 +-
 src/inproc_vjsx_executor.v     |  374 ++++-
 src/main.v                     |  153 +-
 src/openai_runtime.v           | 2709 ++++++++++++++++++++++++++++++++
 src/openai_runtime_test.v      |  388 +++++
 src/plugin_runtime.v           |  118 ++
 src/server_shutdown_hooks.v    |    1 +
 src/worker_backend_transport.v |   30 +-
 v.mod                          |    1 +
 10 files changed, 3664 insertions(+), 142 deletions(-)
 create mode 100644 src/openai_runtime.v
 create mode 100644 src/openai_runtime_test.v
 create mode 100644 src/plugin_runtime.v

diff --git a/src/app_runtime_builder.v b/src/app_runtime_builder.v
index 70ff548..d96a6d8 100644
--- a/src/app_runtime_builder.v
+++ b/src/app_runtime_builder.v
@@ -64,6 +64,8 @@ fn build_app_runtime(provider_settings ProviderRuntimeSettings, executor_plan Lo
 		admin_on_data_plane:                      !build_cfg.admin_enabled
 		admin_token:                              build_cfg.admin_token
 		runtime_config_json:                      json.encode(cfg)
+		plugin_configs:                           cfg.plugins.clone()
+		plugin_vjsx:                              build_vjsx_plugin_runtimes(cfg.plugins)
 		assets_enabled:                           build_cfg.assets_enabled
 		assets_prefix:                            build_cfg.assets_prefix
 		assets_root:                              build_cfg.assets_root
@@ -79,6 +81,14 @@ fn build_app_runtime(provider_settings ProviderRuntimeSettings, executor_plan Lo
 		feishu_reconnect_delay_ms:                provider_settings.feishu.reconnect_delay_ms
 		feishu_token_refresh_skew_seconds:        provider_settings.feishu.token_refresh_skew_seconds
 		feishu_recent_event_limit:                provider_settings.feishu.recent_event_limit
+		openai_enabled:                           cfg.openai.enabled
+		openai_base_path:                         cfg.openai.base_path
+		openai_default_backend:                   cfg.openai.default_backend
+		openai_plugin:                            cfg.openai.plugin
+		openai_endpoints:                         cfg.openai.endpoints
+		openai_backends:                          cfg.openai.backends.clone()
+		openai_routes:                            cfg.openai.routes.clone()
+		openai_responses:                         new_memory_state_store[OpenAIResponseRecord]()
 		websocket_upstream_recent_dispatch_limit: 50
 		auto_start_dynamic_upstreams:             true
 		feishu_static_apps:                       provider_settings.feishu.apps.clone()
@@ -97,7 +107,7 @@ fn build_app_runtime(provider_settings ProviderRuntimeSettings, executor_plan Lo
 			specs:    map[string]ProviderSpec{}
 		}
 		ollama_enabled:                           provider_settings.ollama_enabled
-		db_runtime:                                build_db_runtime(provider_settings.db)
+		db_runtime:                               build_db_runtime(provider_settings.db)
 		fixture_websocket_runtime:                map[string]FixtureWebSocketUpstreamRuntime{}
 		websocket_upstream_recent_activities:     []WebSocketUpstreamActivitySnapshot{}
 		provider_instance_specs:                  map[string]ProviderInstanceSpec{}
diff --git a/src/command_executor.v b/src/command_executor.v
index 2cba5d2..5cfe5d2 100644
--- a/src/command_executor.v
+++ b/src/command_executor.v
@@ -100,10 +100,12 @@ pub fn (mut exec CommandExecutor) execute(source_activity_id string, ctx Dispatc
 	for command in commands {
 		mut next := command
 		mut metadata := command.metadata.clone()
-		if (metadata['trace_id'] or { '' }).trim_space() == '' && ctx.session.trace_id.trim_space() != '' {
+		if (metadata['trace_id'] or { '' }).trim_space() == ''
+			&& ctx.session.trace_id.trim_space() != '' {
 			metadata['trace_id'] = ctx.session.trace_id
 		}
-		if (metadata['request_id'] or { '' }).trim_space() == '' && ctx.session.request_id.trim_space() != '' {
+		if (metadata['request_id'] or { '' }).trim_space() == ''
+			&& ctx.session.request_id.trim_space() != '' {
 			metadata['request_id'] = ctx.session.request_id
 		}
 		next.metadata = metadata.clone()
@@ -143,10 +145,13 @@ pub fn (mut exec CommandExecutor) execute_websocket_upstream_commands(source_act
 	log.info('[ws-cmd] executing ${commands.len} commands from ${source_activity_id}')
 	for index, command in commands {
 		normalized := NormalizedCommand.from_worker_command(command)
-		log.info('[ws-cmd]   #${index}: type=${normalized.routing_type()} kind=${normalized.kind} event=${normalized.normalized_event('')} provider=${normalized.normalized_provider('')} stream_id=${normalized.correlation.stream_id} trace_id=${normalized.metadata['trace_id'] or { '' }} request_id=${normalized.correlation.request_id}')
+		log.info('[ws-cmd]   #${index}: type=${normalized.routing_type()} kind=${normalized.kind} event=${normalized.normalized_event('')} provider=${normalized.normalized_provider('')} stream_id=${normalized.correlation.stream_id} trace_id=${normalized.metadata['trace_id'] or {
+			''
+		}} request_id=${normalized.correlation.request_id}')
 		mut snapshot := exec.new_snapshot(source_activity_id, index, command)
 		if normalized.is_provider_instance_command() {
-			handled, exec_err := exec.execute_provider_instance_command(normalized, mut snapshot)
+			handled, exec_err := exec.execute_provider_instance_command(normalized, mut
+				snapshot)
 			if handled {
 				if exec_err != '' {
 					last_error = exec_err
@@ -156,7 +161,8 @@ pub fn (mut exec CommandExecutor) execute_websocket_upstream_commands(source_act
 			}
 		}
 		route := exec.route_from_normalized(normalized)
-		handled, exec_err := exec.execute_routed_command(route, command, normalized, mut snapshot)
+		handled, exec_err := exec.execute_routed_command(route, command, normalized, mut
+			snapshot)
 		if handled {
 			if exec_err != '' {
 				last_error = exec_err
@@ -231,11 +237,15 @@ fn (mut exec CommandExecutor) execute_routed_command(route ProviderRouteKind, co
 				false, ''
 			}
 		}
+		.openai {
+			false, ''
+		}
 		.generic {
 			exec.generic.execute(command, normalized, mut snapshot)
 		}
 	}
 }
+
 // Unified App-level entrypoint, now backed by CommandExecutor object.
 fn (mut app App) execute_command_envelopes(source_activity_id string, ctx DispatchContext, commands []WorkerWebSocketUpstreamCommand) ([]WebSocketUpstreamCommandActivity, string) {
 	mut executor := CommandExecutor.new(mut app)
diff --git a/src/inproc_vjsx_executor.v b/src/inproc_vjsx_executor.v
index 5a6be39..2bd27b1 100644
--- a/src/inproc_vjsx_executor.v
+++ b/src/inproc_vjsx_executor.v
@@ -1306,6 +1306,7 @@ fn websocket_actor_value_from_source(frame WorkerWebSocketFrame, source WebSocke
 		'metadata' { (frame.metadata[key_name] or { '' }).trim_space() }
 		else { (frame.query[key_name] or { '' }).trim_space() }
 	}
+
 	if value == '' {
 		return WebSocketActorDecision{}
 	}
@@ -1492,8 +1493,7 @@ fn (e InProcVjsxExecutor) resolve_websocket_dispatch_affinity(frame WorkerWebSoc
 		}
 		return '', affinity.priority, false
 	}
-	return affinity_key, affinity.priority, websocket_should_pin_affinity_lane(frame,
-		affinity_key)
+	return affinity_key, affinity.priority, websocket_should_pin_affinity_lane(frame, affinity_key)
 }
 
 fn websocket_should_pin_affinity_lane(frame WorkerWebSocketFrame, affinity_key string) bool {
@@ -1607,37 +1607,35 @@ fn (e InProcVjsxExecutor) lane_worker_by_id(lane_id string) ?VjsxLaneWorker {
 	return none
 }
 
-fn (state &VjsxExecutorState) schedule_lane_wakeup(lane_id string, wake_at_ms i64, generation u64) {
+fn (mut state VjsxExecutorState) schedule_lane_wakeup(lane_id string, wake_at_ms i64, generation u64) { // records the pending wakeup for lane_id, then starts its delivery via `go`
 	if isnil(state) || lane_id.trim_space() == '' {
 		return
 	}
-	mut state_ref := state
-	state_ref.mu.@lock()
-	state_ref.lane_wakeup_by_id[lane_id] = VjsxLaneWakeup{
+	state.mu.@lock() // hold the state mutex while lane_wakeup_by_id is written
+	state.lane_wakeup_by_id[lane_id] = VjsxLaneWakeup{
 		wake_at_ms: wake_at_ms
 		generation: generation
 	}
-	state_ref.mu.unlock()
+	state.mu.unlock() // released before deliver_lane_wakeup is started below
 	go state.deliver_lane_wakeup(lane_id, wake_at_ms, generation)
 }
 
-fn (state &VjsxExecutorState) cancel_lane_wakeup(lane_id string, generation u64) {
+fn (mut state VjsxExecutorState) cancel_lane_wakeup(lane_id string, generation u64) { // drops the pending wakeup for lane_id when its generation matches
 	if isnil(state) || lane_id.trim_space() == '' {
 		return
 	}
-	mut state_ref := state
-	state_ref.mu.@lock()
-	current_wakeup := state_ref.lane_wakeup_by_id[lane_id] or {
-		state_ref.mu.unlock()
+	state.mu.@lock() // guards the lookup and the conditional delete below
+	current_wakeup := state.lane_wakeup_by_id[lane_id] or {
+		state.mu.unlock() // nothing recorded for this lane: unlock and return
 		return
 	}
 	if current_wakeup.generation == generation {
-		state_ref.lane_wakeup_by_id.delete(lane_id)
+		state.lane_wakeup_by_id.delete(lane_id) // an entry with a different generation is left in place
 	}
-	state_ref.mu.unlock()
+	state.mu.unlock()
 }
 
-fn (state &VjsxExecutorState) deliver_lane_wakeup(lane_id string, wake_at_ms i64, generation u64) {
+fn (mut state VjsxExecutorState) deliver_lane_wakeup(lane_id string, wake_at_ms i64, generation u64) {
 	if isnil(state) || lane_id.trim_space() == '' {
 		return
 	}
@@ -1645,18 +1643,17 @@ fn (state &VjsxExecutorState) deliver_lane_wakeup(lane_id string, wake_at_ms i64
 	if delay_ms > 0 {
 		time.sleep(time.millisecond * int(delay_ms))
 	}
-	mut state_ref := state
-	state_ref.mu.@lock()
-	current_wakeup := state_ref.lane_wakeup_by_id[lane_id] or {
-		state_ref.mu.unlock()
+	state.mu.@lock()
+	current_wakeup := state.lane_wakeup_by_id[lane_id] or {
+		state.mu.unlock()
 		return
 	}
 	if current_wakeup.wake_at_ms != wake_at_ms || current_wakeup.generation != generation {
-		state_ref.mu.unlock()
+		state.mu.unlock()
 		return
 	}
-	state_ref.lane_wakeup_by_id.delete(lane_id)
-	state_ref.mu.unlock()
+	state.lane_wakeup_by_id.delete(lane_id)
+	state.mu.unlock()
 	executor := InProcVjsxExecutor{
 		state: state
 	}
@@ -2006,7 +2003,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c
 				}
 				log.debug('[vhttpd] lane worker recv lane=${lane_id} event=${task.frame.event} request_id=${task.frame.request_id} trace_id=${task.frame.trace_id}')
 				lane := worker_executor.lane_snapshot_by_id(lane_id) or {
-					err_msg = inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_lane_not_found')
+					err_msg = inproc_vjsx_normalize_error_message(err.msg(),
+						'inproc_vjsx_executor_lane_not_found')
 					log.debug('[vhttpd] lane worker reply_error lane=${lane_id} event=${task.frame.event} request_id=${task.frame.request_id} error=${err_msg}')
 					task.slot.mu.@lock()
 					task.slot.result = InProcVjsxWebSocketTaskResult{
@@ -2020,7 +2018,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c
 				}
 				response_json = worker_executor.dispatch_websocket_callback_on_lane(mut task_app,
 					task.frame, lane) or {
-					err_msg = inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_dispatch_failed')
+					err_msg = inproc_vjsx_normalize_error_message(err.msg(),
+						'inproc_vjsx_executor_websocket_dispatch_failed')
 					eprintln('[vhttpd] websocket lane worker error lane=${lane_id} event=${task.frame.event} path=${task.frame.path} request_id=${task.frame.request_id} trace_id=${task.frame.trace_id} query=${task.frame.query} error=${err_msg}')
 					log.debug('[vhttpd] lane worker reply_error lane=${lane_id} event=${task.frame.event} request_id=${task.frame.request_id} error=${err_msg}')
 					task.slot.mu.@lock()
@@ -2049,7 +2048,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c
 					task.slot.mu.@lock()
 					task.slot.result = InProcVjsxLaneSnapshotTaskResult{
 						ok:    false
-						error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_lane_not_found')
+						error: inproc_vjsx_normalize_error_message(err.msg(),
+							'inproc_vjsx_executor_lane_not_found')
 					}
 					task.slot.ready = true
 					task.slot.mu.unlock()
@@ -2072,7 +2072,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c
 					task.slot.mu.@lock()
 					task.slot.result = InProcVjsxLaneSnapshotTaskResult{
 						ok:    false
-						error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_snapshot_failed')
+						error: inproc_vjsx_normalize_error_message(err.msg(),
+							'inproc_vjsx_executor_snapshot_failed')
 					}
 					task.slot.ready = true
 					task.slot.mu.unlock()
@@ -2094,7 +2095,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c
 					task.slot.mu.@lock()
 					task.slot.result = InProcVjsxLaneWarmupTaskResult{
 						ok:    false
-						error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_lane_not_found')
+						error: inproc_vjsx_normalize_error_message(err.msg(),
+							'inproc_vjsx_executor_lane_not_found')
 					}
 					task.slot.ready = true
 					task.slot.mu.unlock()
@@ -2118,7 +2120,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c
 					task.slot.mu.@lock()
 					task.slot.result = InProcVjsxLaneWarmupTaskResult{
 						ok:    false
-						error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_warmup_host_failed')
+						error: inproc_vjsx_normalize_error_message(err.msg(),
+							'inproc_vjsx_executor_warmup_host_failed')
 					}
 					task.slot.ready = true
 					task.slot.mu.unlock()
@@ -2129,7 +2132,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c
 					task.slot.mu.@lock()
 					task.slot.result = InProcVjsxLaneWarmupTaskResult{
 						ok:    false
-						error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_warmup_startup_failed')
+						error: inproc_vjsx_normalize_error_message(err.msg(),
+							'inproc_vjsx_executor_warmup_startup_failed')
 					}
 					task.slot.ready = true
 					task.slot.mu.unlock()
@@ -2180,7 +2184,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c
 					task.slot.mu.@lock()
 					task.slot.result = InProcVjsxLaneAffinityTaskResult{
 						ok:    false
-						error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_lane_not_found')
+						error: inproc_vjsx_normalize_error_message(err.msg(),
+							'inproc_vjsx_executor_lane_not_found')
 					}
 					task.slot.ready = true
 					task.slot.mu.unlock()
@@ -2195,7 +2200,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c
 						task.slot.mu.@lock()
 						task.slot.result = InProcVjsxLaneAffinityTaskResult{
 							ok:    false
-							error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_actor_failed')
+							error: inproc_vjsx_normalize_error_message(err.msg(),
+								'inproc_vjsx_executor_websocket_actor_failed')
 						}
 						task.slot.ready = true
 						task.slot.mu.unlock()
@@ -2208,7 +2214,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c
 						task.slot.mu.@lock()
 						task.slot.result = InProcVjsxLaneAffinityTaskResult{
 							ok:    false
-							error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_affinity_failed')
+							error: inproc_vjsx_normalize_error_message(err.msg(),
+								'inproc_vjsx_executor_websocket_affinity_failed')
 						}
 						task.slot.ready = true
 						task.slot.mu.unlock()
@@ -2465,8 +2472,9 @@ fn inproc_vjsx_host_snapshot_builder(state_ptr &VjsxExecutorState, idx int) vjsx
 					return ctx.js_undefined()
 				}
 				lane := executor.lane_snapshot_by_id(lane_id) or { return ctx.js_undefined() }
-				raw := executor.execute_snapshot_hook(mut app, executor.lane_index_by_id(lane.id),
-					lane) or { return ctx.js_undefined() }
+				raw := executor.execute_snapshot_hook(mut app, executor.lane_index_by_id(lane.id), lane) or {
+					return ctx.js_undefined()
+				}
 				if raw.trim_space() == '' || raw.trim_space() == 'undefined'
 					|| raw.trim_space() == 'null' {
 					return ctx.js_undefined()
@@ -2557,7 +2565,8 @@ fn inproc_vjsx_host_session_store_builder(mut state VjsxExecutorState, idx int)
 				}
 				'set' {
 					if req.ttl_ms > 0 {
-						state.session_store.set_with_ttl(full_key, req.value, req.ttl_ms * time.millisecond) or {
+						state.session_store.set_with_ttl(full_key, req.value,
+							req.ttl_ms * time.millisecond) or {
 							return ctx.js_string(json.encode(InProcVjsxHostSessionStoreResponse{
 								error: err.msg()
 							}))
@@ -2584,7 +2593,8 @@ fn inproc_vjsx_host_session_store_builder(mut state VjsxExecutorState, idx int)
 						}
 					} else {
 						swapped = state.session_store.compare_and_swap_set_with_ttl(full_key,
-							req.expected_found, req.expected_value, req.value, req.ttl_ms * time.millisecond) or {
+							req.expected_found, req.expected_value, req.value,
+							req.ttl_ms * time.millisecond) or {
 							return ctx.js_string(json.encode(InProcVjsxHostSessionStoreResponse{
 								error: err.msg()
 							}))
@@ -2618,7 +2628,8 @@ fn inproc_vjsx_host_session_store_builder(mut state VjsxExecutorState, idx int)
 				}
 				'keys' {
 					prefix := '${namespace}:'
-					keys := state.session_store.keys().filter(it.starts_with(prefix)).map(it[prefix.len..])
+					keys :=
+						state.session_store.keys().filter(it.starts_with(prefix)).map(it[prefix.len..])
 					ctx.js_string(json.encode(InProcVjsxHostSessionStoreResponse{
 						ok:    true
 						found: keys.len > 0
@@ -2751,6 +2762,7 @@ fn inproc_vjsx_host_http_fetch_builder(mut state VjsxExecutorState, idx int) vjs
 				'OPTIONS' { http.Method.options }
 				else { http.Method.get }
 			}
+
 			body := parsed.body
 			mut header := http.new_header()
 			for name, value in parsed.headers {
@@ -2984,6 +2996,12 @@ fn inproc_vjsx_module_aliases(kind string) []string {
 			['websocket_upstream', 'websocketUpstream', 'handleWebSocketUpstream',
 				'handle_websocket_upstream']
 		}
+		'plugin' {
+			['plugin', 'handlePlugin', 'handle_plugin']
+		}
+		'openai' {
+			['openai', 'openaiPlugin', 'handleOpenAI', 'handleOpenai', 'handle_openai']
+		}
 		'startup' {
 			['startup', 'lane_startup', 'laneStartup']
 		}
@@ -3010,6 +3028,8 @@ fn inproc_vjsx_global_handler_name(kind string) string {
 		'websocket_affinity' { '__vhttpd_websocket_affinity_handle' }
 		'websocket_actor' { '__vhttpd_websocket_actor_handle' }
 		'websocket_upstream' { '__vhttpd_websocket_upstream_handle' }
+		'plugin' { '__vhttpd_plugin_handle' }
+		'openai' { '__vhttpd_openai_handle' }
 		'startup' { '__vhttpd_startup_handle' }
 		'app_startup' { '__vhttpd_app_startup_handle' }
 		'snapshot' { '__vhttpd_snapshot_handle' }
@@ -3299,6 +3319,7 @@ fn inproc_vjsx_new_runtime_session_ptr(config VjsxRuntimeFacadeConfig) !&vjsx.Ru
 			return error('inproc_vjsx_executor_unsupported_runtime_profile:${config.runtime_profile}')
 		}
 	}
+
 	// Keep the RuntimeSession on the heap; lane hosts outlive ensure_lane_host().
 	mut session := session_value
 	return &session
@@ -3322,6 +3343,7 @@ fn inproc_vjsx_log_runtime_profile(lane_id string, idx int, runtime_profile stri
 		'node' { vjsx.RuntimeProfileKind.node }
 		else { vjsx.RuntimeProfileKind.unknown }
 	}
+
 	missing := if expected_kind == .unknown {
 		[]string{}
 	} else {
@@ -3368,10 +3390,10 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! {
 	session.set_diagnostic_handler(inproc_vjsx_log_runtime_diagnostic)
 	session.configure_event_loop(vjsx.RuntimeSessionEventLoopConfig{
 		session_id:     lane_id
-		wake_fn:        fn [state, lane_id] (req vjsx.RuntimeSessionWakeRequest) {
+		wake_fn:        fn [mut state, lane_id] (req vjsx.RuntimeSessionWakeRequest) {
 			state.schedule_lane_wakeup(lane_id, req.wake_at_ms, req.generation)
 		}
-		cancel_wake_fn: fn [state, lane_id] (req vjsx.RuntimeSessionWakeCancelRequest) {
+		cancel_wake_fn: fn [mut state, lane_id] (req vjsx.RuntimeSessionWakeCancelRequest) {
 			state.cancel_lane_wakeup(lane_id, req.generation)
 		}
 	})
@@ -3384,14 +3406,15 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! {
 	mut has_http_handler := false
 	mut has_websocket_handler := false
 	mut has_upstream_handler := false
+	mut has_plugin_handler := false
 	if as_module {
 		if vjsx.is_typescript_file(config.app_entry)
 			|| vjsx.is_runtime_module_file(config.app_entry) {
 			runtimejs.install_typescript_runtime(ctx)!
 		}
 		log.debug('[vhttpd] ensure_lane_host importing module lane=${lane_id} idx=${idx}')
-		module_entry_path := runtimejs.build_runtime_module_entry(ctx, config.app_entry,
-			true, temp_root) or {
+		module_entry_path := runtimejs.build_runtime_module_entry(ctx, config.app_entry, true,
+			temp_root) or {
 			session.close()
 			os.rmdir_all(temp_root) or {}
 			return error('inproc_vjsx_executor_bootstrap_failed:${err.msg()}')
@@ -3408,7 +3431,12 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! {
 		has_upstream_handler =
 			inproc_vjsx_module_has_callable(&module_binding_value, 'websocket_upstream')
 			|| inproc_vjsx_global_has_callable(ctx, 'websocket_upstream')
-		if !has_http_handler && !has_websocket_handler && !has_upstream_handler {
+		has_plugin_handler = inproc_vjsx_module_has_callable(&module_binding_value, 'plugin')
+			|| inproc_vjsx_module_has_callable(&module_binding_value, 'openai')
+			|| inproc_vjsx_global_has_callable(ctx, 'plugin')
+			|| inproc_vjsx_global_has_callable(ctx, 'openai')
+		if !has_http_handler && !has_websocket_handler && !has_upstream_handler
+			&& !has_plugin_handler {
 			mut cleanup_binding := module_binding_value
 			cleanup_binding.close()
 			session.close()
@@ -3445,8 +3473,7 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! {
 		module_binding_ptr = &module_binding
 	} else {
 		log.debug('[vhttpd] ensure_lane_host loading script entry lane=${lane_id} idx=${idx}')
-		mut entry_exports := load_inproc_vjsx_entry(mut ctx, config, idx, source_signature,
-			false) or {
+		mut entry_exports := load_inproc_vjsx_entry(mut ctx, config, idx, source_signature, false) or {
 			session.close()
 			os.rmdir_all(temp_root) or {}
 			return error('inproc_vjsx_executor_bootstrap_failed:${err.msg()}')
@@ -3475,13 +3502,20 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! {
 		http_handler := ctx.js_global('__vhttpd_handle')
 		websocket_handler := ctx.js_global('__vhttpd_websocket_handle')
 		upstream_handler := ctx.js_global('__vhttpd_websocket_upstream_handle')
+		plugin_handler := ctx.js_global('__vhttpd_plugin_handle')
+		openai_handler := ctx.js_global('__vhttpd_openai_handle')
 		has_http_handler = !http_handler.is_undefined() && http_handler.is_function()
 		has_websocket_handler = !websocket_handler.is_undefined() && websocket_handler.is_function()
 		has_upstream_handler = !upstream_handler.is_undefined() && upstream_handler.is_function()
+		has_plugin_handler = (!plugin_handler.is_undefined() && plugin_handler.is_function())
+			|| (!openai_handler.is_undefined() && openai_handler.is_function())
 		http_handler.free()
 		websocket_handler.free()
 		upstream_handler.free()
-		if !has_http_handler && !has_websocket_handler && !has_upstream_handler {
+		plugin_handler.free()
+		openai_handler.free()
+		if !has_http_handler && !has_websocket_handler && !has_upstream_handler
+			&& !has_plugin_handler {
 			session.close()
 			os.rmdir_all(temp_root) or {}
 			return error('inproc_vjsx_executor_missing_handler')
@@ -3504,7 +3538,7 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! {
 	}
 	state.lanes[idx].healthy = true
 	state.lanes[idx].dirty = false
-	log.debug('[vhttpd] ensure_lane_host ready lane=${lane_id} idx=${idx} http=${has_http_handler} websocket=${has_websocket_handler} upstream=${has_upstream_handler}')
+	log.debug('[vhttpd] ensure_lane_host ready lane=${lane_id} idx=${idx} http=${has_http_handler} websocket=${has_websocket_handler} upstream=${has_upstream_handler} plugin=${has_plugin_handler}')
 }
 
 fn (e InProcVjsxExecutor) activate_lane_request_context(idx int, mut app App, lane_id string, req HttpLogicDispatchRequest) {
@@ -3727,11 +3761,10 @@ fn (e InProcVjsxExecutor) aggregate_runtime_lane_snapshots(mut app App, current_
 	state.mu.unlock()
 	mut items := []string{}
 	for lane in lanes {
-		items << inproc_vjsx_aggregated_snapshot_item_json(lane.id, true, json.encode(app.admin_runtime_snapshot()),
-			'')
+		items << inproc_vjsx_aggregated_snapshot_item_json(lane.id, true,
+			json.encode(app.admin_runtime_snapshot()), '')
 	}
-	return inproc_vjsx_aggregated_snapshot_json('all_lanes', 'runtime', current_lane_id,
-		items)
+	return inproc_vjsx_aggregated_snapshot_json('all_lanes', 'runtime', current_lane_id, items)
 }
 
 fn (e InProcVjsxExecutor) aggregate_app_lane_snapshots(mut app App, current_lane_id string, include_current bool) string {
@@ -3761,8 +3794,7 @@ fn (e InProcVjsxExecutor) aggregate_app_lane_snapshots(mut app App, current_lane
 			items << inproc_vjsx_aggregated_snapshot_item_json(lane.id, false, '', '')
 			continue
 		}
-		items << inproc_vjsx_aggregated_snapshot_item_json(lane.id, true, lane_snapshot,
-			'')
+		items << inproc_vjsx_aggregated_snapshot_item_json(lane.id, true, lane_snapshot, '')
 	}
 	return inproc_vjsx_aggregated_snapshot_json(scope, 'app', current_lane_id, items)
 }
@@ -3961,7 +3993,8 @@ fn build_websocket_js_runtime(ctx &vjsx.Context, runtime_meta InProcVjsxRuntimeM
 	request.set('protocolVersion', runtime_meta.request_protocol_version)
 	request.set('remoteAddr', runtime_meta.request_remote_addr)
 	request.set('ip', runtime_meta.request_remote_addr)
-	request.set('server', websocket_js_value_from_json(ctx, json.encode(runtime_meta.request_server)))
+	request.set('server', websocket_js_value_from_json(ctx,
+		json.encode(runtime_meta.request_server)))
 	runtime.set('request', request)
 	runtime.set('method', runtime_meta.method)
 	runtime.set('path', runtime_meta.path)
@@ -4446,8 +4479,8 @@ fn (e InProcVjsxExecutor) websocket_callback_input(lane VjsxExecutionLane, frame
 }
 
 fn build_websocket_callback_payload(ctx &vjsx.Context, input InProcVjsxWebSocketCallbackInput, runtime_config_json string, mut app App) (vjsx.Value, vjsx.Value) {
-	mut js_runtime := build_websocket_js_runtime(ctx, input.runtime_meta, runtime_config_json, mut
-		app)
+	mut js_runtime :=
+		build_websocket_js_runtime(ctx, input.runtime_meta, runtime_config_json, mut app)
 	create_frame_fn := ctx.js_global('__vhttpd_create_websocket_frame')
 	defer {
 		create_frame_fn.free()
@@ -4525,12 +4558,14 @@ fn inproc_vjsx_invoke_websocket_callback(host VjsxLaneHost, ctx &vjsx.Context, j
 		invoke_arg.free()
 	}
 	mut result := host.call_handler(invoke_handler, invoke_arg) or {
-		err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), 'inproc_vjsx_executor_websocket_handler_failed')
+		err_msg := inproc_vjsx_context_error_message(ctx, err.msg(),
+			'inproc_vjsx_executor_websocket_handler_failed')
 		return error(err_msg)
 	}
 	if result.is_exception() {
 		result.free()
-		err_msg := inproc_vjsx_context_error_message(ctx, 'exception', 'inproc_vjsx_executor_websocket_handler_failed')
+		err_msg := inproc_vjsx_context_error_message(ctx, 'exception',
+			'inproc_vjsx_executor_websocket_handler_failed')
 		return error(err_msg)
 	}
 	return result
@@ -4543,14 +4578,16 @@ fn inproc_vjsx_normalize_websocket_callback_result(host VjsxLaneHost, ctx &vjsx.
 	}
 	log.debug('[vhttpd] websocket_on_lane handler_ok lane=${lane.id} idx=${idx} event=${frame.event} promise=${result.instanceof('Promise')}')
 	resolved := host.resolve_value(result) or {
-		err_msg := inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_handler_failed')
+		err_msg := inproc_vjsx_normalize_error_message(err.msg(),
+			'inproc_vjsx_executor_websocket_handler_failed')
 		return error(err_msg)
 	}
 	defer {
 		resolved.free()
 	}
 	mut normalized := host.call_handler(normalize_fn, js_frame, resolved) or {
-		err_msg := inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_normalize_failed')
+		err_msg := inproc_vjsx_normalize_error_message(err.msg(),
+			'inproc_vjsx_executor_websocket_normalize_failed')
 		return error('inproc_vjsx_executor_websocket_normalize_failed:${err_msg}')
 	}
 	defer {
@@ -4630,8 +4667,8 @@ fn (e InProcVjsxExecutor) resolve_websocket_affinity_on_lane(mut app App, frame
 	state.mu.unlock()
 	ctx := host.context()
 	runtime_meta := e.websocket_runtime_meta(lane, frame)
-	mut js_runtime := build_websocket_js_runtime(ctx, runtime_meta, app.runtime_config_json, mut
-		app)
+	mut js_runtime :=
+		build_websocket_js_runtime(ctx, runtime_meta, app.runtime_config_json, mut app)
 	defer {
 		js_runtime.free()
 	}
@@ -4644,7 +4681,8 @@ fn (e InProcVjsxExecutor) resolve_websocket_affinity_on_lane(mut app App, frame
 			e.record_lane_success(lane.id)
 			return WebSocketAffinityDecision{}
 		}
-		err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), 'inproc_vjsx_executor_websocket_affinity_failed')
+		err_msg := inproc_vjsx_context_error_message(ctx, err.msg(),
+			'inproc_vjsx_executor_websocket_affinity_failed')
 		e.record_lane_error(lane.id, err_msg)
 		return error(err_msg)
 	}
@@ -4652,12 +4690,14 @@ fn (e InProcVjsxExecutor) resolve_websocket_affinity_on_lane(mut app App, frame
 		result.free()
 	}
 	if result.is_exception() {
-		err_msg := inproc_vjsx_context_error_message(ctx, 'exception', 'inproc_vjsx_executor_websocket_affinity_failed')
+		err_msg := inproc_vjsx_context_error_message(ctx, 'exception',
+			'inproc_vjsx_executor_websocket_affinity_failed')
 		e.record_lane_error(lane.id, err_msg)
 		return error(err_msg)
 	}
 	resolved := host.resolve_value(result) or {
-		err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), 'inproc_vjsx_executor_websocket_affinity_failed')
+		err_msg := inproc_vjsx_context_error_message(ctx, err.msg(),
+			'inproc_vjsx_executor_websocket_affinity_failed')
 		e.record_lane_error(lane.id, err_msg)
 		return error(err_msg)
 	}
@@ -4699,8 +4739,8 @@ fn (e InProcVjsxExecutor) resolve_websocket_actor_on_lane(mut app App, frame Wor
 	state.mu.unlock()
 	ctx := host.context()
 	runtime_meta := e.websocket_runtime_meta(lane, frame)
-	mut js_runtime := build_websocket_js_runtime(ctx, runtime_meta, app.runtime_config_json, mut
-		app)
+	mut js_runtime :=
+		build_websocket_js_runtime(ctx, runtime_meta, app.runtime_config_json, mut app)
 	defer {
 		js_runtime.free()
 	}
@@ -4713,7 +4753,8 @@ fn (e InProcVjsxExecutor) resolve_websocket_actor_on_lane(mut app App, frame Wor
 			e.record_lane_success(lane.id)
 			return WebSocketActorDecision{}
 		}
-		err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), 'inproc_vjsx_executor_websocket_actor_failed')
+		err_msg := inproc_vjsx_context_error_message(ctx, err.msg(),
+			'inproc_vjsx_executor_websocket_actor_failed')
 		e.record_lane_error(lane.id, err_msg)
 		return error(err_msg)
 	}
@@ -4721,7 +4762,8 @@ fn (e InProcVjsxExecutor) resolve_websocket_actor_on_lane(mut app App, frame Wor
 		result.free()
 	}
 	if result.is_exception() {
-		err_msg := inproc_vjsx_context_error_message(ctx, 'exception', 'inproc_vjsx_executor_websocket_actor_failed')
+		err_msg := inproc_vjsx_context_error_message(ctx, 'exception',
+			'inproc_vjsx_executor_websocket_actor_failed')
 		e.record_lane_error(lane.id, err_msg)
 		return error(err_msg)
 	}
@@ -5088,6 +5130,190 @@ pub fn (e InProcVjsxExecutor) dispatch_http(mut app App, req HttpLogicDispatchRe
 	return error(last_err)
 }
 
+// call_plugin_once performs one plugin call attempt on the next lane that can be acquired.
+// The call is exposed to the lane as a request context (method = req.op, path = /_plugin/<capability>);
+// the entry named after the trimmed capability is invoked ('plugin' when blank, and again as the
+// fallback when the named handler is missing). Returns the resolved value as JSON text.
+fn (e InProcVjsxExecutor) call_plugin_once(mut app App, req PluginCallRequest) !PluginCallResponse {
+	e.bootstrap_placeholder()!
+	lane := e.acquire_next_lane(inproc_vjsx_lane_wait_timeout_ms)!
+	defer {
+		e.release_lane(lane.id)
+	}
+	lane_idx := e.lane_index_by_id(lane.id)
+	if lane_idx < 0 {
+		e.record_lane_error(lane.id, 'inproc_vjsx_executor_lane_not_found')
+		return error('inproc_vjsx_executor_lane_not_found')
+	}
+	e.ensure_lane_host(lane_idx) or {
+		e.record_lane_error(lane.id, err.msg())
+		return error(err.msg())
+	}
+	e.run_startup_hooks(mut app, lane_idx, lane) or {
+		e.record_lane_error(lane.id, err.msg())
+		return error(err.msg())
+	}
+	e.activate_lane_request_context(lane_idx, mut app, lane.id, HttpLogicDispatchRequest{
+		method:     req.op
+		path:       '/_plugin/${req.capability}'
+		trace_id:   req.trace_id
+		request_id: req.request_id
+	})
+	defer {
+		e.clear_lane_request_context(lane_idx)
+	}
+	mut state := e.state
+	state.mu.@lock()
+	mut host := state.hosts[lane_idx]
+	state.mu.unlock()
+	ctx := host.context()
+	js_req := ctx.json_parse(json.encode(req))
+	defer {
+		js_req.free()
+	}
+	capability := req.capability.trim_space()
+	entry_kind := if capability == '' { 'plugin' } else { capability }
+	mut result := host.call_entry(entry_kind, js_req) or {
+		if err.msg() != 'inproc_vjsx_executor_missing_${entry_kind}_handler' {
+			e.record_lane_error(lane.id, err.msg())
+			return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}')
+		}
+		host.call_entry('plugin', js_req) or {
+			e.record_lane_error(lane.id, err.msg())
+			return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}')
+		}
+	}
+	defer {
+		result.free()
+	}
+	resolved := host.resolve_value(result) or {
+		e.record_lane_error(lane.id, err.msg())
+		return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}')
+	}
+	defer {
+		resolved.free()
+	}
+	payload := resolved.json_stringify()
+	e.record_lane_success(lane.id)
+	return PluginCallResponse{
+		ok:     true
+		result: payload
+	}
+}
+// call_plugin remembers `app`, then tries call_plugin_once up to inproc_vjsx_dispatch_retry_attempts times, stopping at the first success or non-retryable error.
+pub fn (e InProcVjsxExecutor) call_plugin(mut app App, req PluginCallRequest) !PluginCallResponse {
+	e.remember_app(mut app)
+	mut last_err := 'inproc_vjsx_executor_plugin_call_failed'
+	for attempt in 0 .. inproc_vjsx_dispatch_retry_attempts {
+		if resp := e.call_plugin_once(mut app, req) {
+			return resp
+		} else {
+			last_err = err.msg()
+		}
+		retries_left := attempt + 1 < inproc_vjsx_dispatch_retry_attempts
+		if !retries_left || !inproc_vjsx_should_retry_dispatch(last_err) {
+			break
+		}
+	}
+	return error(last_err)
+}
+
+// call_plugin_stream_once performs one plugin call attempt whose result may be a stream.
+// Lane setup and entry selection mirror a plain plugin call: request context with method = req.op
+// and path = /_plugin/<capability>, entry named after the trimmed capability, 'plugin' as fallback.
+// A streamable result is pumped frame by frame (JSON text) into on_frame and only a
+// streamed/completed summary is returned; any other result is resolved and returned as JSON text.
+fn (e InProcVjsxExecutor) call_plugin_stream_once(mut app App, req PluginCallRequest, on_frame PluginStreamFrameFn) !PluginStreamCallResponse {
+	e.bootstrap_placeholder()!
+	lane := e.acquire_next_lane(inproc_vjsx_lane_wait_timeout_ms)!
+	defer {
+		e.release_lane(lane.id)
+	}
+	lane_idx := e.lane_index_by_id(lane.id)
+	if lane_idx < 0 {
+		e.record_lane_error(lane.id, 'inproc_vjsx_executor_lane_not_found')
+		return error('inproc_vjsx_executor_lane_not_found')
+	}
+	e.ensure_lane_host(lane_idx) or {
+		e.record_lane_error(lane.id, err.msg())
+		return error(err.msg())
+	}
+	e.run_startup_hooks(mut app, lane_idx, lane) or {
+		e.record_lane_error(lane.id, err.msg())
+		return error(err.msg())
+	}
+	e.activate_lane_request_context(lane_idx, mut app, lane.id, HttpLogicDispatchRequest{
+		method:     req.op
+		path:       '/_plugin/${req.capability}'
+		trace_id:   req.trace_id
+		request_id: req.request_id
+	})
+	defer {
+		e.clear_lane_request_context(lane_idx)
+	}
+	mut state := e.state
+	state.mu.@lock()
+	mut host := state.hosts[lane_idx]
+	state.mu.unlock()
+	ctx := host.context()
+	js_req := ctx.json_parse(json.encode(req))
+	defer {
+		js_req.free()
+	}
+	capability := req.capability.trim_space()
+	entry_kind := if capability == '' { 'plugin' } else { capability }
+	mut result := host.call_entry(entry_kind, js_req) or {
+		if err.msg() != 'inproc_vjsx_executor_missing_${entry_kind}_handler' {
+			e.record_lane_error(lane.id, err.msg())
+			return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}')
+		}
+		host.call_entry('plugin', js_req) or {
+			e.record_lane_error(lane.id, err.msg())
+			return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}')
+		}
+	}
+	defer {
+		result.free()
+	}
+	if host.session.is_streamable_value(result) {
+		completed := host.session.stream_value(result, fn [on_frame] (frame vjsx.Value) !bool {
+			return on_frame(frame.json_stringify())!
+		}) or {
+			e.record_lane_error(lane.id, err.msg())
+			return error('inproc_vjsx_executor_plugin_stream_failed:${err.msg()}')
+		}
+		e.record_lane_success(lane.id)
+		return PluginStreamCallResponse{
+			streamed: true
+			response: PluginCallResponse{
+				ok:     true
+				result: '{"streamed":true,"completed":${completed}}'
+			}
+		}
+	}
+	resolved := host.resolve_value(result) or {
+		e.record_lane_error(lane.id, err.msg())
+		return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}')
+	}
+	defer {
+		resolved.free()
+	}
+	payload := resolved.json_stringify()
+	e.record_lane_success(lane.id)
+	return PluginStreamCallResponse{
+		streamed: false
+		response: PluginCallResponse{
+			ok:     true
+			result: payload
+		}
+	}
+}
+// call_plugin_stream remembers `app` and makes a single streaming attempt via call_plugin_stream_once; unlike call_plugin it has no retry loop.
+pub fn (e InProcVjsxExecutor) call_plugin_stream(mut app App, req PluginCallRequest, on_frame PluginStreamFrameFn) !PluginStreamCallResponse {
+	e.remember_app(mut app)
+	return e.call_plugin_stream_once(mut app, req, on_frame)
+}
+
 pub fn (e InProcVjsxExecutor) open_websocket_session(mut app App, req WebSocketSessionOpenRequest) !WebSocketSessionOpenOutcome {
 	e.remember_app(mut app)
 	_ = app
@@ -5349,8 +5575,8 @@ pub fn (e InProcVjsxExecutor) dispatch_websocket_event(mut app App, frame Worker
 			result := inproc_vjsx_await_websocket_task_result(done_ch, mut slot)!
 			if frame.event == 'open' {
 			}
-			return e.finalize_websocket_dispatch_response(frame, '', '', actor.key, actor.class_name,
-				actor.persist, result)
+			return e.finalize_websocket_dispatch_response(frame, '', '', actor.key,
+				actor.class_name, actor.persist, result)
 		}
 	}
 	affinity_key, affinity_priority, should_queue := e.resolve_websocket_dispatch_affinity(frame) or {
@@ -5394,8 +5620,8 @@ pub fn (e InProcVjsxExecutor) dispatch_websocket_event(mut app App, frame Worker
 			state.websocket_affinity_lane_by_key[affinity_key] or { '' }
 		}
 		state.mu.unlock()
-		return e.finalize_websocket_dispatch_response(frame, affinity_key, lane_id, '',
-			'', false, result)
+		return e.finalize_websocket_dispatch_response(frame, affinity_key, lane_id, '', '', false,
+			result)
 	}
 	lane, direct_affinity_key := e.acquire_websocket_lane(frame) or {
 		if err.msg() == 'inproc_vjsx_executor_websocket_affinity_key_missing' {
@@ -5428,6 +5654,6 @@ pub fn (e InProcVjsxExecutor) dispatch_websocket_event(mut app App, frame Worker
 	result := inproc_vjsx_await_websocket_task_result(done_ch, mut slot)!
 	if frame.event == 'open' {
 	}
-	return e.finalize_websocket_dispatch_response(frame, direct_affinity_key, lane.id,
-		'', '', false, result)
+	return e.finalize_websocket_dispatch_response(frame, direct_affinity_key, lane.id, '', '',
+		false, result)
 }
diff --git a/src/main.v b/src/main.v
index c43b349..2d709c9 100644
--- a/src/main.v
+++ b/src/main.v
@@ -37,6 +37,8 @@ pub mut:
 	admin_on_data_plane                         bool
 	admin_token                                 string
 	runtime_config_json                         string
+	plugin_configs                              map[string]PluginConfig
+	plugin_vjsx                                 map[string]InProcVjsxExecutor
 	assets_enabled                              bool
 	assets_prefix                               string
 	assets_root                                 string
@@ -52,6 +54,14 @@ pub mut:
 	feishu_reconnect_delay_ms                   int
 	feishu_token_refresh_skew_seconds           int
 	feishu_recent_event_limit                   int
+	openai_enabled                              bool
+	openai_base_path                            string
+	openai_default_backend                      string
+	openai_plugin                               string
+	openai_endpoints                            OpenAIEndpointsConfig
+	openai_backends                             map[string]OpenAIBackendConfig
+	openai_routes                               map[string]OpenAIRouteConfig
+	openai_responses                            MemoryStateStore[OpenAIResponseRecord]
 	websocket_upstream_recent_dispatch_limit    int
 	auto_start_dynamic_upstreams                bool
 	feishu_static_apps                          map[string]FeishuAppConfig
@@ -94,30 +104,30 @@ pub mut:
 	websocket_upstream_recent_activities        []WebSocketUpstreamActivitySnapshot
 	provider_instance_specs                     map[string]ProviderInstanceSpec = map[string]ProviderInstanceSpec{}
 	// codex upstream
-	codex_mu                     sync.Mutex
-	codex_runtime                CodexProviderRuntime
-	codex_instances              map[string]CodexProviderRuntime = map[string]CodexProviderRuntime{}
-	ollama_enabled               bool
-	db_runtime                   DbProviderRuntime
-	feishu_buffers               map[string]FeishuStreamBuffer
-	feishu_http_lane             shared FeishuHttpLane
-	feishu_control_http_lane     shared FeishuControlHttpLane
-	feishu_http_test_stub        bool
-	feishu_http_test_delay_ms    int
-	feishu_http_test_inflight    int
-	feishu_http_test_calls       int
-	feishu_http_test_message_seq int
-	feishu_card_bridge_mu        sync.Mutex
-	feishu_card_bridge_send_mu   sync.Mutex
-	feishu_card_bridge_clients   map[string]&websocket.Client = map[string]&websocket.Client{}
-	feishu_card_bridge_pending   map[string]chan FeishuCardBridgeResult = map[string]chan FeishuCardBridgeResult{}
+	codex_mu                         sync.Mutex
+	codex_runtime                    CodexProviderRuntime
+	codex_instances                  map[string]CodexProviderRuntime = map[string]CodexProviderRuntime{}
+	ollama_enabled                   bool
+	db_runtime                       DbProviderRuntime
+	feishu_buffers                   map[string]FeishuStreamBuffer
+	feishu_http_lane                 shared FeishuHttpLane
+	feishu_control_http_lane         shared FeishuControlHttpLane
+	feishu_http_test_stub            bool
+	feishu_http_test_delay_ms        int
+	feishu_http_test_inflight        int
+	feishu_http_test_calls           int
+	feishu_http_test_message_seq     int
+	feishu_card_bridge_mu            sync.Mutex
+	feishu_card_bridge_send_mu       sync.Mutex
+	feishu_card_bridge_clients       map[string]&websocket.Client            = map[string]&websocket.Client{}
+	feishu_card_bridge_pending       map[string]chan FeishuCardBridgeResult  = map[string]chan FeishuCardBridgeResult{}
 	feishu_card_bridge_proxy_pending map[string]chan FeishuBridgeProxyResult = map[string]chan FeishuBridgeProxyResult{}
-	feishu_card_bridge_client_conn   &websocket.Client = unsafe { nil }
+	feishu_card_bridge_client_conn   &websocket.Client                       = unsafe { nil }
 	feishu_card_bridge_enabled_flag  bool
-	feishu_card_bridge_ws_url    string
-	feishu_card_bridge_client_id string
-	feishu_card_bridge_token     string
-	feishu_card_bridge_target_id string
+	feishu_card_bridge_ws_url        string
+	feishu_card_bridge_client_id     string
+	feishu_card_bridge_token         string
+	feishu_card_bridge_target_id     string
 }
 
 struct CodexTarget {
@@ -283,15 +293,15 @@ struct WorkerWebSocketFrame {
 }
 
 struct WorkerWebSocketDispatchResponse {
-	mode        string
-	event       string
-	id          string
-	accepted    bool
-	closed      bool
-	commands    []WorkerWebSocketFrame
+	mode         string
+	event        string
+	id           string
+	accepted     bool
+	closed       bool
+	commands     []WorkerWebSocketFrame
 	affinity_key string @[json: 'affinity_key']
-	error       string
-	error_class string @[json: 'error_class']
+	error        string
+	error_class  string @[json: 'error_class']
 }
 
 struct WorkerWebSocketDispatchCommandFailure {
@@ -847,8 +857,7 @@ fn proxy_worker_websocket_dispatch(mut app App, mut ctx Context, method string,
 		app.ws_hub_meta_snapshot(req_id), map[string][]string{}, map[string]map[string]string{},
 		map[string]int{}, map[string][]string{})
 	resp := app.kernel_dispatch_websocket_event(open_frame) or {
-		err_msg := inproc_vjsx_normalize_error_message(err.msg(),
-			'inproc_vjsx_executor_websocket_open_failed')
+		err_msg := inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_open_failed')
 		log.error('[vhttpd] kernel_dispatch_websocket_event failed trace_id=${trace_id} path=${normalized_path} error=${err_msg}')
 		ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {}
 		ctx.set_custom_header('x-vhttpd-error-class', 'transport_error') or {}
@@ -882,8 +891,7 @@ fn proxy_worker_websocket_dispatch(mut app App, mut ctx Context, method string,
 	ctx.conn.set_read_timeout(time.infinite)
 	mut conn := ctx.conn
 	spawn handle_worker_websocket_dispatch_session(mut app, mut conn, key, method.to_upper(),
-		normalized_path, query, headers, remote_addr, req_id, trace_id, start_ms,
-		resp.commands.clone())
+		normalized_path, query, headers, remote_addr, req_id, trace_id, start_ms, resp.commands.clone())
 	return veb.no_result()
 }
 
@@ -961,18 +969,18 @@ fn handle_worker_websocket_dispatch_session(mut app App, mut client_conn net.Tcp
 	mut ws_server := websocket.new_server(.ip, 0, '')
 	mut lifecycle := &WebSocketDispatchConnState{}
 	mut state := &WebSocketDispatchBridgeState{
-		app:         &app
-		lifecycle:   lifecycle
+		app:           &app
+		lifecycle:     lifecycle
 		open_commands: open_commands.clone()
-		conn_id:     req_id
-		method:      method
-		path:        path
-		query:       query.clone()
-		headers:     headers.clone()
-		remote_addr: remote_addr
-		request_id:  req_id
-		trace_id:    trace_id
-		start_ms:    start_ms
+		conn_id:       req_id
+		method:        method
+		path:          path
+		query:         query.clone()
+		headers:       headers.clone()
+		remote_addr:   remote_addr
+		request_id:    req_id
+		trace_id:      trace_id
+		start_ms:      start_ms
 	}
 	ws_server.on_message_ref(worker_websocket_dispatch_message_cb, state)
 	ws_server.on_close_ref(worker_websocket_dispatch_close_cb, state)
@@ -980,9 +988,7 @@ fn handle_worker_websocket_dispatch_session(mut app App, mut client_conn net.Tcp
 	defer {
 		worker_websocket_dispatch_finalize(state)
 	}
-	ws_server.handle_handshake(mut client_conn, key) or {
-		return
-	}
+	ws_server.handle_handshake(mut client_conn, key) or { return }
 }
 
 fn worker_websocket_dispatch_attached_cb(mut sc websocket.ServerClient, ref voidptr) ! {
@@ -992,8 +998,8 @@ fn worker_websocket_dispatch_attached_cb(mut sc websocket.ServerClient, ref void
 	unsafe {
 		mut state := &WebSocketDispatchBridgeState(ref)
 		state.app.ws_hub_register_conn(state.conn_id, '', state.method, state.request_id,
-			state.trace_id, state.path, state.query, state.headers, state.remote_addr, sc.client,
-			state.lifecycle)
+			state.trace_id, state.path, state.query, state.headers, state.remote_addr,
+			sc.client, state.lifecycle)
 		worker_websocket_dispatch_process_open(state)
 		worker_websocket_dispatch_activate(state)
 	}
@@ -1109,7 +1115,10 @@ fn worker_websocket_dispatch_message_cb(mut ws websocket.Client, msg &websocket.
 		return
 	}
 	if result.failures.len > 0 {
-		if close_frame := state.app.websocket_dispatch_followup_failures(state.conn_id, state.method, state.path, state.query, state.headers, state.remote_addr, state.request_id, state.trace_id, result.failures) {
+		if close_frame := state.app.websocket_dispatch_followup_failures(state.conn_id,
+			state.method, state.path, state.query, state.headers, state.remote_addr, state.request_id,
+			state.trace_id, result.failures)
+		{
 			code := if close_frame.code > 0 { close_frame.code } else { 1000 }
 			worker_websocket_dispatch_begin_local_close(mut state)
 			ws.close(code, close_frame.reason)!
@@ -1934,8 +1943,14 @@ pub fn (mut app App) events_stream(mut ctx Context) veb.Result {
 
 @['/:path...'; get]
 pub fn (mut app App) proxy_get(mut ctx Context, path string) veb.Result {
+	start_ms := time.now().unix_milli()
 	log.info('[http] route proxy_get path=${path} url=${ctx.req.url}')
 	target := if ctx.req.url == '' { path } else { ctx.req.url }
+	req_id := resolve_request_id(ctx, target)
+	trace_id := resolve_trace_id(ctx, target)
+	if result := app.openai_try_handle(mut ctx, 'GET', target, req_id, trace_id, start_ms) {
+		return result
+	}
 	request_path, _ := normalize_request_target(target)
 	normalized_target := normalize_path(request_path)
 	if normalized_target == '/mcp' {
@@ -1950,8 +1965,14 @@ pub fn (mut app App) proxy_get(mut ctx Context, path string) veb.Result {
 
 @['/:path...'; post]
 pub fn (mut app App) proxy_post(mut ctx Context, path string) veb.Result {
+	start_ms := time.now().unix_milli()
 	log.info('[http] route proxy_post path=${path} url=${ctx.req.url} body_len=${ctx.req.data.len}')
 	target := if ctx.req.url == '' { path } else { ctx.req.url }
+	req_id := resolve_request_id(ctx, target)
+	trace_id := resolve_trace_id(ctx, target)
+	if result := app.openai_try_handle(mut ctx, 'POST', target, req_id, trace_id, start_ms) {
+		return result
+	}
 	request_path, _ := normalize_request_target(target)
 	normalized_target := normalize_path(request_path)
 	if normalized_target == '/mcp' {
@@ -1966,27 +1987,45 @@ pub fn (mut app App) proxy_post(mut ctx Context, path string) veb.Result {
 
 @['/:path...'; put]
 pub fn (mut app App) proxy_put(mut ctx Context, path string) veb.Result {
+	start_ms := time.now().unix_milli()
+	target := if ctx.req.url == '' { path } else { ctx.req.url }
+	req_id := resolve_request_id(ctx, target)
+	trace_id := resolve_trace_id(ctx, target)
+	if result := app.openai_try_handle(mut ctx, 'PUT', target, req_id, trace_id, start_ms) {
+		return result
+	}
 	if !app.has_http_logic_executor() {
 		ctx.res.set_status(.not_found)
 		return ctx.text('Not Found')
 	}
-	target := if ctx.req.url == '' { path } else { ctx.req.url }
 	return proxy_worker_response(mut app, mut ctx, 'PUT', target, '')
 }
 
 @['/:path...'; patch]
 pub fn (mut app App) proxy_patch(mut ctx Context, path string) veb.Result {
+	start_ms := time.now().unix_milli()
+	target := if ctx.req.url == '' { path } else { ctx.req.url }
+	req_id := resolve_request_id(ctx, target)
+	trace_id := resolve_trace_id(ctx, target)
+	if result := app.openai_try_handle(mut ctx, 'PATCH', target, req_id, trace_id, start_ms) {
+		return result
+	}
 	if !app.has_http_logic_executor() {
 		ctx.res.set_status(.not_found)
 		return ctx.text('Not Found')
 	}
-	target := if ctx.req.url == '' { path } else { ctx.req.url }
 	return proxy_worker_response(mut app, mut ctx, 'PATCH', target, '')
 }
 
 @['/:path...'; delete]
 pub fn (mut app App) proxy_delete(mut ctx Context, path string) veb.Result {
+	start_ms := time.now().unix_milli()
 	target := if ctx.req.url == '' { path } else { ctx.req.url }
+	req_id := resolve_request_id(ctx, target)
+	trace_id := resolve_trace_id(ctx, target)
+	if result := app.openai_try_handle(mut ctx, 'DELETE', target, req_id, trace_id, start_ms) {
+		return result
+	}
 	if normalize_path(target) == '/mcp' {
 		return app.mcp_delete(mut ctx)
 	}
@@ -1999,10 +2038,16 @@ pub fn (mut app App) proxy_delete(mut ctx Context, path string) veb.Result {
 
 @['/:path...'; head]
 pub fn (mut app App) proxy_head(mut ctx Context, path string) veb.Result {
+	start_ms := time.now().unix_milli()
+	target := if ctx.req.url == '' { path } else { ctx.req.url }
+	req_id := resolve_request_id(ctx, target)
+	trace_id := resolve_trace_id(ctx, target)
+	if result := app.openai_try_handle(mut ctx, 'HEAD', target, req_id, trace_id, start_ms) {
+		return result
+	}
 	if !app.has_http_logic_executor() {
 		ctx.res.set_status(.not_found)
 		return ctx.text('')
 	}
-	target := if ctx.req.url == '' { path } else { ctx.req.url }
 	return proxy_worker_response(mut app, mut ctx, 'HEAD', target, '')
 }
diff --git a/src/openai_runtime.v b/src/openai_runtime.v
new file mode 100644
index 0000000..e1a4332
--- /dev/null
+++ b/src/openai_runtime.v
@@ -0,0 +1,2709 @@
+module main
+
+import json
+import net
+import net.http
+import os
+import time
+import veb
+import x.json2
+
+const openai_response_registry_ttl = 24 * time.hour // 24 hours; presumably the retention of stored response records - confirm at use sites
+const openai_stream_done_fetch_error = 'openai_stream_done' // NOTE(review): looks like a sentinel error text for a finished stream - confirm at use sites
+// OpenAIModelObject is one model entry; `object` defaults to 'model' and `owned_by` to 'vhttpd'.
+struct OpenAIModelObject {
+	id       string
+	object   string = 'model'
+	created  int
+	owned_by string = 'vhttpd'
+}
+// OpenAIModelsResponse wraps a list of OpenAIModelObject in `data`; `object` defaults to 'list'.
+struct OpenAIModelsResponse {
+	object string = 'list'
+	data   []OpenAIModelObject
+}
+// OpenAIErrorBody holds an error's message, type and code; `typ` is encoded under the JSON key `type`.
+struct OpenAIErrorBody {
+	message string
+	typ     string @[json: 'type']
+	code    string
+}
+// OpenAIErrorResponse nests an OpenAIErrorBody under the `error` field.
+struct OpenAIErrorResponse {
+	error OpenAIErrorBody
+}
+// OpenAIResolvedRoute ties a route name and model to a backend name, its config and the upstream model name; presumably the outcome of model routing - confirm at call sites.
+struct OpenAIResolvedRoute {
+	route_name     string
+	model          string
+	backend_name   string
+	upstream_model string
+	backend        OpenAIBackendConfig
+}
+// OpenAIUpstreamPlan describes an upstream call with the backend given by name; multi-word fields use snake_case JSON keys. NOTE(review): presumably decoded from plugin output - confirm.
+struct OpenAIUpstreamPlan {
+	backend         string
+	method          string
+	path            string
+	body            string
+	upstream_model  string @[json: 'upstream_model']
+	stream_mode     string @[json: 'stream_mode']
+	response_codec  string @[json: 'response_codec']
+	output_protocol string @[json: 'output_protocol']
+	mapper          string
+	headers         map[string]string
+}
+// OpenAIResolvedPlan is a request plan (method, path, body, model, stream/codec/protocol/mapper settings, headers) bound to a named backend and its OpenAIBackendConfig.
+struct OpenAIResolvedPlan {
+	backend_name    string
+	backend         OpenAIBackendConfig
+	method          string
+	path            string
+	body            string
+	model           string
+	stream_mode     string
+	response_codec  string
+	output_protocol string
+	mapper          string
+	headers         map[string]string
+}
+// OpenAIPluginPlanResult pairs a `handled` flag with a plan; presumably `plan` is only meaningful when `handled` is true - confirm.
+struct OpenAIPluginPlanResult {
+	handled bool
+	plan    OpenAIResolvedPlan
+}
+// OpenAIPluginModelsResult pairs a `handled` flag with a list of model names; presumably `models` is only meaningful when `handled` is true - confirm.
+struct OpenAIPluginModelsResult {
+	handled bool
+	models  []string
+}
+// OpenAIResponseRecord is the element type of App.openai_responses (a MemoryStateStore): ids, backend/executor info, model, status, unix timestamps and body of one response.
+struct OpenAIResponseRecord {
+	id              string
+	backend_name    string
+	backend_kind    string
+	executor        string
+	model           string
+	status          string
+	created_at_unix i64
+	updated_at_unix i64
+	request_id      string
+	trace_id        string
+	body            string
+}
+// OpenAIResponsesStreamRegistryState is a heap-allocated holder for one mutable string, `completed_body`.
+@[heap]
+struct OpenAIResponsesStreamRegistryState {
+mut:
+	completed_body string
+}
+// OpenAIPluginChatPayload describes one request (method, path, model, stream flag, body) plus base path and request/trace ids; presumably the plugin input for chat calls - confirm.
+struct OpenAIPluginChatPayload {
+	method     string
+	path       string
+	model      string
+	stream     bool
+	body       string
+	base_path  string @[json: 'base_path']
+	request_id string @[json: 'request_id']
+	trace_id   string @[json: 'trace_id']
+}
+// OpenAIPluginResponsesPayload has the same field set as OpenAIPluginChatPayload; presumably the plugin input for the responses endpoint - confirm.
+struct OpenAIPluginResponsesPayload {
+	method     string
+	path       string
+	model      string
+	stream     bool
+	body       string
+	base_path  string @[json: 'base_path']
+	request_id string @[json: 'request_id']
+	trace_id   string @[json: 'trace_id']
+}
+// OpenAIPluginModelsPayload carries method, path, base path and request/trace ids, with no model or body; presumably the plugin input for model listing - confirm.
+struct OpenAIPluginModelsPayload {
+	method     string
+	path       string
+	base_path  string @[json: 'base_path']
+	request_id string @[json: 'request_id']
+	trace_id   string @[json: 'trace_id']
+}
+// OpenAIPluginFallbackPayload is a request description extended with the failed backend's name, a status code and error code/message; presumably used to ask a plugin for a fallback - confirm.
+struct OpenAIPluginFallbackPayload {
+	method         string
+	path           string
+	model          string
+	stream         bool
+	body           string
+	base_path      string @[json: 'base_path']
+	failed_backend string @[json: 'failed_backend']
+	status_code    int    @[json: 'status_code']
+	error_code     string @[json: 'error_code']
+	error_message  string @[json: 'error_message']
+	request_id     string @[json: 'request_id']
+	trace_id       string @[json: 'trace_id']
+}
+// OpenAIExecutorPayload is a request description with a backend name and response codec / output protocol selectors; presumably handed to an executor-type backend - confirm.
+struct OpenAIExecutorPayload {
+	method          string
+	path            string
+	model           string
+	stream          bool
+	body            string
+	backend         string
+	request_id      string @[json: 'request_id']
+	trace_id        string @[json: 'trace_id']
+	response_codec  string @[json: 'response_codec']
+	output_protocol string @[json: 'output_protocol']
+}
+// OpenAIPluginMapFramePayload carries one `frame` string with model, codec/protocol selectors and request/trace ids; presumably the input of a plugin frame mapper - confirm.
+struct OpenAIPluginMapFramePayload {
+	model           string
+	frame           string
+	response_codec  string @[json: 'response_codec']
+	output_protocol string @[json: 'output_protocol']
+	request_id      string @[json: 'request_id']
+	trace_id        string @[json: 'trace_id']
+}
+// OpenAIChatStreamDelta is the `delta` of a streamed choice; it only carries `content`.
+struct OpenAIChatStreamDelta {
+	content string
+}
+// OpenAIChatStreamChoice is one entry of OpenAIChatStreamChunk.choices: an index plus its delta.
+struct OpenAIChatStreamChoice {
+	index int
+	delta OpenAIChatStreamDelta
+}
+// OpenAIChatStreamChunk is one streamed chat chunk; `object` defaults to 'chat.completion.chunk'.
+struct OpenAIChatStreamChunk {
+	id      string
+	object  string = 'chat.completion.chunk'
+	created int
+	model   string
+	choices []OpenAIChatStreamChoice
+}
+// OpenAIChatMessage is a chat message made of a `role` and its text `content`.
+struct OpenAIChatMessage {
+	role    string
+	content string
+}
+// OpenAIChatCompletionChoice is one entry of OpenAIChatCompletionResponse.choices: index, message and finish reason.
+struct OpenAIChatCompletionChoice {
+	index         int
+	message       OpenAIChatMessage
+	finish_reason string @[json: 'finish_reason']
+}
+// OpenAIChatCompletionResponse is a non-streamed chat completion; `object` defaults to 'chat.completion'.
+struct OpenAIChatCompletionResponse {
+	id      string
+	object  string = 'chat.completion'
+	created int
+	model   string
+	choices []OpenAIChatCompletionChoice
+}
+// OpenAIFrameMapping holds content, tool calls, usage counters and done/handled/error/finish_reason markers; presumably the result of mapping one upstream frame - confirm at call sites.
+struct OpenAIFrameMapping {
+	content       string
+	tool_calls    []json2.Any
+	usage         map[string]int
+	done          bool
+	handled       bool
+	error         string
+	finish_reason string
+}
+// OpenAIChunkDecodeState is the carry-over state of openai_decode_progress_chunk between calls.
+struct OpenAIChunkDecodeState {
+mut:
+	mode            string = 'unknown' // 'unknown' until classified, then 'plain' or 'chunked'
+	buffer          string // received text not yet decoded
+	remaining       int    // bytes still expected for the current chunk body
+	need_chunk_crlf bool   // set once a chunk body is fully consumed; its line break is skipped next
+	done            bool   // set when a zero-size chunk was read
+}
+
+// openai_hex_chunk_size parses the hex size of an HTTP chunk-size line, ignoring anything after `;`.
+// Returns none when the field is empty, has a non-hex character, or exceeds a 32-bit int.
+fn openai_hex_chunk_size(raw string) ?int {
+	hex_part := raw.all_before(';').trim_space()
+	if hex_part == '' {
+		return none
+	}
+	mut size := 0
+	for ch in hex_part {
+		if size > 0x7ffffff { // one more digit would wrap negative and stall the chunk decoder
+			return none
+		} else if ch >= `0` && ch <= `9` {
+			size = size * 16 + int(ch - `0`)
+		} else if (ch | 0x20) >= `a` && (ch | 0x20) <= `f` { // `| 0x20` folds A-F onto a-f
+			size = size * 16 + 10 + int((ch | 0x20) - `a`)
+		} else {
+			return none
+		}
+	}
+	return size
+}
+
+// openai_decode_progress_chunk appends `chunk` to the decoder and returns the payload text now available.
+// The first data decides the framing: a leading hex size line selects HTTP chunked decoding, anything
+// else is passed through as 'plain'. Incomplete size lines and chunk bodies stay buffered for later calls.
+fn openai_decode_progress_chunk(mut decoder OpenAIChunkDecodeState, chunk []u8) string {
+	if chunk.len == 0 || decoder.done {
+		return ''
+	}
+	incoming := chunk.bytestr()
+	if decoder.mode == 'plain' {
+		return incoming
+	}
+	decoder.buffer += incoming
+	if decoder.mode == 'unknown' {
+		has_crlf := decoder.buffer.contains('\r\n')
+		if !has_crlf && !decoder.buffer.contains('\n') && decoder.buffer.len <= 64 {
+			return '' // too little data to classify the framing yet
+		}
+		decoder.mode = 'plain'
+		if has_crlf {
+			if _ := openai_hex_chunk_size(decoder.buffer.all_before('\r\n')) {
+				decoder.mode = 'chunked'
+			}
+		}
+		if decoder.mode == 'plain' {
+			passthrough := decoder.buffer
+			decoder.buffer = ''
+			return passthrough
+		}
+	}
+	mut out := ''
+	for decoder.mode == 'chunked' && decoder.buffer.len > 0 && !decoder.done {
+		if decoder.need_chunk_crlf {
+			if decoder.buffer.len < 2 {
+				break
+			}
+			if decoder.buffer.starts_with('\r\n') {
+				decoder.buffer = decoder.buffer[2..]
+			} else if decoder.buffer.starts_with('\n') {
+				decoder.buffer = decoder.buffer[1..]
+			}
+			decoder.need_chunk_crlf = false
+		}
+		if decoder.remaining == 0 {
+			if !decoder.buffer.contains('\r\n') {
+				break
+			}
+			line := decoder.buffer.all_before('\r\n')
+			decoder.buffer = decoder.buffer.all_after('\r\n')
+			size := openai_hex_chunk_size(line) or {
+				decoder.mode = 'plain'
+				out += line + '\r\n' + decoder.buffer // keep the size line: it was already cut from the buffer
+				decoder.buffer = ''
+				break
+			}
+			if size == 0 {
+				decoder.done = true
+				decoder.buffer = ''
+				break
+			}
+			decoder.remaining = size
+		}
+		if decoder.remaining > 0 {
+			mut take := decoder.remaining
+			if decoder.buffer.len < take {
+				take = decoder.buffer.len
+			}
+			out += decoder.buffer[..take]
+			decoder.buffer = decoder.buffer[take..]
+			decoder.remaining -= take
+			if decoder.remaining == 0 {
+				decoder.need_chunk_crlf = true
+			}
+		}
+	}
+	return out
+}
+
+// OpenAIStreamProxyState is the per-request state shared with
+// openai_progress_body_cb while an upstream response is relayed to the client.
+@[heap]
+struct OpenAIStreamProxyState {
+mut:
+	// Client connection that headers and body chunks are written to.
+	conn             net.TcpConn
+	// Request method; body chunks are not written when it is 'HEAD'.
+	method           string
+	// Upstream status; updated from the progress callback when it reports one.
+	status_code      int
+	// Content type sent in the relayed response headers.
+	content_type     string
+	// Extra headers copied into the relayed response.
+	response_headers map[string]string
+	// True once the response headers have been written to `conn`.
+	headers_written  bool
+	// Decoded upstream body, accumulated only when status_code >= 400.
+	error_body       string
+	// Chunked-transfer decoder state for the upstream body.
+	chunk_decoder    OpenAIChunkDecodeState
+	// Set when a 'data: [DONE]' marker has been seen in the decoded stream.
+	done             bool
+	// Tail (at most 64 bytes) of previously decoded data, kept so a marker
+	// split across fragments is still detected.
+	done_probe       string
+	// True once the terminating chunk has been written to `conn`.
+	final_written    bool
+}
+
+// OpenAIMappedStreamProxyState is the per-request state for the mapped
+// streaming path, where the response is written as 'text/event-stream'.
+// NOTE(review): only conn, status_code, response_headers, headers_written and
+// final_written are used in the nearby helpers; the remaining fields are
+// presumably consumed by the mapping code elsewhere in this file.
+@[heap]
+struct OpenAIMappedStreamProxyState {
+mut:
+	app              &App = unsafe { nil }
+	// Client connection that headers and chunks are written to.
+	conn             net.TcpConn
+	method           string
+	// Status code used when the response headers are written.
+	status_code      int
+	// Extra headers copied into the response.
+	response_headers map[string]string
+	// True once the response headers have been written to `conn`.
+	headers_written  bool
+	line_buffer      string
+	model            string
+	request_id       string
+	trace_id         string
+	mapper           string
+	response_codec   string
+	output_protocol  string
+	created          int
+	done             bool
+	mapper_error     string
+	error_body       string
+	usage            map[string]int
+	chunk_decoder    OpenAIChunkDecodeState
+	// True once the terminating chunk has been written to `conn`.
+	final_written    bool
+}
+
+// normalize_openai_base_path trims `raw`, runs it through normalize_path and
+// removes trailing slashes, always leaving at least one character.
+fn normalize_openai_base_path(raw string) string {
+	mut result := normalize_path(raw.trim_space())
+	for {
+		if result.len <= 1 || !result.ends_with('/') {
+			break
+		}
+		result = result[..result.len - 1]
+	}
+	return result
+}
+
+// openai_relative_path returns the part of the request path that follows
+// `base_path`: '' when the path equals the base, '/rest' when it is nested
+// under it, and `none` when the path is outside the base.
+fn openai_relative_path(target string, base_path string) ?string {
+	raw_path, _ := normalize_request_target(target)
+	wanted := normalize_path(raw_path)
+	root := normalize_openai_base_path(base_path)
+	if wanted == root {
+		return ''
+	}
+	// Matching on root + '/' keeps '/v1beta' from matching a '/v1' base.
+	if wanted.starts_with(root + '/') {
+		// Slice from the separator so the result keeps its leading '/'.
+		return wanted[root.len..]
+	}
+	return none
+}
+
+// openai_relative_target is like openai_relative_path but keeps the query
+// string: the result is the path relative to `base_path`, followed by
+// '?query' when the target has one. Returns `none` when the path is outside
+// the base.
+fn openai_relative_target(target string, base_path string) ?string {
+	raw_path, raw_query := normalize_request_target(target)
+	wanted := normalize_path(raw_path)
+	root := normalize_openai_base_path(base_path)
+	mut tail := ''
+	if wanted != root {
+		if !wanted.starts_with(root + '/') {
+			return none
+		}
+		// Slice from the separator so the tail keeps its leading '/'.
+		tail = wanted[root.len..]
+	}
+	return if raw_query == '' { tail } else { tail + '?' + raw_query }
+}
+
+// openai_response_content_type returns the Content-Type header of `header`,
+// or `fallback` when the header is absent.
+fn openai_response_content_type(header http.Header, fallback string) string {
+	if value := header.get(.content_type) {
+		return value
+	}
+	return fallback
+}
+
+// openai_is_stream_request reports whether the JSON request body carries a
+// truthy top-level `stream` field. Bodies that fail to decode, or that have
+// no such field, yield false.
+fn openai_is_stream_request(body string) bool {
+	decoded := json2.decode[json2.Any](body) or { return false }
+	fields := decoded.as_map()
+	if flag := fields['stream'] {
+		return flag.bool()
+	}
+	return false
+}
+
+// openai_is_stream_target reports whether the request target's query string
+// has a `stream` parameter equal (case-insensitively) to '1', 'true' or 'yes'.
+fn openai_is_stream_target(target string) bool {
+	_, raw_query := normalize_request_target(target)
+	if raw_query == '' {
+		return false
+	}
+	params := parse_query_map(raw_query)
+	if value := params['stream'] {
+		return value.to_lower() in ['1', 'true', 'yes']
+	}
+	return false
+}
+
+// openai_request_model returns the top-level `model` field of a JSON request
+// body as text, or '' when the body does not decode or has no such field.
+fn openai_request_model(body string) string {
+	decoded := json2.decode[json2.Any](body) or { return '' }
+	fields := decoded.as_map()
+	if model_any := fields['model'] {
+		return model_any.str()
+	}
+	return ''
+}
+
+// openai_response_id_from_body returns the trimmed `id` of a JSON body whose
+// `object` field is 'response'. Any other body yields ''.
+fn openai_response_id_from_body(body string) string {
+	decoded := json2.decode[json2.Any](body) or { return '' }
+	fields := decoded.as_map()
+	kind := fields['object'] or { json2.Any('') }
+	if kind.str() != 'response' {
+		return ''
+	}
+	id := fields['id'] or { json2.Any('') }
+	return id.str().trim_space()
+}
+
+// openai_response_status_from_body returns the top-level `status` field of a
+// JSON body as text, or '' when the body does not decode or lacks the field.
+fn openai_response_status_from_body(body string) string {
+	decoded := json2.decode[json2.Any](body) or { return '' }
+	fields := decoded.as_map()
+	if status_any := fields['status'] {
+		return status_any.str()
+	}
+	return ''
+}
+
+// openai_response_id_from_relative extracts the response id from a relative
+// target of the form '/responses/<id>[/...][?query]'. Returns '' when the
+// path does not start with '/responses/' or has nothing after it.
+fn openai_response_id_from_relative(relative string) string {
+	clean := normalize_path(relative.all_before('?'))
+	marker := '/responses/'
+	if !clean.starts_with(marker) {
+		return ''
+	}
+	tail := clean[marker.len..]
+	if tail.trim_space() == '' {
+		return ''
+	}
+	// Only the first path segment after the marker is the id.
+	return tail.all_before('/').trim_space()
+}
+
+// openai_response_registry_record builds the registry entry for a response:
+// backend details come from `plan`, the status from `body` (defaulting to
+// 'completed' when the body has none), and both timestamps are set to now.
+fn openai_response_registry_record(plan OpenAIResolvedPlan, response_id string, body string, req_id string, trace_id string) OpenAIResponseRecord {
+	timestamp := time.now().unix()
+	mut status := openai_response_status_from_body(body)
+	if status == '' {
+		status = 'completed'
+	}
+	return OpenAIResponseRecord{
+		id:              response_id
+		backend_name:    plan.backend_name
+		backend_kind:    plan.backend.kind
+		executor:        plan.backend.executor
+		model:           plan.model
+		status:          status
+		created_at_unix: timestamp
+		updated_at_unix: timestamp
+		request_id:      req_id
+		trace_id:        trace_id
+		body:            body
+	}
+}
+
+// openai_store_response_record stores `body` in the response registry under
+// its response id and returns that id. Returns '' without storing anything
+// when the body carries no response id. A failed store is ignored.
+fn (mut app App) openai_store_response_record(plan OpenAIResolvedPlan, body string, req_id string, trace_id string) string {
+	id := openai_response_id_from_body(body)
+	if id != '' {
+		entry := openai_response_registry_record(plan, id, body, req_id, trace_id)
+		app.openai_responses.set_with_ttl(id, entry, openai_response_registry_ttl) or {}
+	}
+	return id
+}
+
+// openai_replace_model_in_body returns `body` with its top-level `model`
+// field set to `upstream_model`.
+// The body is returned unchanged when `upstream_model` is blank, when the
+// body is not valid JSON, or when it is valid JSON but not an object.
+fn openai_replace_model_in_body(body string, upstream_model string) string {
+	if upstream_model.trim_space() == '' {
+		return body
+	}
+	parsed := json2.decode[json2.Any](body) or { return body }
+	// `as_map()` turns arrays and scalars into a synthetic map keyed by
+	// index, which would rewrite the payload into a different object. Only
+	// real JSON objects are safe to edit.
+	if parsed !is map[string]json2.Any {
+		return body
+	}
+	mut root := parsed.as_map()
+	root['model'] = json2.Any(upstream_model)
+	return json2.Any(root).json_str()
+}
+
+// openai_route_models lists the model names a route answers to, trimmed and
+// without duplicates: first the entries of `route.models`, then
+// `route.model`. When the route names no model at all, the trimmed
+// `route_name` is used as the single model name.
+fn openai_route_models(route OpenAIRouteConfig, route_name string) []string {
+	mut models := []string{}
+	for raw in route.models {
+		model := raw.trim_space()
+		if model != '' && model !in models {
+			models << model
+		}
+	}
+	// Compare the trimmed value: checking the raw string let a padded
+	// `route.model` slip past the duplicate check and be added twice.
+	single := route.model.trim_space()
+	if single != '' && single !in models {
+		models << single
+	}
+	fallback := route_name.trim_space()
+	if models.len == 0 && fallback != '' {
+		models << fallback
+	}
+	return models
+}
+
+// openai_models returns the sorted, de-duplicated list of model names
+// exposed by all configured routes.
+fn (app &App) openai_models() []string {
+	mut unique := map[string]bool{}
+	for route_name, route in app.openai_routes {
+		for model in openai_route_models(route, route_name) {
+			unique[model] = true
+		}
+	}
+	mut names := unique.keys()
+	names.sort()
+	return names
+}
+
+// openai_resolve_route maps a requested model name to a route and backend.
+// The first route whose model list contains the name wins; its backend falls
+// back to the default backend and its upstream model falls back to the
+// requested name. When no route matches (or the name is blank) the default
+// backend is used with the requested name passed through unchanged.
+// Fails when no backend can be determined or the backend is not configured.
+fn (app &App) openai_resolve_route(model string) !OpenAIResolvedRoute {
+	requested := model.trim_space()
+	default_backend := app.openai_default_backend.trim_space()
+	if requested != '' {
+		for name, route in app.openai_routes {
+			if requested !in openai_route_models(route, name) {
+				continue
+			}
+			mut backend_name := route.backend.trim_space()
+			if backend_name == '' {
+				backend_name = default_backend
+			}
+			if backend_name == '' {
+				return error('missing backend for model ${requested}')
+			}
+			backend := app.openai_backends[backend_name] or {
+				return error('unknown backend ${backend_name}')
+			}
+			mut upstream_model := route.upstream_model.trim_space()
+			if upstream_model == '' {
+				upstream_model = requested
+			}
+			return OpenAIResolvedRoute{
+				route_name:     name
+				model:          requested
+				backend_name:   backend_name
+				upstream_model: upstream_model
+				backend:        backend
+			}
+		}
+	}
+	// No route matched: fall back to the default backend, if there is one.
+	if default_backend == '' {
+		return error('no matching route for model ${requested}')
+	}
+	backend := app.openai_backends[default_backend] or {
+		return error('unknown backend ${default_backend}')
+	}
+	return OpenAIResolvedRoute{
+		model:          requested
+		backend_name:   default_backend
+		upstream_model: requested
+		backend:        backend
+	}
+}
+
+// openai_builtin_plan_from_route_for_endpoint_method turns a resolved route
+// into a passthrough plan for `upstream_path`: the method is upper-cased, the
+// body's model is replaced with the route's upstream model, and the plan uses
+// the 'sse' codec with the 'builtin' mapper and no extra headers.
+fn openai_builtin_plan_from_route_for_endpoint_method(route OpenAIResolvedRoute, body string, upstream_path string, output_protocol string, method string) OpenAIResolvedPlan {
+	upstream_body := openai_replace_model_in_body(body, route.upstream_model)
+	upstream_method := method.to_upper()
+	return OpenAIResolvedPlan{
+		backend_name:    route.backend_name
+		backend:         route.backend
+		method:          upstream_method
+		path:            upstream_path
+		body:            upstream_body
+		model:           route.model
+		stream_mode:     'passthrough'
+		response_codec:  'sse'
+		output_protocol: output_protocol
+		mapper:          'builtin'
+		headers:         map[string]string{}
+	}
+}
+
+// openai_builtin_plan_from_route_for_endpoint builds the built-in plan for
+// `upstream_path` using the POST method.
+fn openai_builtin_plan_from_route_for_endpoint(route OpenAIResolvedRoute, body string, upstream_path string, output_protocol string) OpenAIResolvedPlan {
+	default_method := 'POST'
+	return openai_builtin_plan_from_route_for_endpoint_method(route, body, upstream_path,
+		output_protocol, default_method)
+}
+
+// openai_builtin_plan_from_route builds the built-in plan for the chat
+// completions endpoint.
+fn openai_builtin_plan_from_route(route OpenAIResolvedRoute, body string) OpenAIResolvedPlan {
+	chat_path := '/chat/completions'
+	chat_protocol := 'openai.chat.completion'
+	return openai_builtin_plan_from_route_for_endpoint(route, body, chat_path, chat_protocol)
+}
+
+// openai_json_string_field returns obj[key] as text, or `default_val` when
+// the key is missing or its text form is empty.
+fn openai_json_string_field(obj map[string]json2.Any, key string, default_val string) string {
+	if value := obj[key] {
+		text := value.str()
+		return if text == '' { default_val } else { text }
+	}
+	return default_val
+}
+
+// openai_json_string_map_field reads obj[key] as a map and returns it with
+// every value converted to text. A missing key yields an empty map.
+fn openai_json_string_map_field(obj map[string]json2.Any, key string) map[string]string {
+	mut result := map[string]string{}
+	if container := obj[key] {
+		for field, raw in container.as_map() {
+			result[field] = raw.str()
+		}
+	}
+	return result
+}
+
+// openai_plan_error builds an error whose message is '<code>:<message>', the
+// format that openai_plan_error_code and openai_plan_error_message split.
+fn openai_plan_error(code string, message string) IError {
+	return error(code + ':' + message)
+}
+
+// openai_plan_error_code derives the error code for a plan-resolution error
+// message: the text before the first ':' for 'openai_plugin_'-prefixed
+// messages, 'openai_plugin_plan_unknown_backend' for 'unknown backend ...',
+// and 'model_not_found' for everything else.
+fn openai_plan_error_code(err_msg string) string {
+	// 'openai_plugin_plan_' codes share the 'openai_plugin_' prefix, so one
+	// check covers both families.
+	if err_msg.contains(':') && err_msg.starts_with('openai_plugin_') {
+		return err_msg.all_before(':')
+	}
+	return if err_msg.starts_with('unknown backend ') {
+		'openai_plugin_plan_unknown_backend'
+	} else {
+		'model_not_found'
+	}
+}
+
+// openai_plan_error_message returns the human-readable part of a plan error:
+// the text after the first ':' for 'openai_plugin_'-prefixed messages, and
+// the message itself otherwise.
+fn openai_plan_error_message(err_msg string) string {
+	// 'openai_plugin_plan_' messages also start with 'openai_plugin_'.
+	is_coded := err_msg.starts_with('openai_plugin_') && err_msg.contains(':')
+	if !is_coded {
+		return err_msg
+	}
+	return err_msg.all_after(':')
+}
+
+// openai_validate_plan_method normalizes the upstream method of a plugin
+// plan: blank becomes 'POST', a supported method is returned upper-cased,
+// and anything else fails with 'openai_plugin_plan_invalid_method'.
+fn openai_validate_plan_method(raw string) !string {
+	method := raw.trim_space().to_upper()
+	if method == '' {
+		return 'POST'
+	}
+	allowed := ['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD']
+	if method !in allowed {
+		return openai_plan_error('openai_plugin_plan_invalid_method', 'unsupported upstream method ${method}')
+	}
+	return method
+}
+
+// openai_validate_plan_path normalizes the upstream path of a plugin plan:
+// blank becomes '/chat/completions'; otherwise the trimmed path must start
+// with '/' and contain no CR or LF, or the call fails with
+// 'openai_plugin_plan_invalid_path'.
+fn openai_validate_plan_path(raw string) !string {
+	path := raw.trim_space()
+	if path == '' {
+		return '/chat/completions'
+	}
+	if !path.starts_with('/') {
+		return openai_plan_error('openai_plugin_plan_invalid_path', 'upstream path must start with /')
+	}
+	has_line_break := path.contains('\r') || path.contains('\n')
+	if has_line_break {
+		return openai_plan_error('openai_plugin_plan_invalid_path', 'upstream path must not contain newlines')
+	}
+	return path
+}
+
+// openai_validate_stream_mode normalizes a plan's stream mode: blank becomes
+// 'passthrough', a supported mode is returned trimmed, and anything else
+// fails with 'openai_plugin_plan_unsupported_stream_mode'.
+fn openai_validate_stream_mode(raw string) !string {
+	mode := raw.trim_space()
+	if mode == '' {
+		return 'passthrough'
+	}
+	supported := ['passthrough', 'mapped', 'executor']
+	if mode !in supported {
+		return openai_plan_error('openai_plugin_plan_unsupported_stream_mode', 'unsupported stream_mode ${mode}')
+	}
+	return mode
+}
+
+// openai_validate_response_codec normalizes a plan's response codec. A blank
+// codec defaults to 'ndjson' for the 'mapped' stream mode and to 'sse'
+// otherwise; an unsupported codec fails with
+// 'openai_plugin_plan_unsupported_response_codec'.
+fn openai_validate_response_codec(raw string, stream_mode string) !string {
+	codec := raw.trim_space()
+	if codec in ['sse', 'json', 'ndjson', 'text'] {
+		return codec
+	}
+	if codec != '' {
+		return openai_plan_error('openai_plugin_plan_unsupported_response_codec', 'unsupported response_codec ${codec}')
+	}
+	return if stream_mode == 'mapped' { 'ndjson' } else { 'sse' }
+}
+
+// openai_validate_output_protocol normalizes a plan's output protocol. Blank
+// becomes 'openai.chat.completion'. In 'mapped' stream mode only
+// 'openai.chat.completion' is accepted; other modes accept any protocol.
+fn openai_validate_output_protocol(raw string, stream_mode string) !string {
+	protocol := raw.trim_space()
+	if protocol == '' {
+		return 'openai.chat.completion'
+	}
+	if stream_mode != 'mapped' || protocol == 'openai.chat.completion' {
+		return protocol
+	}
+	return openai_plan_error('openai_plugin_plan_unsupported_output_protocol', 'unsupported output_protocol ${protocol}')
+}
+
+// openai_validate_mapper normalizes a plan's mapper: blank becomes
+// 'builtin', 'builtin' and 'plugin' are accepted, and anything else fails
+// with 'openai_plugin_plan_unsupported_mapper'.
+fn openai_validate_mapper(raw string) !string {
+	mapper := raw.trim_space()
+	if mapper == '' {
+		return 'builtin'
+	}
+	if mapper != 'builtin' && mapper != 'plugin' {
+		return openai_plan_error('openai_plugin_plan_unsupported_mapper', 'unsupported mapper ${mapper}')
+	}
+	return mapper
+}
+
+// openai_sanitize_plan_headers filters the headers a plugin plan wants to
+// send upstream. Header names are trimmed; blank names, connection-level
+// headers and any entry whose name or value contains CR or LF are dropped.
+// The returned map is keyed by the trimmed name, original case preserved.
+fn openai_sanitize_plan_headers(headers map[string]string) map[string]string {
+	blocked := ['connection', 'content-length', 'transfer-encoding', 'host', 'server', 'upgrade',
+		'proxy-connection', 'keep-alive', 'te', 'trailer']
+	mut out := map[string]string{}
+	for name, value in headers {
+		// Validate and store the same trimmed string. Storing the raw name
+		// let a trailing CR/LF through, because trimming removed it before
+		// the line-break check ran.
+		clean_name := name.trim_space()
+		lower := clean_name.to_lower()
+		if lower == '' || lower in blocked {
+			continue
+		}
+		if clean_name.contains('\r') || clean_name.contains('\n') || value.contains('\r')
+			|| value.contains('\n') {
+			continue
+		}
+		out[clean_name] = value
+	}
+	return out
+}
+
+// openai_plugin_not_handled reports whether a plugin result declines the
+// request, i.e. its JSON has a truthy `not_handled` or `notHandled` field.
+// Results that do not decode as JSON count as handled (false).
+fn openai_plugin_not_handled(raw string) bool {
+	decoded := json2.decode[json2.Any](raw) or { return false }
+	fields := decoded.as_map()
+	for key in ['not_handled', 'notHandled'] {
+		if flag := fields[key] {
+			if flag.bool() {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// openai_upstream_plan_from_plugin_json_with_defaults decodes a plugin result
+// into an OpenAIUpstreamPlan. The plan fields are read from the `plan` object
+// when present, otherwise from the top level. Missing or empty `path` and
+// `output_protocol` take the supplied defaults; `method` defaults to 'POST'
+// and `stream_mode` to 'passthrough'. Fails when `raw` is not valid JSON.
+fn openai_upstream_plan_from_plugin_json_with_defaults(raw string, default_path string, default_output_protocol string) !OpenAIUpstreamPlan {
+	decoded := json2.decode[json2.Any](raw)!
+	mut fields := decoded.as_map()
+	if nested := fields['plan'] {
+		fields = nested.as_map()
+	}
+	mut plan_body := ''
+	if body_any := fields['body'] {
+		plan_body = body_any.str()
+	}
+	return OpenAIUpstreamPlan{
+		backend:         openai_json_string_field(fields, 'backend', '')
+		method:          openai_json_string_field(fields, 'method', 'POST')
+		path:            openai_json_string_field(fields, 'path', default_path)
+		body:            plan_body
+		upstream_model:  openai_json_string_field(fields, 'upstream_model', '')
+		stream_mode:     openai_json_string_field(fields, 'stream_mode', 'passthrough')
+		response_codec:  openai_json_string_field(fields, 'response_codec', '')
+		output_protocol: openai_json_string_field(fields, 'output_protocol', default_output_protocol)
+		mapper:          openai_json_string_field(fields, 'mapper', '')
+		headers:         openai_json_string_map_field(fields, 'headers')
+	}
+}
+
+// openai_upstream_plan_from_plugin_json decodes a plugin result using the
+// chat-completions defaults for path and output protocol.
+fn openai_upstream_plan_from_plugin_json(raw string) !OpenAIUpstreamPlan {
+	chat_path := '/chat/completions'
+	chat_protocol := 'openai.chat.completion'
+	return openai_upstream_plan_from_plugin_json_with_defaults(raw, chat_path, chat_protocol)
+}
+
+// openai_models_from_plugin_json extracts model names from a plugin result.
+// Names come from the `models` array and from the `id` of every entry in the
+// `data` array; they are trimmed, de-duplicated and returned sorted.
+// Fails when `raw` is not valid JSON.
+fn openai_models_from_plugin_json(raw string) ![]string {
+	decoded := json2.decode[json2.Any](raw)!
+	fields := decoded.as_map()
+	// Gather raw candidates from both shapes first, then normalize once.
+	mut candidates := []string{}
+	if list := fields['models'] {
+		for entry in list.as_array() {
+			candidates << entry.str()
+		}
+	}
+	if rows := fields['data'] {
+		for entry in rows.as_array() {
+			row := entry.as_map()
+			id := row['id'] or { json2.Any('') }
+			candidates << id.str()
+		}
+	}
+	mut models := []string{}
+	for candidate in candidates {
+		name := candidate.trim_space()
+		if name != '' && name !in models {
+			models << name
+		}
+	}
+	models.sort()
+	return models
+}
+
+// openai_call_plugin invokes operation `op` of the 'openai' capability on
+// the configured gateway plugin. Fails with 'openai_plugin_not_configured'
+// when no plugin name is set.
+fn (mut app App) openai_call_plugin(op string, payload string, req_id string, trace_id string, metadata map[string]string) !PluginCallResponse {
+	plugin_name := app.openai_plugin.trim_space()
+	if plugin_name == '' {
+		return error('openai_plugin_not_configured')
+	}
+	request := PluginCallRequest{
+		plugin:     plugin_name
+		capability: 'openai'
+		op:         op
+		request_id: req_id
+		trace_id:   trace_id
+		payload:    payload
+		metadata:   metadata
+	}
+	return app.call_plugin(request)
+}
+
+// openai_plugin_models asks the plugin's 'models' operation for the model
+// list. Returns an unhandled (zero-value) result when the plugin declines,
+// otherwise a handled result with the parsed model names.
+fn (mut app App) openai_plugin_models(method string, path string, req_id string, trace_id string) !OpenAIPluginModelsResult {
+	payload := json.encode(OpenAIPluginModelsPayload{
+		method:     method.to_upper()
+		path:       path
+		base_path:  app.openai_base_path
+		request_id: req_id
+		trace_id:   trace_id
+	})
+	resp := app.openai_call_plugin('models', payload, req_id, trace_id, map[string]string{})!
+	if openai_plugin_not_handled(resp.result) {
+		return OpenAIPluginModelsResult{}
+	}
+	models := openai_models_from_plugin_json(resp.result)!
+	return OpenAIPluginModelsResult{
+		handled: true
+		models:  models
+	}
+}
+
+// openai_resolved_plan_from_plugin_result_with_defaults turns the raw JSON
+// result of a routing plugin into a validated OpenAIResolvedPlan.
+// `model` is copied into the plan as-is; `body` is the original request body,
+// used (with its model replaced by the plan's upstream model) when the plugin
+// supplied no body of its own. `default_path` and `default_output_protocol`
+// fill in fields the plugin left out.
+// Fails when the JSON is invalid, the backend is missing or unknown, or any
+// plan field fails validation; checks run in the order written below, so the
+// first failing one determines the reported error.
+fn (mut app App) openai_resolved_plan_from_plugin_result_with_defaults(model string, body string, raw string, default_path string, default_output_protocol string) !OpenAIResolvedPlan {
+	plan := openai_upstream_plan_from_plugin_json_with_defaults(raw, default_path, default_output_protocol)!
+	backend_name := plan.backend.trim_space()
+	if backend_name == '' {
+		return openai_plan_error('openai_plugin_plan_missing_backend', 'plugin plan must include backend')
+	}
+	backend := app.openai_backends[backend_name] or {
+		return openai_plan_error('openai_plugin_plan_unknown_backend', 'unknown backend ${backend_name}')
+	}
+	plan_method := openai_validate_plan_method(plan.method)!
+	plan_path := openai_validate_plan_path(plan.path)!
+	stream_mode := openai_validate_stream_mode(plan.stream_mode)!
+	// Codec and protocol defaults/limits depend on the validated stream mode.
+	response_codec := openai_validate_response_codec(plan.response_codec, stream_mode)!
+	output_protocol := openai_validate_output_protocol(plan.output_protocol, stream_mode)!
+	mapper := openai_validate_mapper(plan.mapper)!
+	plan_headers := openai_sanitize_plan_headers(plan.headers)
+	// A non-blank plugin body wins; otherwise forward the client's body with
+	// the model swapped for the plan's upstream model.
+	plan_body := if plan.body.trim_space() != '' {
+		plan.body
+	} else {
+		openai_replace_model_in_body(body, plan.upstream_model)
+	}
+	return OpenAIResolvedPlan{
+		backend_name:    backend_name
+		backend:         backend
+		method:          plan_method
+		path:            plan_path
+		body:            plan_body
+		model:           model
+		stream_mode:     stream_mode
+		response_codec:  response_codec
+		output_protocol: output_protocol
+		mapper:          mapper
+		headers:         plan_headers
+	}
+}
+
+// openai_resolved_plan_from_plugin_result validates a plugin result using
+// the chat-completions defaults for path and output protocol.
+fn (mut app App) openai_resolved_plan_from_plugin_result(model string, body string, raw string) !OpenAIResolvedPlan {
+	chat_path := '/chat/completions'
+	chat_protocol := 'openai.chat.completion'
+	return app.openai_resolved_plan_from_plugin_result_with_defaults(model, body, raw,
+		chat_path, chat_protocol)
+}
+
+// openai_plugin_plan asks the plugin's 'chat.route' operation how to serve a
+// chat request. Returns an unhandled (zero-value) result when the plugin
+// declines, otherwise a handled result carrying the validated plan.
+fn (mut app App) openai_plugin_plan(model string, body string, method string, path string, req_id string, trace_id string) !OpenAIPluginPlanResult {
+	payload := json.encode(OpenAIPluginChatPayload{
+		method:     method.to_upper()
+		path:       path
+		model:      model
+		stream:     openai_is_stream_request(body)
+		body:       body
+		base_path:  app.openai_base_path
+		request_id: req_id
+		trace_id:   trace_id
+	})
+	resp := app.openai_call_plugin('chat.route', payload, req_id, trace_id, {
+		'model': model
+	})!
+	if openai_plugin_not_handled(resp.result) {
+		return OpenAIPluginPlanResult{}
+	}
+	plan := app.openai_resolved_plan_from_plugin_result(model, body, resp.result)!
+	return OpenAIPluginPlanResult{
+		handled: true
+		plan:    plan
+	}
+}
+
+// openai_plugin_responses_plan asks the plugin's 'responses.route' operation
+// how to serve a responses request. Plan defaults are '/responses' and
+// 'openai.response'. Returns an unhandled (zero-value) result when the
+// plugin declines.
+fn (mut app App) openai_plugin_responses_plan(model string, body string, method string, path string, req_id string, trace_id string) !OpenAIPluginPlanResult {
+	payload := json.encode(OpenAIPluginResponsesPayload{
+		method:     method.to_upper()
+		path:       path
+		model:      model
+		stream:     openai_is_stream_request(body)
+		body:       body
+		base_path:  app.openai_base_path
+		request_id: req_id
+		trace_id:   trace_id
+	})
+	resp := app.openai_call_plugin('responses.route', payload, req_id, trace_id, {
+		'model': model
+	})!
+	if openai_plugin_not_handled(resp.result) {
+		return OpenAIPluginPlanResult{}
+	}
+	plan := app.openai_resolved_plan_from_plugin_result_with_defaults(model, body, resp.result,
+		'/responses', 'openai.response')!
+	return OpenAIPluginPlanResult{
+		handled: true
+		plan:    plan
+	}
+}
+
+// openai_plugin_fallback_plan asks the plugin's 'chat.fallback' operation
+// for an alternative plan after `failed_plan` failed with the given status
+// and error. Returns an unhandled (zero-value) result when no plugin is
+// configured or the plugin declines.
+fn (mut app App) openai_plugin_fallback_plan(model string, body string, method string, path string, failed_plan OpenAIResolvedPlan, status_code int, error_code string, error_message string, req_id string, trace_id string) !OpenAIPluginPlanResult {
+	if app.openai_plugin.trim_space() == '' {
+		return OpenAIPluginPlanResult{}
+	}
+	payload := json.encode(OpenAIPluginFallbackPayload{
+		method:         method.to_upper()
+		path:           path
+		model:          model
+		stream:         openai_is_stream_request(body)
+		body:           body
+		base_path:      app.openai_base_path
+		failed_backend: failed_plan.backend_name
+		status_code:    status_code
+		error_code:     error_code
+		error_message:  error_message
+		request_id:     req_id
+		trace_id:       trace_id
+	})
+	resp := app.openai_call_plugin('chat.fallback', payload, req_id, trace_id, {
+		'model':          model
+		'failed_backend': failed_plan.backend_name
+	})!
+	if openai_plugin_not_handled(resp.result) {
+		return OpenAIPluginPlanResult{}
+	}
+	plan := app.openai_resolved_plan_from_plugin_result(model, body, resp.result)!
+	return OpenAIPluginPlanResult{
+		handled: true
+		plan:    plan
+	}
+}
+
+// openai_call_executor_op runs operation `op` of the 'openai' capability on
+// the executor plugin named by the plan's backend, passing the plan's body
+// and metadata. Fails with 'openai_executor_missing_name:<backend>' when the
+// backend names no executor.
+fn (mut app App) openai_call_executor_op(plan OpenAIResolvedPlan, op string, method string, path string, req_id string, trace_id string) !PluginCallResponse {
+	executor_name := plan.backend.executor.trim_space()
+	if executor_name == '' {
+		return error('openai_executor_missing_name:${plan.backend_name}')
+	}
+	payload := json.encode(OpenAIExecutorPayload{
+		method:          method.to_upper()
+		path:            path
+		model:           plan.model
+		stream:          openai_is_stream_request(plan.body)
+		body:            plan.body
+		backend:         plan.backend_name
+		request_id:      req_id
+		trace_id:        trace_id
+		response_codec:  plan.response_codec
+		output_protocol: plan.output_protocol
+	})
+	request := PluginCallRequest{
+		plugin:     executor_name
+		capability: 'openai'
+		op:         op
+		request_id: req_id
+		trace_id:   trace_id
+		payload:    payload
+		metadata:   {
+			'model':   plan.model
+			'backend': plan.backend_name
+		}
+	}
+	return app.call_plugin(request)
+}
+
+// openai_call_executor runs the executor's 'chat.execute' operation.
+fn (mut app App) openai_call_executor(plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string) !PluginCallResponse {
+	chat_op := 'chat.execute'
+	return app.openai_call_executor_op(plan, chat_op, method, path, req_id, trace_id)
+}
+
+// openai_call_executor_stream_op is the streaming form of
+// openai_call_executor_op: it sends the same request through
+// call_plugin_stream and hands `on_frame` to it. Fails with
+// 'openai_executor_missing_name:<backend>' when the backend names no executor.
+fn (mut app App) openai_call_executor_stream_op(plan OpenAIResolvedPlan, op string, method string, path string, req_id string, trace_id string, on_frame PluginStreamFrameFn) !PluginStreamCallResponse {
+	executor_name := plan.backend.executor.trim_space()
+	if executor_name == '' {
+		return error('openai_executor_missing_name:${plan.backend_name}')
+	}
+	payload := json.encode(OpenAIExecutorPayload{
+		method:          method.to_upper()
+		path:            path
+		model:           plan.model
+		stream:          openai_is_stream_request(plan.body)
+		body:            plan.body
+		backend:         plan.backend_name
+		request_id:      req_id
+		trace_id:        trace_id
+		response_codec:  plan.response_codec
+		output_protocol: plan.output_protocol
+	})
+	request := PluginCallRequest{
+		plugin:     executor_name
+		capability: 'openai'
+		op:         op
+		request_id: req_id
+		trace_id:   trace_id
+		payload:    payload
+		metadata:   {
+			'model':   plan.model
+			'backend': plan.backend_name
+		}
+	}
+	return app.call_plugin_stream(request, on_frame)
+}
+
+// openai_call_executor_stream runs the executor's 'chat.execute' operation
+// in streaming mode.
+fn (mut app App) openai_call_executor_stream(plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, on_frame PluginStreamFrameFn) !PluginStreamCallResponse {
+	chat_op := 'chat.execute'
+	return app.openai_call_executor_stream_op(plan, chat_op, method, path, req_id, trace_id,
+		on_frame)
+}
+
+// openai_resolve_plan picks the plan for a chat request: the plugin's plan
+// when a plugin is configured and handles the request, otherwise the
+// built-in plan derived from the configured routes.
+fn (mut app App) openai_resolve_plan(model string, body string, method string, path string, req_id string, trace_id string) !OpenAIResolvedPlan {
+	plugin_configured := app.openai_plugin.trim_space() != ''
+	if plugin_configured {
+		plugin_result := app.openai_plugin_plan(model, body, method, path, req_id, trace_id)!
+		if plugin_result.handled {
+			return plugin_result.plan
+		}
+	}
+	matched := app.openai_resolve_route(model)!
+	return openai_builtin_plan_from_route(matched, body)
+}
+
+// openai_resolve_responses_plan picks the plan for a responses request: the
+// plugin's plan when a plugin is configured and handles the request,
+// otherwise the built-in '/responses' plan derived from the routes.
+fn (mut app App) openai_resolve_responses_plan(model string, body string, method string, path string, req_id string, trace_id string) !OpenAIResolvedPlan {
+	plugin_configured := app.openai_plugin.trim_space() != ''
+	if plugin_configured {
+		plugin_result := app.openai_plugin_responses_plan(model, body, method, path,
+			req_id, trace_id)!
+		if plugin_result.handled {
+			return plugin_result.plan
+		}
+	}
+	matched := app.openai_resolve_route(model)!
+	return openai_builtin_plan_from_route_for_endpoint(matched, body, '/responses', 'openai.response')
+}
+
+// openai_resolve_responses_passthrough_plan builds a passthrough plan that
+// forwards `relative_target` upstream unchanged. When the body names a model
+// the matching route is used; otherwise the request goes to the default
+// backend with the body untouched. Fails when there is no default backend to
+// fall back to or the backend is not configured.
+fn (mut app App) openai_resolve_responses_passthrough_plan(relative_target string, body string, method string) !OpenAIResolvedPlan {
+	model := openai_request_model(body)
+	if model.trim_space() == '' {
+		default_backend := app.openai_default_backend.trim_space()
+		if default_backend == '' {
+			return error('openai_responses_passthrough_missing_default_backend')
+		}
+		backend := app.openai_backends[default_backend] or {
+			return error('unknown backend ${default_backend}')
+		}
+		return OpenAIResolvedPlan{
+			backend_name:    default_backend
+			backend:         backend
+			method:          method.to_upper()
+			path:            relative_target
+			body:            body
+			model:           model
+			stream_mode:     'passthrough'
+			response_codec:  'sse'
+			output_protocol: 'openai.response'
+			mapper:          'builtin'
+			headers:         map[string]string{}
+		}
+	}
+	matched := app.openai_resolve_route(model)!
+	return openai_builtin_plan_from_route_for_endpoint_method(matched, body, relative_target,
+		'openai.response', method)
+}
+
+// openai_error sends an OpenAI-style JSON error response whose error type is
+// 'invalid_request_error'.
+fn openai_error(mut app App, mut ctx Context, status int, path string, method string, req_id string, trace_id string, start_ms i64, code string, message string) veb.Result {
+	default_type := 'invalid_request_error'
+	return openai_error_typed(mut app, mut ctx, status, path, method, req_id, trace_id,
+		start_ms, code, message, default_type)
+}
+
+// openai_error_typed sends an OpenAI-style JSON error response with the given
+// HTTP status, error `code`, `message` and error type `typ`.
+// It also sets the 'x-request-id' and 'x-vhttpd-trace-id' response headers
+// (failures to set them are ignored) and emits an 'http.request' event whose
+// duration is measured from `start_ms`.
+fn openai_error_typed(mut app App, mut ctx Context, status int, path string, method string, req_id string, trace_id string, start_ms i64, code string, message string, typ string) veb.Result {
+	body := openai_error_body_json(code, message, typ)
+	ctx.res.set_status(http.status_from_int(status))
+	ctx.set_content_type('application/json; charset=utf-8')
+	ctx.set_custom_header('x-request-id', req_id) or {}
+	ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {}
+	app.emit('http.request', {
+		'method':      method.to_upper()
+		'path':        normalize_path(path)
+		'status':      '${status}'
+		'request_id':  req_id
+		'trace_id':    trace_id
+		'duration_ms': '${time.now().unix_milli() - start_ms}'
+		'provider':    'openai'
+	})
+	// NOTE(review): the body is sent with ctx.text after a JSON content type
+	// was set above; presumably the explicit content type wins - confirm.
+	return ctx.text(body)
+}
+
+// openai_error_body_json encodes an OpenAIErrorResponse carrying `message`,
+// `typ` and `code` as JSON.
+fn openai_error_body_json(code string, message string, typ string) string {
+	detail := OpenAIErrorBody{
+		message: message
+		typ:     typ
+		code:    code
+	}
+	return json.encode(OpenAIErrorResponse{
+		error: detail
+	})
+}
+
+// openai_error_text_field returns obj[key] as text, or '' when the key is
+// missing or holds JSON null (which would otherwise stringify to 'null').
+fn openai_error_text_field(obj map[string]json2.Any, key string) string {
+	value := obj[key] or { return '' }
+	if value is json2.Null {
+		return ''
+	}
+	return value.str()
+}
+
+// openai_upstream_error_from_body extracts (code, message, type) from an
+// upstream error body.
+// Accepted shapes:
+//   - not JSON: the trimmed body text becomes the message;
+//   - {"error": {"code", "message", "type"}}: fields of the nested object;
+//   - {"error": "text"}: the text becomes the message;
+//   - {"code", "message", "type"} at the top level.
+// Any field that is missing, null or empty falls back to `fallback_code`,
+// `fallback_message` and 'server_error' respectively.
+fn openai_upstream_error_from_body(body string, fallback_code string, fallback_message string) (string, string, string) {
+	parsed := json2.decode[json2.Any](body) or {
+		trimmed := body.trim_space()
+		return fallback_code, if trimmed == '' {
+			fallback_message
+		} else {
+			trimmed
+		}, 'server_error'
+	}
+	root := parsed.as_map()
+	mut code := ''
+	mut message := ''
+	mut typ := ''
+	if error_any := root['error'] {
+		if error_any is string {
+			// Some backends report errors as a bare string; `as_map()` on a
+			// string would bury it under a synthetic key and lose the message.
+			message = error_any.str().trim_space()
+		} else {
+			error_obj := error_any.as_map()
+			code = openai_error_text_field(error_obj, 'code')
+			message = openai_error_text_field(error_obj, 'message')
+			typ = openai_error_text_field(error_obj, 'type')
+		}
+	} else {
+		code = openai_error_text_field(root, 'code')
+		message = openai_error_text_field(root, 'message')
+		typ = openai_error_text_field(root, 'type')
+	}
+	final_code := if code == '' { fallback_code } else { code }
+	final_message := if message == '' { fallback_message } else { message }
+	final_typ := if typ == '' { 'server_error' } else { typ }
+	return final_code, final_message, final_typ
+}
+
+// openai_write_error_response_conn writes a JSON error response (headers,
+// then body) straight to `conn`. Write failures are ignored.
+fn openai_write_error_response_conn(mut conn net.TcpConn, status int, headers map[string]string, code string, message string, typ string) {
+	payload := openai_error_body_json(code, message, typ)
+	json_type := 'application/json; charset=utf-8'
+	write_http_stream_headers_conn(mut conn, status, json_type, headers, false) or {}
+	conn.write_string(payload) or {}
+}
+
+// openai_write_sse_error writes an error as an SSE 'data:' event followed by
+// the 'data: [DONE]' terminator. Write failures are ignored, so the
+// terminator is attempted even when the error event could not be written.
+fn openai_write_sse_error(mut conn net.TcpConn, code string, message string, typ string) {
+	frames := [
+		'data: ${openai_error_body_json(code, message, typ)}\n\n',
+		'data: [DONE]\n\n',
+	]
+	for frame in frames {
+		write_chunk(mut conn, frame) or {}
+	}
+}
+
+// openai_finish_passthrough_stream writes the final chunk once: only when
+// headers were already sent and the final chunk has not been written yet.
+fn openai_finish_passthrough_stream(mut state OpenAIStreamProxyState) ! {
+	if !state.headers_written || state.final_written {
+		return
+	}
+	write_final_chunk(mut state.conn)!
+	state.final_written = true
+}
+
+// openai_finish_mapped_stream writes the final chunk once: only when headers
+// were already sent and the final chunk has not been written yet.
+fn openai_finish_mapped_stream(mut state OpenAIMappedStreamProxyState) ! {
+	if !state.headers_written || state.final_written {
+		return
+	}
+	write_final_chunk(mut state.conn)!
+	state.final_written = true
+}
+
+// openai_passthrough_chunk_has_done reports whether the 'data: [DONE]'
+// marker has appeared in the decoded stream, marking the state done when it
+// has. The last 64 bytes seen are kept in `done_probe` so a marker split
+// across two fragments is still found.
+fn openai_passthrough_chunk_has_done(mut state OpenAIStreamProxyState, decoded string) bool {
+	if decoded.len == 0 {
+		return false
+	}
+	window := state.done_probe + decoded
+	if !window.contains('data: [DONE]') {
+		keep_from := if window.len > 64 { window.len - 64 } else { 0 }
+		state.done_probe = window[keep_from..]
+		return false
+	}
+	state.done = true
+	return true
+}
+
+// openai_build_upstream_url joins a backend base URL and a relative target:
+// the base is trimmed and stripped of trailing slashes, then `relative` is
+// appended verbatim.
+fn openai_build_upstream_url(base_url string, relative string) string {
+	trimmed_base := base_url.trim_space().trim_right('/')
+	return trimmed_base + relative
+}
+
+// openai_backend_auth_key returns the API key for a backend: the trimmed
+// inline `api_key` when set, otherwise the value of the environment variable
+// named by `api_key_env`, otherwise ''.
+fn openai_backend_auth_key(backend OpenAIBackendConfig) string {
+	inline_key := backend.api_key.trim_space()
+	if inline_key != '' {
+		return inline_key
+	}
+	env_name := backend.api_key_env.trim_space()
+	return if env_name == '' { '' } else { os.getenv(env_name) }
+}
+
+// openai_http_method maps a method name to http.Method. GET, PUT, PATCH,
+// DELETE and HEAD in `raw` map directly. Any other `raw` value (including
+// 'POST' and '') yields HEAD when `fallback` is 'HEAD', and POST otherwise.
+// NOTE(review): only a 'HEAD' fallback is honoured; other fallback values
+// resolve to POST - confirm that this is intended.
+fn openai_http_method(raw string, fallback string) http.Method {
+	explicit := {
+		'GET':    http.Method.get
+		'PUT':    http.Method.put
+		'PATCH':  http.Method.patch
+		'DELETE': http.Method.delete
+		'HEAD':   http.Method.head
+	}
+	requested := raw.trim_space().to_upper()
+	if method := explicit[requested] {
+		return method
+	}
+	if fallback.trim_space().to_upper() == 'HEAD' {
+		return .head
+	}
+	return .post
+}
+
+// openai_build_headers assembles the headers for an upstream request: the
+// client's Content-Type (default 'application/json'), an Accept header
+// ('text/event-stream' for streams, otherwise the client's Accept or
+// 'application/json'), 'x-request-id', a Bearer Authorization header when
+// the backend has a key, and every entry of `extra` with a non-blank name.
+// Headers that cannot be added are skipped.
+fn openai_build_headers(mut ctx Context, backend OpenAIBackendConfig, req_id string, stream bool, extra map[string]string) http.Header {
+	mut header := http.new_header()
+	content_type := ctx.req.header.get(.content_type) or { 'application/json' }
+	mut accept := 'text/event-stream'
+	if !stream {
+		accept = ctx.req.header.get(.accept) or { 'application/json' }
+	}
+	header.add(.content_type, content_type)
+	header.add(.accept, accept)
+	header.add_custom('x-request-id', req_id) or {}
+	api_key := openai_backend_auth_key(backend)
+	if api_key != '' {
+		header.add(.authorization, 'Bearer ${api_key}')
+	}
+	for name, value in extra {
+		if name.trim_space() == '' {
+			continue
+		}
+		header.add_custom(name, value) or {}
+	}
+	return header
+}
+
+// ensure_openai_stream_headers_written sends the response headers for a
+// passthrough stream the first time it is called; later calls do nothing.
+// 'x-accel-buffering: no' is added to the configured response headers.
+fn ensure_openai_stream_headers_written(mut state OpenAIStreamProxyState) ! {
+	if !state.headers_written {
+		mut outgoing := state.response_headers.clone()
+		outgoing['x-accel-buffering'] = 'no'
+		write_http_stream_headers_conn_with_close(mut state.conn, state.status_code,
+			state.content_type, outgoing, true, false)!
+		state.headers_written = true
+	}
+}
+
+// openai_progress_body_cb is the body-progress callback for passthrough
+// streaming. Each upstream body fragment is chunk-decoded and relayed to the
+// client connection held in the OpenAIStreamProxyState that
+// `request.user_ptr` points to.
+// For statuses >= 400 nothing is relayed; the decoded body is collected in
+// `error_body` instead. Once a 'data: [DONE]' marker is seen the stream is
+// finished and the callback returns the `openai_stream_done_fetch_error`
+// error.
+fn openai_progress_body_cb(request &http.Request, chunk []u8, _body_read_so_far u64, _body_expected_size u64, status_code int) ! {
+	// Recover the typed state pointer from the untyped user_ptr.
+	// NOTE(review): assumes user_ptr was set to a live OpenAIStreamProxyState
+	// by the caller; it is dereferenced below without a nil check.
+	mut state := &OpenAIStreamProxyState(unsafe { nil })
+	pstate := unsafe { &voidptr(&state) }
+	unsafe {
+		*pstate = request.user_ptr
+	}
+	if status_code > 0 {
+		state.status_code = status_code
+	}
+	decoded := openai_decode_progress_chunk(mut state.chunk_decoder, chunk)
+	if state.status_code >= 400 {
+		// Error responses are buffered, not streamed to the client.
+		if decoded.len > 0 {
+			state.error_body += decoded
+		}
+		return
+	}
+	ensure_openai_stream_headers_written(mut state)!
+	if state.method.to_upper() != 'HEAD' && decoded.len > 0 {
+		write_chunk(mut state.conn, decoded)!
+	}
+	if openai_passthrough_chunk_has_done(mut state, decoded) {
+		// Finish the client stream, then return an error for this callback.
+		openai_finish_passthrough_stream(mut state)!
+		return error(openai_stream_done_fetch_error)
+	}
+}
+
+// ensure_openai_mapped_stream_headers_written sends the response headers for
+// a mapped stream the first time it is called; later calls do nothing. The
+// content type is always 'text/event-stream' and 'x-accel-buffering: no' is
+// added to the configured response headers.
+fn ensure_openai_mapped_stream_headers_written(mut state OpenAIMappedStreamProxyState) ! {
+	if !state.headers_written {
+		mut outgoing := state.response_headers.clone()
+		outgoing['x-accel-buffering'] = 'no'
+		write_http_stream_headers_conn_with_close(mut state.conn, state.status_code,
+			'text/event-stream', outgoing, true, false)!
+		state.headers_written = true
+	}
+}
+
+// openai_extract_mapped_row converts one upstream JSON row into an OpenAIFrameMapping
+// without consulting a plugin. A row that is not valid JSON yields an empty,
+// unhandled mapping. Otherwise the first of these keys that is present decides
+// the result: 'message' (content plus optional tool calls), 'tool_calls',
+// 'response', 'content'. A row with none of them still yields a handled mapping
+// carrying only the usage and done values.
+fn openai_extract_mapped_row(line string) OpenAIFrameMapping {
+	decoded := json2.decode[json2.Any](line) or { return OpenAIFrameMapping{} }
+	row := decoded.as_map()
+	is_done := (row['done'] or { json2.Any(false) }).bool()
+	row_usage := openai_usage_from_map(row)
+	if msg_any := row['message'] {
+		msg := msg_any.as_map()
+		mut calls := []json2.Any{}
+		if calls_any := msg['tool_calls'] {
+			calls = calls_any.as_array()
+		}
+		reason := if calls.len > 0 { 'tool_calls' } else { '' }
+		return OpenAIFrameMapping{
+			content:       (msg['content'] or { json2.Any('') }).str()
+			tool_calls:    calls
+			usage:         row_usage
+			done:          is_done
+			handled:       true
+			finish_reason: reason
+		}
+	}
+	if calls_any := row['tool_calls'] {
+		calls := calls_any.as_array()
+		reason := if calls.len > 0 { 'tool_calls' } else { '' }
+		return OpenAIFrameMapping{
+			tool_calls:    calls
+			usage:         row_usage
+			done:          is_done
+			handled:       true
+			finish_reason: reason
+		}
+	}
+	// 'response' takes precedence over 'content' when both are present.
+	for text_key in ['response', 'content'] {
+		if text_any := row[text_key] {
+			return OpenAIFrameMapping{
+				content: text_any.str()
+				usage:   row_usage
+				done:    is_done
+				handled: true
+			}
+		}
+	}
+	return OpenAIFrameMapping{
+		usage:   row_usage
+		done:    is_done
+		handled: true
+	}
+}
+
+// openai_int_field returns obj[key] converted with Any.int(), or 0 when the key
+// is not present in the map.
+fn openai_int_field(obj map[string]json2.Any, key string) int {
+	if found := obj[key] {
+		return found.int()
+	}
+	return 0
+}
+
+// openai_usage_from_map extracts token usage counters from a decoded JSON object.
+// A nested 'usage' object is tried first; if it yields no positive counter the
+// top-level fields are used, where 'prompt_eval_count' is added to
+// 'prompt_tokens' and 'eval_count' to 'completion_tokens'. When 'total_tokens'
+// is not positive it is computed as prompt + completion. An empty map is
+// returned when no positive counter is found.
+fn openai_usage_from_map(root map[string]json2.Any) map[string]int {
+	if nested_any := root['usage'] {
+		nested := nested_any.as_map()
+		nested_prompt := openai_int_field(nested, 'prompt_tokens')
+		nested_completion := openai_int_field(nested, 'completion_tokens')
+		nested_reported := openai_int_field(nested, 'total_tokens')
+		mut nested_total := nested_prompt + nested_completion
+		if nested_reported > 0 {
+			nested_total = nested_reported
+		}
+		if nested_prompt > 0 || nested_completion > 0 || nested_total > 0 {
+			return {
+				'prompt_tokens':     nested_prompt
+				'completion_tokens': nested_completion
+				'total_tokens':      nested_total
+			}
+		}
+	}
+	flat_prompt := openai_int_field(root, 'prompt_tokens') + openai_int_field(root, 'prompt_eval_count')
+	flat_completion := openai_int_field(root, 'completion_tokens') + openai_int_field(root, 'eval_count')
+	flat_reported := openai_int_field(root, 'total_tokens')
+	mut flat_total := flat_prompt + flat_completion
+	if flat_reported > 0 {
+		flat_total = flat_reported
+	}
+	if flat_prompt > 0 || flat_completion > 0 || flat_total > 0 {
+		return {
+			'prompt_tokens':     flat_prompt
+			'completion_tokens': flat_completion
+			'total_tokens':      flat_total
+		}
+	}
+	return map[string]int{}
+}
+
+// openai_merge_usage copies every positive counter from `usage` into `acc`,
+// overwriting the previous value for that key. Zero and negative counters are
+// ignored so they never erase a value seen earlier.
+fn openai_merge_usage(mut acc map[string]int, usage map[string]int) {
+	for name, count in usage {
+		if count <= 0 {
+			continue
+		}
+		acc[name] = count
+	}
+}
+
+// openai_stream_chunk_json renders one 'chat.completion.chunk' JSON object for a
+// mapped frame. The delta only contains 'content' and 'tool_calls' when the
+// mapping provides them, and 'finish_reason' is only emitted when non-empty.
+// The id, creation time and model come from the stream state.
+fn openai_stream_chunk_json(state OpenAIMappedStreamProxyState, mapping OpenAIFrameMapping) string {
+	mut delta_fields := map[string]json2.Any{}
+	if mapping.content != '' {
+		delta_fields['content'] = json2.Any(mapping.content)
+	}
+	if mapping.tool_calls.len > 0 {
+		delta_fields['tool_calls'] = json2.Any(mapping.tool_calls)
+	}
+	mut first_choice := {
+		'index': json2.Any(0)
+		'delta': json2.Any(delta_fields)
+	}
+	if mapping.finish_reason != '' {
+		first_choice['finish_reason'] = json2.Any(mapping.finish_reason)
+	}
+	envelope := {
+		'id':      json2.Any('chatcmpl-${state.request_id}')
+		'object':  json2.Any('chat.completion.chunk')
+		'created': json2.Any(state.created)
+		'model':   json2.Any(state.model)
+		'choices': json2.Any([json2.Any(first_choice)])
+	}
+	return json2.Any(envelope).json_str()
+}
+
+// openai_usage_json_obj converts usage counters into a JSON object with the keys
+// 'prompt_tokens', 'completion_tokens' and 'total_tokens', in that order.
+// Counters missing from `usage` are emitted as 0.
+fn openai_usage_json_obj(usage map[string]int) map[string]json2.Any {
+	mut obj := map[string]json2.Any{}
+	for field in ['prompt_tokens', 'completion_tokens', 'total_tokens'] {
+		obj[field] = json2.Any(usage[field])
+	}
+	return obj
+}
+
+// openai_stream_usage_chunk_json renders the 'chat.completion.chunk' JSON object
+// that carries the accumulated usage counters. Its 'choices' array is empty and
+// 'usage' is built from state.usage.
+fn openai_stream_usage_chunk_json(state OpenAIMappedStreamProxyState) string {
+	envelope := {
+		'id':      json2.Any('chatcmpl-${state.request_id}')
+		'object':  json2.Any('chat.completion.chunk')
+		'created': json2.Any(state.created)
+		'model':   json2.Any(state.model)
+		'choices': json2.Any([]json2.Any{})
+		'usage':   json2.Any(openai_usage_json_obj(state.usage))
+	}
+	return json2.Any(envelope).json_str()
+}
+
+// openai_write_stream_usage_chunk writes the usage chunk as an SSE 'data:' event,
+// making sure the response headers have been sent first. Nothing is written
+// when no usage counters have been collected.
+fn openai_write_stream_usage_chunk(mut state OpenAIMappedStreamProxyState) ! {
+	if state.usage.len > 0 {
+		ensure_openai_mapped_stream_headers_written(mut state)!
+		payload := openai_stream_usage_chunk_json(state)
+		write_chunk(mut state.conn, 'data: ${payload}\n\n')!
+	}
+}
+
+// openai_tool_call_index returns the 'index' field of a tool call converted with
+// Any.int(), or `fallback` when the call has no 'index' key.
+fn openai_tool_call_index(call map[string]json2.Any, fallback int) int {
+	if declared := call['index'] {
+		return declared.int()
+	}
+	return fallback
+}
+
+// openai_merge_tool_call folds an incoming tool-call fragment into an existing
+// one and returns the merged copy; `existing` itself is not modified.
+// 'id' and 'type' are replaced only by non-empty values, 'index' is always
+// replaced when present. Inside 'function', a non-empty 'name' replaces the old
+// one and non-empty 'arguments' text is appended to what was collected before.
+fn openai_merge_tool_call(existing map[string]json2.Any, incoming map[string]json2.Any) map[string]json2.Any {
+	mut result := existing.clone()
+	for field in ['id', 'type', 'index'] {
+		candidate := incoming[field] or { continue }
+		if field != 'index' && candidate.str() == '' {
+			continue
+		}
+		result[field] = candidate
+	}
+	fragment_any := incoming['function'] or { return result }
+	fragment := fragment_any.as_map()
+	mut fn_state := map[string]json2.Any{}
+	if current_any := result['function'] {
+		fn_state = current_any.as_map()
+	}
+	if name_any := fragment['name'] {
+		new_name := name_any.str()
+		if new_name != '' {
+			fn_state['name'] = json2.Any(new_name)
+		}
+	}
+	if args_any := fragment['arguments'] {
+		more_args := args_any.str()
+		if more_args != '' {
+			// Argument text is accumulated across fragments by concatenation.
+			so_far := (fn_state['arguments'] or { json2.Any('') }).str()
+			fn_state['arguments'] = json2.Any(so_far + more_args)
+		}
+	}
+	result['function'] = json2.Any(fn_state)
+	return result
+}
+
+// openai_merge_tool_calls merges a batch of tool-call fragments into `acc`.
+// Each fragment is matched against the accumulated calls by index; a call
+// without an explicit 'index' counts as having its position in `acc`, and a
+// fragment without one counts as having the current length of `acc`.
+// Matching fragments are merged in place, the rest are appended.
+fn openai_merge_tool_calls(mut acc []json2.Any, calls []json2.Any) {
+	for incoming_any in calls {
+		incoming := incoming_any.as_map()
+		wanted := openai_tool_call_index(incoming, acc.len)
+		mut slot := -1
+		for pos in 0 .. acc.len {
+			if openai_tool_call_index(acc[pos].as_map(), pos) == wanted {
+				slot = pos
+				break
+			}
+		}
+		if slot >= 0 {
+			acc[slot] = json2.Any(openai_merge_tool_call(acc[slot].as_map(), incoming))
+		} else {
+			acc << json2.Any(incoming)
+		}
+	}
+}
+
+// openai_plugin_map_frame_result converts the raw JSON reply of a frame-mapper
+// plugin into an OpenAIFrameMapping.
+//
+// - A reply recognised by openai_plugin_not_handled yields an empty, unhandled mapping.
+// - A reply that is not valid JSON yields a handled mapping with the error
+//   'invalid mapper response'.
+// - A reply with an 'error' key yields a handled, done mapping carrying the error
+//   text: the string itself for a string error, the object's 'message' for an
+//   object error, and 'mapper error' when no text is available.
+// - Otherwise content, tool calls, usage, done and finish_reason are read from the
+//   reply; finish_reason defaults to 'tool_calls' when tool calls are present.
+fn openai_plugin_map_frame_result(raw string) OpenAIFrameMapping {
+	if openai_plugin_not_handled(raw) {
+		return OpenAIFrameMapping{}
+	}
+	parsed := json2.decode[json2.Any](raw) or {
+		return OpenAIFrameMapping{
+			handled: true
+			error:   'invalid mapper response'
+		}
+	}
+	root := parsed.as_map()
+	if error_any := root['error'] {
+		// Any.as_map() wraps a scalar as {'0': value}, so a plain string error would
+		// otherwise look like a non-empty object and lose its text.
+		is_text := error_any is string
+		error_obj := error_any.as_map()
+		if !is_text && error_obj.len > 0 {
+			message := (error_obj['message'] or { json2.Any('mapper error') }).str()
+			return OpenAIFrameMapping{
+				done:    true
+				handled: true
+				error:   message
+			}
+		}
+		err_msg := error_any.str()
+		return OpenAIFrameMapping{
+			done:    true
+			handled: true
+			error:   if err_msg == '' { 'mapper error' } else { err_msg }
+		}
+	}
+	content := (root['content'] or { json2.Any('') }).str()
+	mut tool_calls := []json2.Any{}
+	if tool_calls_any := root['tool_calls'] {
+		tool_calls = tool_calls_any.as_array()
+	}
+	usage := openai_usage_from_map(root)
+	done := (root['done'] or { json2.Any(false) }).bool()
+	default_reason := if tool_calls.len > 0 { 'tool_calls' } else { '' }
+	return OpenAIFrameMapping{
+		content:       content
+		tool_calls:    tool_calls
+		usage:         usage
+		done:          done
+		handled:       true
+		finish_reason: openai_json_string_field(root, 'finish_reason', default_reason)
+	}
+}
+
+// openai_plugin_map_frame asks the plugin to map one upstream frame by calling
+// its 'chat.map_frame' operation and converts the reply with
+// openai_plugin_map_frame_result. Errors from the plugin call are propagated.
+fn (mut app App) openai_plugin_map_frame(plan OpenAIResolvedPlan, frame string, req_id string, trace_id string) !OpenAIFrameMapping {
+	payload := json.encode(OpenAIPluginMapFramePayload{
+		model:           plan.model
+		frame:           frame
+		response_codec:  plan.response_codec
+		output_protocol: plan.output_protocol
+		request_id:      req_id
+		trace_id:        trace_id
+	})
+	labels := {
+		'model':  plan.model
+		'mapper': 'plugin'
+	}
+	reply := app.openai_call_plugin('chat.map_frame', payload, req_id, trace_id, labels)!
+	return openai_plugin_map_frame_result(reply.result)
+}
+
+// openai_map_line_with_plugin maps one line through the plugin, using the model,
+// codec and protocol stored in the stream state. A failing plugin call is turned
+// into a handled, done mapping whose error is the failure message.
+fn openai_map_line_with_plugin(mut state OpenAIMappedStreamProxyState, line string) OpenAIFrameMapping {
+	mut app := unsafe { &App(state.app) }
+	stream_plan := OpenAIResolvedPlan{
+		model:           state.model
+		response_codec:  state.response_codec
+		output_protocol: state.output_protocol
+	}
+	return app.openai_plugin_map_frame(stream_plan, line, state.request_id, state.trace_id) or {
+		OpenAIFrameMapping{
+			done:    true
+			handled: true
+			error:   err.msg()
+		}
+	}
+}
+
+// openai_write_mapped_stream_line maps one upstream line and writes the resulting
+// SSE events to the client.
+//
+// Blank lines are ignored. With the 'plugin' mapper the plugin is asked first and
+// the built-in row extraction is used when the plugin does not handle the line.
+// A mapping error is reported as an SSE error and ends the stream. Content and
+// tool calls are written as a chunk event, usage is accumulated, and a mapping
+// marked done writes the usage chunk and 'data: [DONE]' before finishing.
+// Write errors are propagated.
+fn openai_write_mapped_stream_line(mut state OpenAIMappedStreamProxyState, line string) ! {
+	// Every path below that sets state.done also finishes the stream, so a line that
+	// arrives afterwards must not be written to the client any more.
+	if state.done {
+		return
+	}
+	trimmed := line.trim_space()
+	if trimmed == '' {
+		return
+	}
+	mapping := if state.mapper == 'plugin' {
+		plugin_mapping := openai_map_line_with_plugin(mut state, trimmed)
+		if plugin_mapping.error != '' {
+			state.mapper_error = plugin_mapping.error
+		}
+		if plugin_mapping.handled {
+			plugin_mapping
+		} else {
+			openai_extract_mapped_row(trimmed)
+		}
+	} else {
+		openai_extract_mapped_row(trimmed)
+	}
+	if mapping.error != '' {
+		state.mapper_error = mapping.error
+		ensure_openai_mapped_stream_headers_written(mut state)!
+		openai_write_sse_error(mut state.conn, 'mapper_error', mapping.error, 'server_error')
+		state.done = true
+		openai_finish_mapped_stream(mut state)!
+		return
+	}
+	if mapping.content != '' || mapping.tool_calls.len > 0 {
+		ensure_openai_mapped_stream_headers_written(mut state)!
+		write_chunk(mut state.conn, 'data: ${openai_stream_chunk_json(state, mapping)}\n\n')!
+	}
+	openai_merge_usage(mut state.usage, mapping.usage)
+	if mapping.done {
+		ensure_openai_mapped_stream_headers_written(mut state)!
+		openai_write_stream_usage_chunk(mut state)!
+		write_chunk(mut state.conn, 'data: [DONE]\n\n')!
+		state.done = true
+		openai_finish_mapped_stream(mut state)!
+	}
+}
+
+// openai_mapped_progress_body_cb is the on_progress_body callback used for mapped
+// streaming. Decoded upstream bytes are collected in state.line_buffer and every
+// complete line is passed to openai_write_mapped_stream_line. Error responses
+// (status >= 400) are buffered in state.error_body instead. Once the stream is
+// done, error(openai_stream_done_fetch_error) is returned as a sentinel that the
+// caller compares against to recognise a normal end of stream.
+fn openai_mapped_progress_body_cb(request &http.Request, chunk []u8, _body_read_so_far u64, _body_expected_size u64, status_code int) ! {
+	// Recover the typed state pointer from the opaque request.user_ptr by storing the
+	// raw pointer value into `state` through a voidptr alias.
+	mut state := &OpenAIMappedStreamProxyState(unsafe { nil })
+	pstate := unsafe { &voidptr(&state) }
+	unsafe {
+		*pstate = request.user_ptr
+	}
+	if status_code > 0 {
+		state.status_code = status_code
+	}
+	decoded := openai_decode_progress_chunk(mut state.chunk_decoder, chunk)
+	if state.status_code >= 400 {
+		if decoded.len > 0 {
+			state.error_body += decoded
+		}
+		return
+	}
+	if state.method.to_upper() == 'HEAD' || decoded.len == 0 {
+		return
+	}
+	state.line_buffer += decoded
+	// Stop as soon as a line ends the stream: later lines in the same chunk must not
+	// be written after the stream has been finished.
+	for !state.done && state.line_buffer.contains('\n') {
+		line := state.line_buffer.all_before('\n')
+		state.line_buffer = state.line_buffer.all_after('\n')
+		openai_write_mapped_stream_line(mut state, line)!
+	}
+	if state.done {
+		// Drop whatever followed the final line so it is not flushed later.
+		state.line_buffer = ''
+		return error(openai_stream_done_fetch_error)
+	}
+}
+
+// openai_reset_mapped_stream_state_for_plan prepares a mapped stream state for
+// another attempt with a different plan. Everything collected from the previous
+// attempt (status, buffered lines, error body, usage, decoder state, done and
+// error markers) is cleared, and the model, mapper, codec, protocol and backend
+// header are taken from `plan`. headers_written is left untouched.
+fn openai_reset_mapped_stream_state_for_plan(mut state OpenAIMappedStreamProxyState, plan OpenAIResolvedPlan) {
+	state.status_code = 200
+	state.response_headers['x-vhttpd-openai-backend'] = plan.backend_name
+	state.line_buffer = ''
+	// The decoder may hold partial state from the failed response body; start the new
+	// attempt with a fresh one, as the passthrough fallback does.
+	state.chunk_decoder = OpenAIChunkDecodeState{}
+	state.model = plan.model
+	state.mapper = plan.mapper
+	state.response_codec = plan.response_codec
+	state.output_protocol = plan.output_protocol
+	state.done = false
+	state.mapper_error = ''
+	state.error_body = ''
+	state.usage = map[string]int{}
+	state.final_written = false
+}
+
+// openai_fetch_mapped_stream performs the upstream request for a mapped stream,
+// feeding the response body to openai_mapped_progress_body_cb with `state` as
+// user pointer. It returns '' on success, including when the fetch was stopped
+// with the openai_stream_done_fetch_error sentinel, and the error message
+// otherwise.
+fn openai_fetch_mapped_stream(mut ctx Context, mut state OpenAIMappedStreamProxyState, plan OpenAIResolvedPlan, method string, req_id string) string {
+	target := openai_build_upstream_url(plan.backend.base_url, plan.path)
+	verb := openai_http_method(plan.method, method)
+	upstream_headers := openai_build_headers(mut ctx, plan.backend, req_id, true, plan.headers)
+	_ := http.fetch(
+		url:                target
+		method:             verb
+		header:             upstream_headers
+		data:               plan.body
+		on_progress_body:   openai_mapped_progress_body_cb
+		user_ptr:           state
+		stop_copying_limit: 65536
+	) or {
+		reason := err.msg()
+		return if reason == openai_stream_done_fetch_error { '' } else { reason }
+	}
+	return ''
+}
+
+// openai_proxy_mapped_stream proxies a streaming request whose upstream response
+// has to be mapped line by line into OpenAI chat completion chunks.
+//
+// The plan is validated first (only the 'ndjson' -> 'openai.chat.completion'
+// mapper, an HTTP backend kind and a non-empty base_url are accepted); failures
+// are answered through openai_error. The client connection is then taken over
+// and the upstream is fetched. If the fetch fails or the upstream answers with
+// status >= 400 before any headers were sent, a plugin fallback plan is
+// requested and, when it is a 'mapped' plan, tried once. Remaining failures are
+// written as an error response and the connection is closed. Otherwise the
+// stream is completed and an 'http.request' event is emitted.
+fn openai_proxy_mapped_stream(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result {
+	if plan.response_codec != 'ndjson' || plan.output_protocol != 'openai.chat.completion' {
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'openai_plugin_plan_unsupported_mapper', 'unsupported mapper ${plan.response_codec} -> ${plan.output_protocol}')
+	}
+	if plan.backend.kind.trim_space() !in ['', 'openai_http', 'http'] {
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'unsupported_backend', 'unsupported OpenAI backend kind ${plan.backend.kind}')
+	}
+	if plan.backend.base_url.trim_space() == '' {
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'missing_backend_base_url', 'OpenAI backend ${plan.backend_name} has no base_url')
+	}
+	// From here on the response is written directly to the connection, with no
+	// read or write timeout.
+	ctx.takeover_conn_reusable()
+	ctx.conn.set_write_timeout(time.infinite)
+	ctx.conn.set_read_timeout(time.infinite)
+	mut client_conn := ctx.conn
+	mut headers := map[string]string{}
+	headers['x-request-id'] = req_id
+	headers['x-vhttpd-trace-id'] = trace_id
+	headers['x-vhttpd-openai-backend'] = plan.backend_name
+	mut state := &OpenAIMappedStreamProxyState{
+		app:              unsafe { &app }
+		conn:             client_conn
+		method:           method
+		status_code:      200
+		response_headers: headers
+		model:            plan.model
+		request_id:       req_id
+		trace_id:         trace_id
+		mapper:           plan.mapper
+		response_codec:   plan.response_codec
+		output_protocol:  plan.output_protocol
+		created:          int(time.now().unix())
+	}
+	mut fetch_err_msg := openai_fetch_mapped_stream(mut ctx, mut state, plan, method,
+		req_id)
+	// Fallback 1: the fetch itself failed and nothing has been sent to the client yet.
+	if fetch_err_msg != '' && !state.headers_written {
+		fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method, path,
+			plan, 502, 'upstream_fetch_failed', fetch_err_msg, req_id, trace_id) or {
+			OpenAIPluginPlanResult{}
+		}
+		if fallback.handled && fallback.plan.stream_mode == 'mapped' {
+			openai_reset_mapped_stream_state_for_plan(mut state, fallback.plan)
+			fallback_err_msg := openai_fetch_mapped_stream(mut ctx, mut state, fallback.plan,
+				method, req_id)
+			if fallback_err_msg == '' {
+				fetch_err_msg = ''
+			} else {
+				fetch_err_msg = fallback_err_msg
+			}
+		}
+	}
+	// Still failing with nothing sent: answer with a 502 error response.
+	if fetch_err_msg != '' && !state.headers_written {
+		err_headers := {
+			'x-request-id':         req_id
+			'x-vhttpd-trace-id':    trace_id
+			'x-vhttpd-error-class': 'openai_upstream_fetch_failed'
+		}
+		openai_write_error_response_conn(mut client_conn, 502, err_headers, 'upstream_fetch_failed',
+			fetch_err_msg, 'server_error')
+		client_conn.close() or {}
+		return veb.no_result()
+	}
+	// Fallback 2: the upstream answered with an error status and nothing has been
+	// sent to the client yet. The fallback plan is requested with the original `plan`.
+	if state.status_code >= 400 && !state.headers_written {
+		code, message, _ := openai_upstream_error_from_body(state.error_body, 'upstream_error',
+			'upstream returned HTTP ${state.status_code}')
+		fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method, path,
+			plan, state.status_code, code, message, req_id, trace_id) or {
+			OpenAIPluginPlanResult{}
+		}
+		if fallback.handled && fallback.plan.stream_mode == 'mapped' {
+			openai_reset_mapped_stream_state_for_plan(mut state, fallback.plan)
+			fallback_err_msg := openai_fetch_mapped_stream(mut ctx, mut state, fallback.plan,
+				method, req_id)
+			if fallback_err_msg != '' && !state.headers_written {
+				err_headers := {
+					'x-request-id':         req_id
+					'x-vhttpd-trace-id':    trace_id
+					'x-vhttpd-error-class': 'openai_upstream_fetch_failed'
+				}
+				openai_write_error_response_conn(mut client_conn, 502, err_headers, 'upstream_fetch_failed',
+					fallback_err_msg, 'server_error')
+				client_conn.close() or {}
+				return veb.no_result()
+			}
+		}
+	}
+	// The (possibly fallback) upstream still reports an error status: relay it.
+	if state.status_code >= 400 && !state.headers_written {
+		code, message, typ := openai_upstream_error_from_body(state.error_body, 'upstream_error',
+			'upstream returned HTTP ${state.status_code}')
+		err_headers := {
+			'x-request-id':         req_id
+			'x-vhttpd-trace-id':    trace_id
+			'x-vhttpd-error-class': 'openai_upstream_error'
+		}
+		openai_write_error_response_conn(mut client_conn, state.status_code, err_headers,
+			code, message, typ)
+		client_conn.close() or {}
+		return veb.no_result()
+	}
+	// Flush a trailing line that was not terminated by a newline.
+	if state.line_buffer.trim_space() != '' {
+		openai_write_mapped_stream_line(mut state, state.line_buffer) or {}
+		state.line_buffer = ''
+	}
+	// Report a mapper error that has not been finalised yet.
+	// NOTE(review): this relies on openai_finish_mapped_stream setting final_written
+	// so an error already reported per line is not written twice - confirm.
+	if state.mapper_error != '' && !state.final_written {
+		ensure_openai_mapped_stream_headers_written(mut state) or {}
+		openai_write_sse_error(mut client_conn, 'mapper_error', state.mapper_error, 'server_error')
+		state.done = true
+		openai_finish_mapped_stream(mut state) or {}
+	}
+	// The upstream ended without a done marker: terminate the stream ourselves.
+	if !state.done {
+		ensure_openai_mapped_stream_headers_written(mut state) or {}
+		openai_write_stream_usage_chunk(mut state) or {}
+		write_chunk(mut client_conn, 'data: [DONE]\n\n') or {}
+		state.done = true
+	}
+	if state.headers_written {
+		openai_finish_mapped_stream(mut state) or {}
+	}
+	// NOTE(review): 'backend' and 'mapper' are taken from the original plan even when
+	// a fallback plan served the response - confirm this is intended.
+	app.emit('http.request', {
+		'method':      method.to_upper()
+		'path':        normalize_path(path)
+		'status':      '${state.status_code}'
+		'request_id':  req_id
+		'trace_id':    trace_id
+		'duration_ms': '${time.now().unix_milli() - start_ms}'
+		'provider':    'openai'
+		'backend':     plan.backend_name
+		'mapper':      '${plan.response_codec}->${plan.output_protocol}'
+	})
+	return veb.no_result()
+}
+
+// openai_fetch_passthrough_stream performs the upstream request for a passthrough
+// stream, feeding the response body to openai_progress_body_cb with `state` as
+// user pointer. It returns '' on success and the error message otherwise.
+fn openai_fetch_passthrough_stream(mut ctx Context, mut state OpenAIStreamProxyState, plan OpenAIResolvedPlan, method string, req_id string) string {
+	_ := http.fetch(
+		url:                openai_build_upstream_url(plan.backend.base_url, plan.path)
+		method:             openai_http_method(plan.method, method)
+		header:             openai_build_headers(mut ctx, plan.backend, req_id, true,
+			plan.headers)
+		data:               plan.body
+		on_progress_body:   openai_progress_body_cb
+		user_ptr:           state
+		stop_copying_limit: 65536
+	) or {
+		// The callback aborts the fetch with this sentinel after the stream has been
+		// completed; that is a normal end of stream, not a failure.
+		if err.msg() == openai_stream_done_fetch_error {
+			return ''
+		}
+		return err.msg()
+	}
+	return ''
+}
+
+// openai_proxy_stream proxies a streaming request whose upstream response is
+// forwarded to the client unchanged.
+//
+// The plan is validated first (backend kind and non-empty base_url); failures are
+// answered through openai_error. The client connection is then taken over and
+// the upstream is fetched. If the fetch fails or the upstream answers with
+// status >= 400 before any headers were sent, a plugin fallback plan is
+// requested and, when it is a 'passthrough' plan, tried once. An error response
+// is only written while no response headers have been sent; once streaming has
+// started the stream is finished instead.
+fn openai_proxy_stream(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result {
+	// NOTE(review): the mapped and non-streaming paths also accept kind 'http' -
+	// confirm whether it is excluded here on purpose.
+	if plan.backend.kind.trim_space() !in ['', 'openai_http'] {
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'unsupported_backend', 'unsupported OpenAI backend kind ${plan.backend.kind}')
+	}
+	if plan.backend.base_url.trim_space() == '' {
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'missing_backend_base_url', 'OpenAI backend ${plan.backend_name} has no base_url')
+	}
+	// From here on the response is written directly to the connection, with no
+	// read or write timeout.
+	ctx.takeover_conn_reusable()
+	ctx.conn.set_write_timeout(time.infinite)
+	ctx.conn.set_read_timeout(time.infinite)
+	mut client_conn := ctx.conn
+	mut headers := map[string]string{}
+	headers['x-request-id'] = req_id
+	headers['x-vhttpd-trace-id'] = trace_id
+	headers['x-vhttpd-openai-backend'] = plan.backend_name
+	mut state := &OpenAIStreamProxyState{
+		conn:             client_conn
+		method:           method
+		status_code:      200
+		content_type:     'text/event-stream'
+		response_headers: headers
+	}
+	fetch_method := openai_http_method(plan.method, method)
+	mut fetch_err_msg := ''
+	_ := http.fetch(
+		url:                openai_build_upstream_url(plan.backend.base_url, plan.path)
+		method:             fetch_method
+		header:             openai_build_headers(mut ctx, plan.backend, req_id, true,
+			plan.headers)
+		data:               plan.body
+		on_progress_body:   openai_progress_body_cb
+		user_ptr:           state
+		stop_copying_limit: 65536
+	) or {
+		// The sentinel error marks a normally completed stream, not a failure.
+		if err.msg() == openai_stream_done_fetch_error {
+			fetch_err_msg = ''
+		} else {
+			fetch_err_msg = err.msg()
+		}
+		http.Response{}
+	}
+	// Fallback 1: the fetch itself failed and nothing has been sent to the client yet.
+	if fetch_err_msg != '' && !state.headers_written {
+		fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method, path,
+			plan, 502, 'upstream_fetch_failed', fetch_err_msg, req_id, trace_id) or {
+			OpenAIPluginPlanResult{}
+		}
+		if fallback.handled && fallback.plan.stream_mode == 'passthrough' {
+			state.status_code = 200
+			state.error_body = ''
+			state.chunk_decoder = OpenAIChunkDecodeState{}
+			state.done = false
+			state.done_probe = ''
+			state.final_written = false
+			state.response_headers['x-vhttpd-openai-backend'] = fallback.plan.backend_name
+			fallback_err_msg := openai_fetch_passthrough_stream(mut ctx, mut state, fallback.plan,
+				method, req_id)
+			if fallback_err_msg == '' {
+				fetch_err_msg = ''
+			} else {
+				fetch_err_msg = fallback_err_msg
+			}
+		}
+	}
+	// Still failing with nothing sent: answer with a 502 error response.
+	if fetch_err_msg != '' && !state.headers_written {
+		err_headers := {
+			'x-request-id':         req_id
+			'x-vhttpd-trace-id':    trace_id
+			'x-vhttpd-error-class': 'openai_upstream_fetch_failed'
+		}
+		openai_write_error_response_conn(mut client_conn, 502, err_headers, 'upstream_fetch_failed',
+			fetch_err_msg, 'server_error')
+		client_conn.close() or {}
+		return veb.no_result()
+	}
+	// Fallback 2: the upstream answered with an error status and nothing has been
+	// sent to the client yet.
+	if state.status_code >= 400 && !state.headers_written {
+		// Remember the upstream status: the fallback attempt resets state.status_code.
+		upstream_status := state.status_code
+		code, message, typ := openai_upstream_error_from_body(state.error_body, 'upstream_error',
+			'upstream returned HTTP ${state.status_code}')
+		fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method, path,
+			plan, state.status_code, code, message, req_id, trace_id) or {
+			OpenAIPluginPlanResult{}
+		}
+		if fallback.handled && fallback.plan.stream_mode == 'passthrough' {
+			state.status_code = 200
+			state.error_body = ''
+			state.chunk_decoder = OpenAIChunkDecodeState{}
+			state.done = false
+			state.done_probe = ''
+			state.final_written = false
+			state.response_headers['x-vhttpd-openai-backend'] = fallback.plan.backend_name
+			fallback_err_msg := openai_fetch_passthrough_stream(mut ctx, mut state, fallback.plan,
+				method, req_id)
+			if fallback_err_msg == '' && state.status_code < 400 {
+				if !state.headers_written {
+					ensure_openai_stream_headers_written(mut state) or {}
+				}
+				if state.headers_written {
+					openai_finish_passthrough_stream(mut state) or {}
+				}
+				return veb.no_result()
+			}
+		}
+		// The fallback already started streaming to the client; an error response can
+		// no longer be sent, so just finish the stream.
+		if state.headers_written {
+			openai_finish_passthrough_stream(mut state) or {}
+			return veb.no_result()
+		}
+		if state.status_code >= 400 {
+			code2, message2, typ2 := openai_upstream_error_from_body(state.error_body,
+				'upstream_error', 'upstream returned HTTP ${state.status_code}')
+			err_headers := {
+				'x-request-id':         req_id
+				'x-vhttpd-trace-id':    trace_id
+				'x-vhttpd-error-class': 'openai_upstream_error'
+			}
+			openai_write_error_response_conn(mut client_conn, state.status_code, err_headers,
+				code2, message2, typ2)
+			client_conn.close() or {}
+			return veb.no_result()
+		}
+		// The fallback fetch failed before producing a response: report the original
+		// upstream error with its original status.
+		err_headers := {
+			'x-request-id':         req_id
+			'x-vhttpd-trace-id':    trace_id
+			'x-vhttpd-error-class': 'openai_upstream_error'
+		}
+		openai_write_error_response_conn(mut client_conn, upstream_status, err_headers,
+			code, message, typ)
+		client_conn.close() or {}
+		return veb.no_result()
+	}
+	if !state.headers_written {
+		ensure_openai_stream_headers_written(mut state) or {}
+	}
+	if state.headers_written {
+		openai_finish_passthrough_stream(mut state) or {}
+	}
+	app.emit('http.request', {
+		'method':      method.to_upper()
+		'path':        normalize_path(path)
+		'status':      '${state.status_code}'
+		'request_id':  req_id
+		'trace_id':    trace_id
+		'duration_ms': '${time.now().unix_milli() - start_ms}'
+		'provider':    'openai'
+		'backend':     plan.backend_name
+	})
+	return veb.no_result()
+}
+
+// openai_proxy_once handles a non-streaming request by delegating to
+// openai_proxy_once_attempt with fallback enabled.
+fn openai_proxy_once(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result {
+	allow_fallback := true
+	return openai_proxy_once_attempt(mut app, mut ctx, plan, method, path, req_id, trace_id,
+		start_ms, allow_fallback)
+}
+
+// openai_proxy_once_attempt performs one non-streaming upstream request and
+// writes the response.
+//
+// Plans with backend kind 'executor' are delegated to openai_proxy_executor_once.
+// For HTTP backends the upstream is fetched; on a fetch failure or a status
+// >= 400 a plugin fallback plan is requested when `allow_fallback` is true and
+// retried once with fallback disabled. For 'mapped' plans the upstream body is
+// converted with openai_map_once_response. HEAD requests get an empty body.
+fn openai_proxy_once_attempt(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64, allow_fallback bool) veb.Result {
+	if plan.backend.kind.trim_space() == 'executor' {
+		return openai_proxy_executor_once(mut app, mut ctx, plan, method, path, req_id,
+			trace_id, start_ms)
+	}
+	if plan.backend.kind.trim_space() !in ['', 'openai_http', 'http'] {
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'unsupported_backend', 'unsupported OpenAI backend kind ${plan.backend.kind}')
+	}
+	if plan.backend.base_url.trim_space() == '' {
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'missing_backend_base_url', 'OpenAI backend ${plan.backend_name} has no base_url')
+	}
+	resp := http.fetch(
+		url:    openai_build_upstream_url(plan.backend.base_url, plan.path)
+		method: openai_http_method(plan.method, method)
+		header: openai_build_headers(mut ctx, plan.backend, req_id, false, plan.headers)
+		data:   plan.body
+	) or {
+		if allow_fallback {
+			fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method,
+				path, plan, 502, 'upstream_fetch_failed', err.msg(), req_id, trace_id) or {
+				OpenAIPluginPlanResult{}
+			}
+			if fallback.handled {
+				return openai_proxy_once_attempt(mut app, mut ctx, fallback.plan, method,
+					path, req_id, trace_id, start_ms, false)
+			}
+		}
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'upstream_fetch_failed', err.msg())
+	}
+	if resp.status_code >= 400 {
+		code, message, typ := openai_upstream_error_from_body(resp.body, 'upstream_error',
+			'upstream returned HTTP ${resp.status_code}')
+		if allow_fallback {
+			fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method,
+				path, plan, resp.status_code, code, message, req_id, trace_id) or {
+				OpenAIPluginPlanResult{}
+			}
+			if fallback.handled {
+				return openai_proxy_once_attempt(mut app, mut ctx, fallback.plan, method,
+					path, req_id, trace_id, start_ms, false)
+			}
+		}
+		return openai_error_typed(mut app, mut ctx, resp.status_code, path, method, req_id,
+			trace_id, start_ms, code, message, typ)
+	}
+	ctx.res.set_status(http.status_from_int(resp.status_code))
+	ctx.set_content_type(if plan.stream_mode == 'mapped' {
+		'application/json; charset=utf-8'
+	} else {
+		openai_response_content_type(resp.header, 'application/json; charset=utf-8')
+	})
+	ctx.set_custom_header('x-request-id', req_id) or {}
+	ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {}
+	ctx.set_custom_header('x-vhttpd-openai-backend', plan.backend_name) or {}
+	// Map the body before recording the request: a mapping failure is answered with
+	// a 502 and must not be recorded with the upstream's success status.
+	mut body := resp.body
+	if plan.stream_mode == 'mapped' {
+		body = openai_map_once_response(plan, resp.body, req_id, int(time.now().unix())) or {
+			return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id,
+				start_ms, openai_plan_error_code(err.msg()), openai_plan_error_message(err.msg()))
+		}
+	}
+	app.emit('http.request', {
+		'method':      method.to_upper()
+		'path':        normalize_path(path)
+		'status':      '${resp.status_code}'
+		'request_id':  req_id
+		'trace_id':    trace_id
+		'duration_ms': '${time.now().unix_milli() - start_ms}'
+		'provider':    'openai'
+		'backend':     plan.backend_name
+	})
+	return ctx.text(if method.to_upper() == 'HEAD' { '' } else { body })
+}
+
+// openai_map_once_response converts a complete upstream body into a single
+// 'chat.completion' JSON document. Only the 'ndjson' and 'json' codecs with the
+// 'openai.chat.completion' output protocol are supported; anything else returns
+// a plan error. For 'ndjson' every line is mapped and the content, tool calls
+// and usage are accumulated; for 'json' the whole body is mapped as one row.
+// finish_reason is 'tool_calls' when tool calls were collected, else 'stop'.
+fn openai_map_once_response(plan OpenAIResolvedPlan, body string, req_id string, created int) !string {
+	codec_ok := plan.response_codec in ['ndjson', 'json']
+	if !codec_ok || plan.output_protocol != 'openai.chat.completion' {
+		return openai_plan_error('openai_plugin_plan_unsupported_mapper', 'unsupported mapper ${plan.response_codec} -> ${plan.output_protocol}')
+	}
+	// A 'json' body is treated as a single row.
+	rows := if plan.response_codec == 'ndjson' { body.split_into_lines() } else { [body] }
+	mut text := ''
+	mut calls := []json2.Any{}
+	mut totals := map[string]int{}
+	for row in rows {
+		row_mapping := openai_extract_mapped_row(row)
+		text += row_mapping.content
+		openai_merge_tool_calls(mut calls, row_mapping.tool_calls)
+		openai_merge_usage(mut totals, row_mapping.usage)
+	}
+	mut reply := {
+		'role':    json2.Any('assistant')
+		'content': json2.Any(text)
+	}
+	mut reason := 'stop'
+	if calls.len > 0 {
+		reply['tool_calls'] = json2.Any(calls)
+		reason = 'tool_calls'
+	}
+	first_choice := {
+		'index':         json2.Any(0)
+		'message':       json2.Any(reply)
+		'finish_reason': json2.Any(reason)
+	}
+	mut envelope := {
+		'id':      json2.Any('chatcmpl-${req_id}')
+		'object':  json2.Any('chat.completion')
+		'created': json2.Any(created)
+		'model':   json2.Any(plan.model)
+		'choices': json2.Any([json2.Any(first_choice)])
+	}
+	if totals.len > 0 {
+		envelope['usage'] = json2.Any(openai_usage_json_obj(totals))
+	}
+	return json2.Any(envelope).json_str()
+}
+
+// openai_executor_mapping_from_result converts a raw executor result into an
+// OpenAIFrameMapping. A result that is not a non-empty JSON object (plain text,
+// a JSON scalar or array, or '{}') is used verbatim as the content; a non-empty
+// JSON object is interpreted by openai_plugin_map_frame_result.
+fn openai_executor_mapping_from_result(raw string) OpenAIFrameMapping {
+	parsed := json2.decode[json2.Any](raw) or {
+		return OpenAIFrameMapping{
+			content: raw
+			handled: true
+		}
+	}
+	// Any.as_map() wraps scalars and arrays into a non-empty map, so the length
+	// check alone cannot tell a real object from other JSON values.
+	is_object := parsed is map[string]json2.Any
+	if !is_object || parsed.as_map().len == 0 {
+		return OpenAIFrameMapping{
+			content: raw
+			handled: true
+		}
+	}
+	return openai_plugin_map_frame_result(raw)
+}
+
+// openai_completion_json_from_mapping renders a 'chat.completion' JSON document
+// from a single mapping. finish_reason is the mapping's own value when set,
+// otherwise 'tool_calls' when tool calls are present, otherwise 'stop'.
+// 'usage' is only included when the mapping carries usage counters.
+fn openai_completion_json_from_mapping(plan OpenAIResolvedPlan, mapping OpenAIFrameMapping, req_id string, created int) string {
+	has_calls := mapping.tool_calls.len > 0
+	mut reason := 'stop'
+	if mapping.finish_reason != '' {
+		reason = mapping.finish_reason
+	} else if has_calls {
+		reason = 'tool_calls'
+	}
+	mut reply := {
+		'role':    json2.Any('assistant')
+		'content': json2.Any(mapping.content)
+	}
+	if has_calls {
+		reply['tool_calls'] = json2.Any(mapping.tool_calls)
+	}
+	first_choice := {
+		'index':         json2.Any(0)
+		'message':       json2.Any(reply)
+		'finish_reason': json2.Any(reason)
+	}
+	mut envelope := {
+		'id':      json2.Any('chatcmpl-${req_id}')
+		'object':  json2.Any('chat.completion')
+		'created': json2.Any(created)
+		'model':   json2.Any(plan.model)
+		'choices': json2.Any([json2.Any(first_choice)])
+	}
+	if mapping.usage.len > 0 {
+		envelope['usage'] = json2.Any(openai_usage_json_obj(mapping.usage))
+	}
+	return json2.Any(envelope).json_str()
+}
+
+// openai_executor_once_body turns a raw executor result into the chat completion
+// response body. Non-JSON results become the content of a generated completion.
+// For JSON results a non-empty 'body' field is returned as is, results that
+// already contain 'choices' or 'error' are returned unchanged, and anything
+// else is mapped and rendered as a completion document.
+fn openai_executor_once_body(plan OpenAIResolvedPlan, raw string, req_id string, created int) string {
+	decoded := json2.decode[json2.Any](raw) or {
+		plain := OpenAIFrameMapping{
+			content: raw
+			handled: true
+		}
+		return openai_completion_json_from_mapping(plan, plain, req_id, created)
+	}
+	fields := decoded.as_map()
+	if ready_any := fields['body'] {
+		ready := ready_any.str()
+		if ready != '' {
+			return ready
+		}
+	}
+	// Results that already look like a completion or an error are passed through.
+	for marker in ['choices', 'error'] {
+		if _ := fields[marker] {
+			return raw
+		}
+	}
+	mapped := openai_executor_mapping_from_result(raw)
+	return openai_completion_json_from_mapping(plan, mapped, req_id, created)
+}
+
+// openai_responses_executor_once_body turns a raw executor result into a
+// 'response' object body. Non-JSON results are returned unchanged. For JSON
+// results a non-empty 'body' field is returned as is, and results that already
+// are a response (object == 'response') or contain 'output' are returned
+// unchanged. Otherwise a completed response with a single assistant message is
+// generated from the 'content' field, or from the raw text when 'content' is
+// empty; a 'usage' field in the result is copied over.
+fn openai_responses_executor_once_body(plan OpenAIResolvedPlan, raw string, req_id string, created int) string {
+	parsed := json2.decode[json2.Any](raw) or { return raw }
+	root := parsed.as_map()
+	if body_any := root['body'] {
+		body := body_any.str()
+		if body != '' {
+			return body
+		}
+	}
+	if (root['object'] or { json2.Any('') }).str() == 'response' {
+		return raw
+	}
+	if _ := root['output'] {
+		return raw
+	}
+	content := (root['content'] or { json2.Any('') }).str()
+	text := if content != '' { content } else { raw }
+	// Both generated ids share the same suffix so that a blank request id never
+	// produces a bare 'msg_' item id next to 'resp_vhttpd'.
+	id_suffix := if req_id.trim_space() != '' { req_id } else { 'vhttpd' }
+	mut response := {
+		'id':         json2.Any('resp_${id_suffix}')
+		'object':     json2.Any('response')
+		'created_at': json2.Any(created)
+		'status':     json2.Any('completed')
+		'model':      json2.Any(plan.model)
+		'output':     json2.Any([
+			json2.Any({
+				'id':      json2.Any('msg_${id_suffix}')
+				'type':    json2.Any('message')
+				'status':  json2.Any('completed')
+				'role':    json2.Any('assistant')
+				'content': json2.Any([
+					json2.Any({
+						'type':        json2.Any('output_text')
+						'text':        json2.Any(text)
+						'annotations': json2.Any([]json2.Any{})
+					}),
+				])
+			}),
+		])
+	}
+	if usage_any := root['usage'] {
+		response['usage'] = usage_any
+	}
+	return json2.Any(response).json_str()
+}
+
+// openai_proxy_executor_once answers a non-streaming chat completion request
+// from an executor backend.
+//
+// An executor failure is reported through `openai_error` with status 502.
+// Otherwise the executor result is normalized by `openai_executor_once_body`,
+// the tracing headers are set, an `http.request` event is emitted and the
+// body is sent (an empty body for HEAD).
+fn openai_proxy_executor_once(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result {
+	executor_resp := app.openai_call_executor(plan, method, path, req_id, trace_id) or {
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'openai_executor_failed', err.msg())
+	}
+	created_at := int(time.now().unix())
+	payload := openai_executor_once_body(plan, executor_resp.result, req_id, created_at)
+	verb := method.to_upper()
+	ctx.res.set_status(.ok)
+	ctx.set_content_type('application/json; charset=utf-8')
+	ctx.set_custom_header('x-request-id', req_id) or {}
+	ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {}
+	ctx.set_custom_header('x-vhttpd-openai-backend', plan.backend_name) or {}
+	elapsed_ms := time.now().unix_milli() - start_ms
+	app.emit('http.request', {
+		'method':      verb
+		'path':        normalize_path(path)
+		'status':      '200'
+		'request_id':  req_id
+		'trace_id':    trace_id
+		'duration_ms': '${elapsed_ms}'
+		'provider':    'openai'
+		'backend':     plan.backend_name
+		'executor':    plan.backend.executor
+	})
+	if verb == 'HEAD' {
+		return ctx.text('')
+	}
+	return ctx.text(payload)
+}
+
+// openai_proxy_responses_executor_once answers a non-streaming Responses API
+// request from an executor backend via the `responses.execute` operation.
+//
+// An executor failure is reported through `openai_error` with status 502.
+// Otherwise the normalized body is handed to `openai_store_response_record`,
+// the tracing headers are set, an `http.request` event tagged with
+// `endpoint = responses` is emitted and the body is sent (empty for HEAD).
+fn openai_proxy_responses_executor_once(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result {
+	executor_resp := app.openai_call_executor_op(plan, 'responses.execute', method, path,
+		req_id, trace_id) or {
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'openai_executor_failed', err.msg())
+	}
+	created_at := int(time.now().unix())
+	payload := openai_responses_executor_once_body(plan, executor_resp.result, req_id,
+		created_at)
+	app.openai_store_response_record(plan, payload, req_id, trace_id)
+	verb := method.to_upper()
+	ctx.res.set_status(.ok)
+	ctx.set_content_type('application/json; charset=utf-8')
+	ctx.set_custom_header('x-request-id', req_id) or {}
+	ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {}
+	ctx.set_custom_header('x-vhttpd-openai-backend', plan.backend_name) or {}
+	elapsed_ms := time.now().unix_milli() - start_ms
+	app.emit('http.request', {
+		'method':      verb
+		'path':        normalize_path(path)
+		'status':      '200'
+		'request_id':  req_id
+		'trace_id':    trace_id
+		'duration_ms': '${elapsed_ms}'
+		'provider':    'openai'
+		'backend':     plan.backend_name
+		'executor':    plan.backend.executor
+		'endpoint':    'responses'
+	})
+	if verb == 'HEAD' {
+		return ctx.text('')
+	}
+	return ctx.text(payload)
+}
+
+// openai_executor_stream_mappings expands a buffered executor result into the
+// frame mappings that a stream would have produced.
+//
+// - non-JSON input becomes a single terminal content frame;
+// - a `frames` array is mapped element by element through
+//   `openai_plugin_map_frame_result`;
+// - any other JSON goes through `openai_executor_mapping_from_result`.
+fn openai_executor_stream_mappings(raw string) []OpenAIFrameMapping {
+	decoded := json2.decode[json2.Any](raw) or {
+		return [
+			OpenAIFrameMapping{
+				content: raw
+				done:    true
+				handled: true
+			},
+		]
+	}
+	frames_any := decoded.as_map()['frames'] or {
+		return [openai_executor_mapping_from_result(raw)]
+	}
+	return frames_any.as_array().map(openai_plugin_map_frame_result(it.json_str()))
+}
+
+// openai_response_stream_event_from_raw renders one executor frame as a
+// Server-Sent Events record.
+//
+// - non-JSON input is sent verbatim as the record data;
+// - for JSON input the event name is taken from `event`, then `type`;
+// - the data is the `data` field when present (its string value, or its JSON
+//   encoding when the string value is empty), otherwise the raw frame.
+//
+// Every line of the data gets its own `data:` prefix and line breaks are
+// stripped from the event name, so a payload containing CR/LF cannot end the
+// record early or inject extra fields. Single-line payloads render exactly as
+// `[event: <name>\n]data: <payload>\n\n`.
+fn openai_response_stream_event_from_raw(raw string) string {
+	mut event_type := ''
+	mut payload := raw
+	if parsed := json2.decode[json2.Any](raw) {
+		root := parsed.as_map()
+		event_type = (root['event'] or { root['type'] or { json2.Any('') } }).str()
+		if data_any := root['data'] {
+			payload = if data_any.str() != '' { data_any.str() } else { data_any.json_str() }
+		}
+	}
+	mut record := ''
+	event_name := event_type.replace('\r', '').replace('\n', '')
+	if event_name != '' {
+		record += 'event: ${event_name}\n'
+	}
+	// SSE treats CRLF, CR and LF as line terminators; normalize before splitting.
+	lines := payload.replace('\r\n', '\n').replace('\r', '\n').split('\n')
+	if lines.len == 0 {
+		// Keep emitting an (empty) data field for an empty payload.
+		record += 'data: \n'
+	}
+	for line in lines {
+		record += 'data: ${line}\n'
+	}
+	return record + '\n'
+}
+
+// openai_response_body_from_completed_event extracts the response object from
+// a `response.completed` stream event.
+//
+// Returns the JSON of the event's `response` field, with `object` defaulted
+// to `response` when absent or empty. Returns an empty string when `raw` is
+// not JSON, is a different event (judged by `type`, then `event`) or carries
+// no `response` field.
+fn openai_response_body_from_completed_event(raw string) string {
+	decoded := json2.decode[json2.Any](raw) or { return '' }
+	fields := decoded.as_map()
+	kind := (fields['type'] or { fields['event'] or { json2.Any('') } }).str()
+	if kind == 'response.completed' {
+		if payload := fields['response'] {
+			mut completed := payload.as_map()
+			object_name := (completed['object'] or { json2.Any('') }).str()
+			if object_name == '' {
+				completed['object'] = json2.Any('response')
+			}
+			return json2.Any(completed).json_str()
+		}
+	}
+	return ''
+}
+
+// openai_proxy_responses_executor_stream serves a streaming Responses API
+// request from an executor backend.
+//
+// The connection is taken over and the `text/event-stream` headers are written
+// up front with status 200. Two paths follow:
+//   - streamed: every executor frame is forwarded as an SSE record, and the
+//     body of the first `response.completed` event is remembered;
+//   - not streamed: the buffered result is replayed as
+//     `response.output_text.delta` events followed by `response.completed`.
+// A remembered or synthesized completed body is handed to
+// `openai_store_response_record`.
+//
+// If the executor call fails, an SSE error is written, the connection is
+// closed and the request is logged with status 502.
+//
+// The synthesized completed body is built with json2 so that the request id
+// and model are always correctly escaped, and replayed events carry strictly
+// increasing `sequence_number` values.
+fn openai_proxy_responses_executor_stream(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result {
+	ctx.takeover_conn_reusable()
+	ctx.conn.set_write_timeout(time.infinite)
+	ctx.conn.set_read_timeout(time.infinite)
+	mut client_conn := ctx.conn
+	mut headers := {
+		'x-request-id':             req_id
+		'x-vhttpd-trace-id':        trace_id
+		'x-vhttpd-openai-backend':  plan.backend_name
+		'x-vhttpd-openai-executor': plan.backend.executor
+		'x-accel-buffering':        'no'
+	}
+	// Best effort: a failed header write surfaces on the first chunk write.
+	write_http_stream_headers_conn(mut client_conn, 200, 'text/event-stream', headers,
+		true) or {}
+	mut registry_state := &OpenAIResponsesStreamRegistryState{}
+	stream_resp := app.openai_call_executor_stream_op(plan, 'responses.execute', method,
+		path, req_id, trace_id, fn [mut client_conn, mut registry_state] (raw string) !bool {
+		// Only the first completed event is kept for the response registry.
+		if registry_state.completed_body == '' {
+			registry_state.completed_body = openai_response_body_from_completed_event(raw)
+		}
+		write_chunk(mut client_conn, openai_response_stream_event_from_raw(raw))!
+		return true
+	}) or {
+		openai_write_sse_error(mut client_conn, 'openai_executor_failed', err.msg(), 'server_error')
+		write_final_chunk(mut client_conn) or {}
+		client_conn.close() or {}
+		app.emit('http.request', {
+			'method':      method.to_upper()
+			'path':        normalize_path(path)
+			'status':      '502'
+			'request_id':  req_id
+			'trace_id':    trace_id
+			'duration_ms': '${time.now().unix_milli() - start_ms}'
+			'provider':    'openai'
+			'backend':     plan.backend_name
+			'executor':    plan.backend.executor
+			'endpoint':    'responses'
+		})
+		return veb.no_result()
+	}
+	if !stream_resp.streamed {
+		// Encoded through json2 so quotes or backslashes in the request id or
+		// model cannot produce invalid JSON.
+		fallback_completed_body := json2.Any({
+			'id':     json2.Any('resp_${req_id}')
+			'object': json2.Any('response')
+			'status': json2.Any('completed')
+			'model':  json2.Any(plan.model)
+		}).json_str()
+		mut sequence_number := 0
+		mut wrote_frame := false
+		for mapping in openai_executor_stream_mappings(stream_resp.response.result) {
+			if mapping.error != '' {
+				openai_write_sse_error(mut client_conn, 'openai_executor_error', mapping.error,
+					'server_error')
+				wrote_frame = true
+				break
+			}
+			if mapping.content != '' {
+				sequence_number++
+				event := {
+					'type':            json2.Any('response.output_text.delta')
+					'delta':           json2.Any(mapping.content)
+					'sequence_number': json2.Any(sequence_number)
+				}
+				write_chunk(mut client_conn, openai_response_stream_event_from_raw(json2.Any(event).json_str())) or {}
+				wrote_frame = true
+			}
+			if mapping.done {
+				sequence_number++
+				registry_state.completed_body = fallback_completed_body
+				write_chunk(mut client_conn, openai_response_stream_event_from_raw('{"type":"response.completed","sequence_number":${sequence_number},"response":${fallback_completed_body}}')) or {}
+				wrote_frame = true
+			}
+		}
+		if !wrote_frame {
+			// Nothing was replayed: still terminate the stream with a completed event.
+			registry_state.completed_body = fallback_completed_body
+			write_chunk(mut client_conn, openai_response_stream_event_from_raw('{"type":"response.completed","sequence_number":1,"response":${fallback_completed_body}}')) or {}
+		}
+	}
+	if registry_state.completed_body != '' {
+		app.openai_store_response_record(plan, registry_state.completed_body, req_id,
+			trace_id)
+	}
+	write_final_chunk(mut client_conn) or {}
+	app.emit('http.request', {
+		'method':      method.to_upper()
+		'path':        normalize_path(path)
+		'status':      '200'
+		'request_id':  req_id
+		'trace_id':    trace_id
+		'duration_ms': '${time.now().unix_milli() - start_ms}'
+		'provider':    'openai'
+		'backend':     plan.backend_name
+		'executor':    plan.backend.executor
+		'endpoint':    'responses'
+	})
+	return veb.no_result()
+}
+
+// openai_proxy_executor_stream serves a streaming chat completion request
+// from an executor backend.
+//
+// The connection is taken over and a OpenAIMappedStreamProxyState tracks what
+// has been written. Response headers are written lazily through
+// `ensure_openai_mapped_stream_headers_written`, so an executor failure before
+// the first frame can still be reported with status 502.
+//
+// Each frame is mapped with `openai_plugin_map_frame_result` and written as a
+// `data: ...` record; a terminal frame produces the usage chunk followed by
+// `data: [DONE]`. When the executor did not stream, the buffered result is
+// replayed through the same steps with write errors ignored.
+//
+// NOTE(review): `openai_finish_mapped_stream` can be reached both when a
+// terminal frame is handled and again near the end of the function —
+// presumably it is idempotent; confirm.
+// NOTE(review): a frame-level `mapping.error` still ends with the request
+// being logged as status 200 — confirm this is intended.
+fn openai_proxy_executor_stream(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result {
+	ctx.takeover_conn_reusable()
+	ctx.conn.set_write_timeout(time.infinite)
+	ctx.conn.set_read_timeout(time.infinite)
+	mut client_conn := ctx.conn
+	mut state := &OpenAIMappedStreamProxyState{
+		conn:             client_conn
+		method:           method
+		status_code:      200
+		response_headers: {
+			'x-request-id':             req_id
+			'x-vhttpd-trace-id':        trace_id
+			'x-vhttpd-openai-backend':  plan.backend_name
+			'x-vhttpd-openai-executor': plan.backend.executor
+		}
+		model:            plan.model
+		request_id:       req_id
+		trace_id:         trace_id
+		mapper:           'executor'
+		response_codec:   plan.response_codec
+		output_protocol:  plan.output_protocol
+		created:          int(time.now().unix())
+	}
+	stream_resp := app.openai_call_executor_stream(plan, method, path, req_id, trace_id,
+		fn [mut state, mut client_conn] (raw string) !bool {
+		mapping := openai_plugin_map_frame_result(raw)
+		// A frame-level error ends the stream: report it and return false.
+		if mapping.error != '' {
+			ensure_openai_mapped_stream_headers_written(mut state)!
+			openai_write_sse_error(mut client_conn, 'openai_executor_error', mapping.error,
+				'server_error')
+			state.done = true
+			return false
+		}
+		if mapping.content != '' || mapping.tool_calls.len > 0 {
+			ensure_openai_mapped_stream_headers_written(mut state)!
+			write_chunk(mut client_conn, 'data: ${openai_stream_chunk_json(state, mapping)}\n\n')!
+		}
+		// Usage is accumulated across frames and flushed with the terminal frame.
+		openai_merge_usage(mut state.usage, mapping.usage)
+		if mapping.done && !state.done {
+			ensure_openai_mapped_stream_headers_written(mut state)!
+			openai_write_stream_usage_chunk(mut state)!
+			write_chunk(mut client_conn, 'data: [DONE]\n\n')!
+			state.done = true
+			openai_finish_mapped_stream(mut state)!
+			return false
+		}
+		return true
+	}) or {
+		// Nothing sent yet: the failure can still be reported with a 502 status line.
+		if !state.headers_written {
+			state.status_code = 502
+			ensure_openai_mapped_stream_headers_written(mut state) or {}
+			openai_write_sse_error(mut client_conn, 'openai_executor_failed', err.msg(),
+				'server_error')
+			state.done = true
+		}
+		if state.headers_written {
+			openai_finish_mapped_stream(mut state) or {}
+		}
+		client_conn.close() or {}
+		app.emit('http.request', {
+			'method':      method.to_upper()
+			'path':        normalize_path(path)
+			'status':      '502'
+			'request_id':  req_id
+			'trace_id':    trace_id
+			'duration_ms': '${time.now().unix_milli() - start_ms}'
+			'provider':    'openai'
+			'backend':     plan.backend_name
+			'executor':    plan.backend.executor
+		})
+		return veb.no_result()
+	}
+	// The executor returned one buffered result: replay it as a stream.
+	// Write errors are ignored here (best effort).
+	if !stream_resp.streamed {
+		for mapping in openai_executor_stream_mappings(stream_resp.response.result) {
+			if mapping.error != '' {
+				ensure_openai_mapped_stream_headers_written(mut state) or {}
+				openai_write_sse_error(mut client_conn, 'openai_executor_error', mapping.error,
+					'server_error')
+				state.done = true
+				break
+			}
+			if mapping.content != '' || mapping.tool_calls.len > 0 {
+				ensure_openai_mapped_stream_headers_written(mut state) or {}
+				write_chunk(mut client_conn, 'data: ${openai_stream_chunk_json(state,
+					mapping)}\n\n') or {}
+			}
+			openai_merge_usage(mut state.usage, mapping.usage)
+			if mapping.done && !state.done {
+				ensure_openai_mapped_stream_headers_written(mut state) or {}
+				openai_write_stream_usage_chunk(mut state) or {}
+				write_chunk(mut client_conn, 'data: [DONE]\n\n') or {}
+				state.done = true
+				openai_finish_mapped_stream(mut state) or {}
+			}
+		}
+	}
+	// No terminal frame was seen: close the stream explicitly.
+	if !state.done {
+		ensure_openai_mapped_stream_headers_written(mut state) or {}
+		openai_write_stream_usage_chunk(mut state) or {}
+		write_chunk(mut client_conn, 'data: [DONE]\n\n') or {}
+	}
+	if state.headers_written {
+		openai_finish_mapped_stream(mut state) or {}
+	}
+	app.emit('http.request', {
+		'method':      method.to_upper()
+		'path':        normalize_path(path)
+		'status':      '200'
+		'request_id':  req_id
+		'trace_id':    trace_id
+		'duration_ms': '${time.now().unix_milli() - start_ms}'
+		'provider':    'openai'
+		'backend':     plan.backend_name
+		'executor':    plan.backend.executor
+	})
+	return veb.no_result()
+}
+
+// openai_handle_models serves the models listing endpoint.
+//
+// Only GET and HEAD are accepted (405 otherwise). When a plugin is configured
+// it is asked first: a plugin failure yields a 500 `plugin_error`, and an
+// unhandled plugin result falls back to `app.openai_models()`. Each model id
+// is reported with `created` set to the application start time.
+fn (mut app App) openai_handle_models(mut ctx Context, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result {
+	verb := method.to_upper()
+	if verb != 'GET' && verb != 'HEAD' {
+		return openai_error(mut app, mut ctx, 405, path, method, req_id, trace_id, start_ms,
+			'method_not_allowed', 'method ${method} is not allowed for ${path}')
+	}
+	model_ids := if app.openai_plugin.trim_space() == '' {
+		app.openai_models()
+	} else {
+		plugin_result := app.openai_plugin_models(method, path, req_id, trace_id) or {
+			return openai_error(mut app, mut ctx, 500, path, method, req_id, trace_id,
+				start_ms, 'plugin_error', err.msg())
+		}
+		if plugin_result.handled {
+			plugin_result.models
+		} else {
+			app.openai_models()
+		}
+	}
+	entries := model_ids.map(OpenAIModelObject{
+		id:      it
+		created: int(app.started_at_unix)
+	})
+	payload := json.encode(OpenAIModelsResponse{
+		data: entries
+	})
+	ctx.res.set_status(.ok)
+	ctx.set_content_type('application/json; charset=utf-8')
+	ctx.set_custom_header('x-request-id', req_id) or {}
+	ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {}
+	elapsed_ms := time.now().unix_milli() - start_ms
+	app.emit('http.request', {
+		'method':      verb
+		'path':        normalize_path(path)
+		'status':      '200'
+		'request_id':  req_id
+		'trace_id':    trace_id
+		'duration_ms': '${elapsed_ms}'
+		'provider':    'openai'
+	})
+	if verb == 'HEAD' {
+		return ctx.text('')
+	}
+	return ctx.text(payload)
+}
+
+// openai_handle_chat serves the chat completions endpoint.
+//
+// Only POST and HEAD are accepted (405 otherwise). Plan resolution errors map
+// to 502 when their message starts with `openai_plugin_`, else 400.
+// Non-streaming requests go to `openai_proxy_once`; streaming requests go to
+// the executor, mapped or passthrough stream proxy, in that order of
+// precedence.
+fn (mut app App) openai_handle_chat(mut ctx Context, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result {
+	verb := method.to_upper()
+	if verb != 'POST' && verb != 'HEAD' {
+		return openai_error(mut app, mut ctx, 405, path, method, req_id, trace_id, start_ms,
+			'method_not_allowed', 'method ${method} is not allowed for ${path}')
+	}
+	requested_model := openai_request_model(ctx.req.data)
+	plan := app.openai_resolve_plan(requested_model, ctx.req.data, method, path, req_id,
+		trace_id) or {
+		reason := err.msg()
+		mut status := 400
+		if reason.starts_with('openai_plugin_') {
+			status = 502
+		}
+		return openai_error(mut app, mut ctx, status, path, method, req_id, trace_id,
+			start_ms, openai_plan_error_code(reason), openai_plan_error_message(reason))
+	}
+	if !openai_is_stream_request(ctx.req.data) {
+		return openai_proxy_once(mut app, mut ctx, plan, method, path, req_id, trace_id,
+			start_ms)
+	}
+	if plan.backend.kind.trim_space() == 'executor' {
+		return openai_proxy_executor_stream(mut app, mut ctx, plan, method, path, req_id,
+			trace_id, start_ms)
+	}
+	if plan.stream_mode == 'mapped' {
+		return openai_proxy_mapped_stream(mut app, mut ctx, plan, method, path, req_id,
+			trace_id, start_ms)
+	}
+	return openai_proxy_stream(mut app, mut ctx, plan, method, path, req_id, trace_id,
+		start_ms)
+}
+
+// openai_handle_responses serves the Responses API creation endpoint.
+//
+// Only POST and HEAD are accepted (405 otherwise). Plan resolution errors map
+// to 502 when their message starts with `openai_plugin_`, else 400. The
+// request is then dispatched on two axes: executor vs. HTTP backend, and
+// streaming vs. one-shot.
+fn (mut app App) openai_handle_responses(mut ctx Context, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result {
+	verb := method.to_upper()
+	if verb != 'POST' && verb != 'HEAD' {
+		return openai_error(mut app, mut ctx, 405, path, method, req_id, trace_id, start_ms,
+			'method_not_allowed', 'method ${method} is not allowed for ${path}')
+	}
+	requested_model := openai_request_model(ctx.req.data)
+	plan := app.openai_resolve_responses_plan(requested_model, ctx.req.data, method,
+		path, req_id, trace_id) or {
+		reason := err.msg()
+		mut status := 400
+		if reason.starts_with('openai_plugin_') {
+			status = 502
+		}
+		return openai_error(mut app, mut ctx, status, path, method, req_id, trace_id,
+			start_ms, openai_plan_error_code(reason), openai_plan_error_message(reason))
+	}
+	wants_stream := openai_is_stream_request(ctx.req.data)
+	uses_executor := plan.backend.kind.trim_space() == 'executor'
+	if uses_executor {
+		if wants_stream {
+			return openai_proxy_responses_executor_stream(mut app, mut ctx, plan, method,
+				path, req_id, trace_id, start_ms)
+		}
+		return openai_proxy_responses_executor_once(mut app, mut ctx, plan, method, path,
+			req_id, trace_id, start_ms)
+	}
+	if wants_stream {
+		return openai_proxy_stream(mut app, mut ctx, plan, method, path, req_id, trace_id,
+			start_ms)
+	}
+	return openai_proxy_once(mut app, mut ctx, plan, method, path, req_id, trace_id, start_ms)
+}
+
+// openai_handle_responses_passthrough serves Responses API sub-resources
+// (paths below `/responses/`).
+//
+// GET, POST, DELETE and HEAD are accepted (405 otherwise). A GET/HEAD for a
+// response id found in `app.openai_responses` is answered from that registry,
+// except for `/input_items` paths. Everything else is resolved to a plan and
+// proxied to an HTTP backend; executor backends are rejected with 502
+// `unsupported_backend`.
+fn (mut app App) openai_handle_responses_passthrough(mut ctx Context, method string, path string, relative_target string, req_id string, trace_id string, start_ms i64) veb.Result {
+	verb := method.to_upper()
+	if verb !in ['GET', 'POST', 'DELETE', 'HEAD'] {
+		return openai_error(mut app, mut ctx, 405, path, method, req_id, trace_id, start_ms,
+			'method_not_allowed', 'method ${method} is not allowed for ${path}')
+	}
+	response_id := openai_response_id_from_relative(relative_target)
+	relative_path := normalize_path(relative_target.all_before('?'))
+	is_read := verb == 'GET' || verb == 'HEAD'
+	registry_eligible := is_read && response_id != ''
+		&& !relative_path.contains('/input_items')
+	if registry_eligible {
+		if stored := app.openai_responses.get(response_id) {
+			ctx.res.set_status(.ok)
+			ctx.set_content_type('application/json; charset=utf-8')
+			ctx.set_custom_header('x-request-id', req_id) or {}
+			ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {}
+			ctx.set_custom_header('x-vhttpd-openai-backend', stored.backend_name) or {}
+			elapsed_ms := time.now().unix_milli() - start_ms
+			app.emit('http.request', {
+				'method':      verb
+				'path':        normalize_path(path)
+				'status':      '200'
+				'request_id':  req_id
+				'trace_id':    trace_id
+				'duration_ms': '${elapsed_ms}'
+				'provider':    'openai'
+				'backend':     stored.backend_name
+				'executor':    stored.executor
+				'endpoint':    'responses.registry'
+			})
+			if verb == 'HEAD' {
+				return ctx.text('')
+			}
+			return ctx.text(stored.body)
+		}
+	}
+	plan := app.openai_resolve_responses_passthrough_plan(relative_target, ctx.req.data,
+		method) or {
+		reason := err.msg()
+		mut status := 400
+		if reason.starts_with('openai_plugin_') {
+			status = 502
+		}
+		return openai_error(mut app, mut ctx, status, path, method, req_id, trace_id,
+			start_ms, openai_plan_error_code(reason), openai_plan_error_message(reason))
+	}
+	if plan.backend.kind.trim_space() == 'executor' {
+		return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms,
+			'unsupported_backend', 'Responses passthrough endpoint ${relative_target} requires an HTTP backend')
+	}
+	wants_stream := openai_is_stream_request(ctx.req.data) || openai_is_stream_target(path)
+	if wants_stream {
+		return openai_proxy_stream(mut app, mut ctx, plan, method, path, req_id, trace_id,
+			start_ms)
+	}
+	return openai_proxy_once_attempt(mut app, mut ctx, plan, method, path, req_id, trace_id,
+		start_ms, false)
+}
+
+// openai_try_handle routes a request to the OpenAI gateway handlers.
+//
+// Returns `none` when the gateway is disabled or `target` is outside
+// `app.openai_base_path`. Otherwise it dispatches on the path relative to the
+// base path: `/models`, `/chat/completions`, `/responses` and
+// `/responses/...` go to their handlers (404 `endpoint_disabled` when turned
+// off in `app.openai_endpoints`), `/embeddings` answers 501, and anything
+// else answers 404 `endpoint_not_found`.
+fn (mut app App) openai_try_handle(mut ctx Context, method string, target string, req_id string, trace_id string, start_ms i64) ?veb.Result {
+	if !app.openai_enabled {
+		return none
+	}
+	relative := openai_relative_path(target, app.openai_base_path) or { return none }
+	relative_target := openai_relative_target(target, app.openai_base_path) or { return none }
+	match relative {
+		'/models' {
+			if !app.openai_endpoints.models {
+				return openai_error(mut app, mut ctx, 404, target, method, req_id, trace_id,
+					start_ms, 'endpoint_disabled', 'OpenAI models endpoint is disabled')
+			}
+			return app.openai_handle_models(mut ctx, method, target, req_id, trace_id,
+				start_ms)
+		}
+		'/chat/completions' {
+			if !app.openai_endpoints.chat_completions {
+				return openai_error(mut app, mut ctx, 404, target, method, req_id, trace_id,
+					start_ms, 'endpoint_disabled', 'OpenAI chat completions endpoint is disabled')
+			}
+			return app.openai_handle_chat(mut ctx, method, target, req_id, trace_id,
+				start_ms)
+		}
+		'/embeddings' {
+			return openai_error(mut app, mut ctx, 501, target, method, req_id, trace_id,
+				start_ms, 'endpoint_not_implemented', 'OpenAI endpoint ${relative} is not implemented yet')
+		}
+		else {}
+	}
+	is_responses_root := relative == '/responses'
+	if is_responses_root || relative.starts_with('/responses/') {
+		// The root endpoint and its sub-resources share one enable switch.
+		if !app.openai_endpoints.responses {
+			return openai_error(mut app, mut ctx, 404, target, method, req_id, trace_id,
+				start_ms, 'endpoint_disabled', 'OpenAI responses endpoint is disabled')
+		}
+		if is_responses_root {
+			return app.openai_handle_responses(mut ctx, method, target, req_id, trace_id,
+				start_ms)
+		}
+		return app.openai_handle_responses_passthrough(mut ctx, method, target, relative_target,
+			req_id, trace_id, start_ms)
+	}
+	return openai_error(mut app, mut ctx, 404, target, method, req_id, trace_id, start_ms,
+		'endpoint_not_found', 'OpenAI endpoint ${relative} was not found')
+}
diff --git a/src/openai_runtime_test.v b/src/openai_runtime_test.v
new file mode 100644
index 0000000..0702907
--- /dev/null
+++ b/src/openai_runtime_test.v
@@ -0,0 +1,388 @@
+module main
+
+import os
+import x.json2
+
+// Paths under the configured base path resolve to their relative form (with
+// or without a leading slash, query string dropped); other paths yield none.
+fn test_openai_relative_path_matches_configured_base_path() {
+	models_path := openai_relative_path('/v1/models', '/v1') or { '' }
+	assert models_path == '/models'
+	chat_path := openai_relative_path('api/openai/chat/completions?trace=1', '/api/openai') or {
+		''
+	}
+	assert chat_path == '/chat/completions'
+	foreign_matched := if _ := openai_relative_path('/api/other/models', '/api/openai') {
+		true
+	} else {
+		false
+	}
+	assert !foreign_matched
+}
+
+// A model alias declared on a route resolves to that route's backend and
+// upstream model, and every declared model name is listed.
+fn test_openai_route_resolution_maps_public_model_to_upstream_model() {
+	backends := {
+		'default': OpenAIBackendConfig{
+			base_url: 'https://upstream.test/v1'
+		}
+	}
+	routes := {
+		'gpt-4o-mini': OpenAIRouteConfig{
+			models:         ['gpt-4o-mini', 'mini']
+			backend:        'default'
+			upstream_model: 'upstream-mini'
+		}
+	}
+	mut app := App{
+		openai_enabled:         true
+		openai_base_path:       '/v1'
+		openai_default_backend: 'default'
+		openai_backends:        backends
+		openai_routes:          routes
+	}
+	resolved := app.openai_resolve_route('mini') or { panic(err) }
+	assert resolved.backend_name == 'default'
+	assert resolved.upstream_model == 'upstream-mini'
+	listed := app.openai_models()
+	assert listed == ['gpt-4o-mini', 'mini']
+}
+
+// Without a plugin, a Responses request is planned against `/responses` with
+// the `openai.response` protocol and the upstream model written into the body.
+fn test_openai_responses_builtin_plan_uses_responses_path() {
+	backends := {
+		'default': OpenAIBackendConfig{
+			base_url: 'https://upstream.test/v1'
+		}
+	}
+	routes := {
+		'public': OpenAIRouteConfig{
+			models:         ['public-model']
+			backend:        'default'
+			upstream_model: 'upstream-model'
+		}
+	}
+	mut app := App{
+		openai_enabled:         true
+		openai_base_path:       '/v1'
+		openai_default_backend: 'default'
+		openai_backends:        backends
+		openai_routes:          routes
+	}
+	request_body := '{"model":"public-model","input":"hi"}'
+	resolved := app.openai_resolve_responses_plan('public-model', request_body, 'POST',
+		'/v1/responses', 'req_resp', 'trace_resp') or { panic(err) }
+	assert resolved.path == '/responses'
+	assert resolved.output_protocol == 'openai.response'
+	assert resolved.body.contains('"model":"upstream-model"')
+}
+
+// Rewriting the model must leave the remaining request fields intact.
+fn test_openai_replace_model_in_body_keeps_other_fields() {
+	original := '{"model":"public","messages":[{"role":"user","content":"hi"}],"stream":true}'
+	rewritten := openai_replace_model_in_body(original, 'upstream')
+	fields := json2.decode[json2.Any](rewritten) or { panic(err) }.as_map()
+	model_name := (fields['model'] or { json2.Any('') }).str()
+	assert model_name == 'upstream'
+	stream_flag := (fields['stream'] or { json2.Any(false) }).bool()
+	assert stream_flag
+	messages := (fields['messages'] or { json2.Any([]json2.Any{}) }).as_array()
+	assert messages.len == 1
+}
+
+// Loads a config file with an [openai] section, two backends, one route and
+// a plugin, then checks each value (including the default-enabled responses
+// endpoint) on the parsed config.
+fn test_openai_config_parses_backends_and_routes() {
+	temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_config_test')
+	os.mkdir_all(temp_dir) or { panic(err) }
+	config_file := os.join_path(temp_dir, 'vhttpd.toml')
+	config_toml := '
+[openai]
+enabled = true
+base_path = "/openai/v1"
+default_backend = "default"
+plugin = "planner"
+
+[plugins.planner]
+kind = "vjsx"
+entry = "plugins/openai-planner.mts"
+runtime_profile = "node"
+
+[openai.backends.default]
+kind = "openai_http"
+base_url = "https://api.openai.test/v1"
+api_key_env = "TEST_OPENAI_KEY"
+
+[openai.backends.exec]
+kind = "executor"
+executor = "custom_executor"
+
+[openai.routes.gpt_demo]
+models = ["gpt-demo", "demo"]
+backend = "default"
+upstream_model = "gpt-4o-mini"
+'
+	os.write_file(config_file, config_toml) or { panic(err) }
+	defer {
+		os.rm(config_file) or {}
+		os.rmdir_all(temp_dir) or {}
+	}
+	cfg := load_vhttpd_config(['--config', config_file]) or { panic(err) }
+	// [openai] section
+	assert cfg.openai.enabled
+	assert cfg.openai.base_path == '/openai/v1'
+	assert cfg.openai.plugin == 'planner'
+	assert cfg.openai.endpoints.responses
+	// [plugins.*]
+	assert cfg.plugins['planner'].runtime_profile == 'node'
+	// [openai.backends.*]
+	default_backend := cfg.openai.backends['default']
+	assert default_backend.base_url == 'https://api.openai.test/v1'
+	exec_backend := cfg.openai.backends['exec']
+	assert exec_backend.kind == 'executor'
+	assert exec_backend.executor == 'custom_executor'
+	// [openai.routes.*]
+	demo_route := cfg.openai.routes['gpt_demo']
+	assert demo_route.models == ['gpt-demo', 'demo']
+	assert demo_route.upstream_model == 'gpt-4o-mini'
+}
+
+// A vjsx plugin answering `chat.route` can choose the backend, path and
+// headers and rewrite the body of the upstream request.
+fn test_openai_vjsx_plugin_can_return_upstream_plan() {
+	temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_plugin_plan_test')
+	plugin_dir := os.join_path(temp_dir, 'plugins')
+	os.mkdir_all(plugin_dir) or { panic(err) }
+	plugin_file := os.join_path(plugin_dir, 'openai-planner.mts')
+	plugin_source := "
+export function openai(req) {
+  if (req.op !== 'chat.route') {
+    return { not_handled: true };
+  }
+  const payload = JSON.parse(req.payload);
+  const body = JSON.parse(payload.body);
+  body.model = 'plugin-upstream-model';
+  return {
+    backend: 'mock',
+    method: 'POST',
+    path: '/chat/completions',
+    headers: { 'x-plugin-plan': 'yes' },
+    body: JSON.stringify(body),
+    stream_mode: 'passthrough',
+  };
+}
+"
+	os.write_file(plugin_file, plugin_source) or { panic(err) }
+	defer {
+		os.rmdir_all(temp_dir) or {}
+	}
+	plugins := {
+		'planner': PluginConfig{
+			kind:            'vjsx'
+			app_entry:       plugin_file
+			runtime_profile: 'node'
+			thread_count:    1
+		}
+	}
+	backends := {
+		'mock': OpenAIBackendConfig{
+			base_url: 'https://mock.openai.test/v1'
+		}
+	}
+	mut app := App{
+		started_at_unix:        123
+		openai_enabled:         true
+		openai_base_path:       '/v1'
+		openai_plugin:          'planner'
+		openai_default_backend: 'mock'
+		openai_backends:        backends
+		plugin_configs:         plugins
+		plugin_vjsx:            build_vjsx_plugin_runtimes(plugins)
+	}
+	defer {
+		app.close_all_plugins()
+	}
+	request_body := '{"model":"public-model","messages":[]}'
+	resolved := app.openai_resolve_plan('public-model', request_body, 'POST', '/v1/chat/completions',
+		'req_plugin', 'trace_plugin') or { panic(err) }
+	assert resolved.backend_name == 'mock'
+	assert resolved.path == '/chat/completions'
+	assert resolved.headers['x-plugin-plan'] == 'yes'
+	fields := json2.decode[json2.Any](resolved.body) or { panic(err) }.as_map()
+	upstream_model := (fields['model'] or { json2.Any('') }).str()
+	assert upstream_model == 'plugin-upstream-model'
+}
+
+// The `models` operation reaches the plugin's `openai` export; the returned
+// list is expected back sorted and de-duplicated.
+fn test_openai_vjsx_plugin_models_uses_same_openai_entry() {
+	temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_plugin_models_test')
+	os.mkdir_all(temp_dir) or { panic(err) }
+	plugin_file := os.join_path(temp_dir, 'openai-planner.mts')
+	plugin_source := "
+export function openai(req) {
+  if (req.op === 'models') {
+    return { models: ['plugin-b', 'plugin-a', 'plugin-a'] };
+  }
+  return { not_handled: true };
+}
+"
+	os.write_file(plugin_file, plugin_source) or { panic(err) }
+	defer {
+		os.rmdir_all(temp_dir) or {}
+	}
+	plugins := {
+		'planner': PluginConfig{
+			kind:            'vjsx'
+			app_entry:       plugin_file
+			runtime_profile: 'node'
+			thread_count:    1
+		}
+	}
+	mut app := App{
+		openai_enabled:   true
+		openai_base_path: '/v1'
+		openai_plugin:    'planner'
+		plugin_configs:   plugins
+		plugin_vjsx:      build_vjsx_plugin_runtimes(plugins)
+	}
+	defer {
+		app.close_all_plugins()
+	}
+	listing := app.openai_plugin_models('GET', '/v1/models', 'req_models', 'trace_models') or {
+		panic(err)
+	}
+	assert listing.handled
+	assert listing.models == ['plugin-a', 'plugin-b']
+}
+
+// When the plugin answers `not_handled`, plan resolution falls back to the
+// routes configured on the app.
+fn test_openai_vjsx_plugin_not_handled_falls_back_to_builtin_route() {
+	temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_plugin_not_handled_test')
+	os.mkdir_all(temp_dir) or { panic(err) }
+	plugin_file := os.join_path(temp_dir, 'openai-planner.mts')
+	plugin_source := '
+export function openai(_req) {
+  return { not_handled: true };
+}
+'
+	os.write_file(plugin_file, plugin_source) or { panic(err) }
+	defer {
+		os.rmdir_all(temp_dir) or {}
+	}
+	plugins := {
+		'planner': PluginConfig{
+			kind:            'vjsx'
+			app_entry:       plugin_file
+			runtime_profile: 'node'
+			thread_count:    1
+		}
+	}
+	backends := {
+		'mock': OpenAIBackendConfig{
+			base_url: 'https://mock.openai.test/v1'
+		}
+	}
+	routes := {
+		'public': OpenAIRouteConfig{
+			models:         ['public-model']
+			backend:        'mock'
+			upstream_model: 'builtin-upstream-model'
+		}
+	}
+	mut app := App{
+		openai_enabled:         true
+		openai_base_path:       '/v1'
+		openai_plugin:          'planner'
+		openai_default_backend: 'mock'
+		openai_backends:        backends
+		openai_routes:          routes
+		plugin_configs:         plugins
+		plugin_vjsx:            build_vjsx_plugin_runtimes(plugins)
+	}
+	defer {
+		app.close_all_plugins()
+	}
+	request_body := '{"model":"public-model","messages":[]}'
+	resolved := app.openai_resolve_plan('public-model', request_body, 'POST', '/v1/chat/completions',
+		'req_fallback', 'trace_fallback') or { panic(err) }
+	assert resolved.backend_name == 'mock'
+	fields := json2.decode[json2.Any](resolved.body) or { panic(err) }.as_map()
+	upstream_model := (fields['model'] or { json2.Any('') }).str()
+	assert upstream_model == 'builtin-upstream-model'
+}
+
+// A plugin plan without a `backend` field must parse with an empty backend,
+// and the matching plan error must round-trip its code and message through
+// `openai_plan_error_code` / `openai_plan_error_message`.
+fn test_openai_plugin_plan_validation_rejects_missing_backend() {
+	raw := '{"method":"POST","path":"/chat/completions","body":"{}"}'
+	plan := openai_upstream_plan_from_plugin_json(raw) or { panic(err) }
+	// Asserted directly so a regression reports this condition, not `assert false`.
+	assert plan.backend.trim_space() == '', 'a plan without a backend field must parse with an empty backend'
+	err := openai_plan_error('openai_plugin_plan_missing_backend', 'plugin plan must include backend')
+	assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_missing_backend'
+	assert openai_plan_error_message(err.msg()) == 'plugin plan must include backend'
+}
+
+// Each validator must reject its invalid input with the expected error code
+// and a message naming the offending value, and accept a valid one.
+//
+// `rejections` counts how many `or` blocks actually ran, so the test fails
+// instead of passing silently if a validator starts accepting bad input.
+fn test_openai_plugin_plan_validation_rejects_invalid_method_and_path() {
+	mut rejections := 0
+	openai_validate_plan_method('TRACE') or {
+		rejections++
+		assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_invalid_method'
+		assert openai_plan_error_message(err.msg()).contains('TRACE')
+	}
+	assert rejections == 1, 'method TRACE must be rejected'
+	openai_validate_plan_path('chat/completions') or {
+		rejections++
+		assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_invalid_path'
+		assert openai_plan_error_message(err.msg()).contains('start with /')
+	}
+	assert rejections == 2, 'a path without a leading slash must be rejected'
+	assert openai_validate_stream_mode('mapped') or { panic(err) } == 'mapped'
+	openai_validate_response_codec('xml', 'mapped') or {
+		rejections++
+		assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_unsupported_response_codec'
+		assert openai_plan_error_message(err.msg()).contains('xml')
+	}
+	assert rejections == 3, 'response codec xml must be rejected'
+	openai_validate_output_protocol('custom.protocol', 'mapped') or {
+		rejections++
+		assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_unsupported_output_protocol'
+		assert openai_plan_error_message(err.msg()).contains('custom.protocol')
+	}
+	assert rejections == 4, 'output protocol custom.protocol must be rejected'
+	assert openai_validate_mapper('plugin') or { panic(err) } == 'plugin'
+	openai_validate_mapper('remote') or {
+		rejections++
+		assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_unsupported_mapper'
+		assert openai_plan_error_message(err.msg()).contains('remote')
+	}
+	assert rejections == 5, 'mapper remote must be rejected'
+}
+
+// Sanitizing plan headers keeps ordinary headers and drops `connection`,
+// `transfer-encoding`, `host` and any header whose value contains CR/LF.
+fn test_openai_plugin_plan_sanitizes_hop_by_hop_headers() {
+	plugin_headers := {
+		'x-ok':              'yes'
+		'connection':        'close'
+		'transfer-encoding': 'chunked'
+		'host':              'bad'
+		'x-bad':             'line\r\nbreak'
+	}
+	sanitized := openai_sanitize_plan_headers(plugin_headers)
+	assert sanitized['x-ok'] == 'yes'
+	for dropped in ['connection', 'transfer-encoding', 'host', 'x-bad'] {
+		assert dropped !in sanitized
+	}
+}
+
+// NDJSON content frames are concatenated into one chat completion message.
+fn test_openai_mapped_once_ndjson_aggregates_chat_completion() {
+	ndjson := '{"message":{"content":"你"},"done":false}\n' +
+		'{"message":{"content":"好"},"done":false}\n' + '{"done":true}\n'
+	plan := OpenAIResolvedPlan{
+		model:           'public-model'
+		stream_mode:     'mapped'
+		response_codec:  'ndjson'
+		output_protocol: 'openai.chat.completion'
+	}
+	completion := openai_map_once_response(plan, ndjson, 'req_once', 123) or { panic(err) }
+	fields := json2.decode[json2.Any](completion) or { panic(err) }.as_map()
+	object_name := (fields['object'] or { json2.Any('') }).str()
+	assert object_name == 'chat.completion'
+	choices := (fields['choices'] or { json2.Any([]json2.Any{}) }).as_array()
+	assert choices.len == 1
+	first_choice := choices[0].as_map()
+	message := (first_choice['message'] or { json2.Any(map[string]json2.Any{}) }).as_map()
+	assert (message['content'] or { json2.Any('') }).str() == '你好'
+}
+
+// Tool call fragments sharing an index are merged into one call whose
+// arguments are concatenated; the finish reason becomes `tool_calls`.
+fn test_openai_mapped_once_ndjson_aggregates_tool_calls() {
+	ndjson :=
+		'{"message":{"tool_calls":[{"index":0,"id":"call_search","type":"function","function":{"name":"search","arguments":"{\\"q\\":\\"vh"}}]},"done":false}\n' +
+		'{"message":{"tool_calls":[{"index":0,"function":{"arguments":"ttpd\\"}"}}]},"done":false}\n' +
+		'{"done":true}\n'
+	plan := OpenAIResolvedPlan{
+		model:           'public-model'
+		stream_mode:     'mapped'
+		response_codec:  'ndjson'
+		output_protocol: 'openai.chat.completion'
+	}
+	completion := openai_map_once_response(plan, ndjson, 'req_tools', 123) or { panic(err) }
+	fields := json2.decode[json2.Any](completion) or { panic(err) }.as_map()
+	choices := (fields['choices'] or { json2.Any([]json2.Any{}) }).as_array()
+	first_choice := choices[0].as_map()
+	message := (first_choice['message'] or { json2.Any(map[string]json2.Any{}) }).as_map()
+	tool_calls := (message['tool_calls'] or { json2.Any([]json2.Any{}) }).as_array()
+	assert tool_calls.len == 1
+	merged_call := tool_calls[0].as_map()
+	assert (merged_call['id'] or { json2.Any('') }).str() == 'call_search'
+	function_obj := (merged_call['function'] or { json2.Any(map[string]json2.Any{}) }).as_map()
+	assert (function_obj['name'] or { json2.Any('') }).str() == 'search'
+	assert (function_obj['arguments'] or { json2.Any('') }).str() == '{"q":"vhttpd"}'
+	assert (first_choice['finish_reason'] or { json2.Any('') }).str() == 'tool_calls'
+}
+
+// `prompt_eval_count` / `eval_count` on the final frame are reported as
+// prompt, completion and total token counts.
+fn test_openai_mapped_once_ndjson_normalizes_usage() {
+	ndjson := '{"message":{"content":"hi"},"done":false}\n' +
+		'{"done":true,"prompt_eval_count":7,"eval_count":11}\n'
+	plan := OpenAIResolvedPlan{
+		model:           'public-model'
+		stream_mode:     'mapped'
+		response_codec:  'ndjson'
+		output_protocol: 'openai.chat.completion'
+	}
+	completion := openai_map_once_response(plan, ndjson, 'req_usage', 123) or { panic(err) }
+	fields := json2.decode[json2.Any](completion) or { panic(err) }.as_map()
+	usage := (fields['usage'] or { json2.Any(map[string]json2.Any{}) }).as_map()
+	prompt_tokens := (usage['prompt_tokens'] or { json2.Any(0) }).int()
+	completion_tokens := (usage['completion_tokens'] or { json2.Any(0) }).int()
+	total_tokens := (usage['total_tokens'] or { json2.Any(0) }).int()
+	assert prompt_tokens == 7
+	assert completion_tokens == 11
+	assert total_tokens == 18
+}
diff --git a/src/plugin_runtime.v b/src/plugin_runtime.v
new file mode 100644
index 0000000..017d1ab
--- /dev/null
+++ b/src/plugin_runtime.v
@@ -0,0 +1,118 @@
+module main
+
+import log
+
+pub struct PluginCallRequest { // one invocation handed to App.call_plugin / App.call_plugin_stream
+pub:
+	plugin     string // plugin name; trimmed and looked up in app.plugin_configs by call_plugin
+	capability string
+	op         string // NOTE(review): presumably the operation the plugin dispatches on — confirm
+	request_id string @[json: 'request_id']
+	trace_id   string @[json: 'trace_id']
+	payload    string // NOTE(review): presumably a serialized document the plugin parses — confirm
+	metadata   map[string]string
+}
+
+pub struct PluginCallResponse { // value returned by App.call_plugin
+pub:
+	ok     bool
+	result string // NOTE(review): presumably the plugin's serialized return value — confirm
+	error  string // NOTE(review): presumably set only when ok is false — confirm
+}
+
+pub type PluginStreamFrameFn = fn (string) !bool // type of the on_frame callback taken by App.call_plugin_stream
+
+pub struct PluginStreamCallResponse { // value returned by App.call_plugin_stream
+pub:
+	streamed bool // NOTE(review): presumably true when frames went through on_frame — confirm
+	response PluginCallResponse
+}
+
+// Picks the script a plugin should load: the trimmed app_entry when it is non-blank,
+// otherwise the trimmed entry.
+fn plugin_config_app_entry(cfg PluginConfig) string {
+	preferred := cfg.app_entry.trim_space()
+	return if preferred != '' { preferred } else { cfg.entry.trim_space() }
+}
+
+// Runs the plugin's settings through resolve_embedded_host_runtime_config (no CLI args, no
+// overrides) and copies the resolved values into a VjsxRuntimeFacadeConfig. A resolution
+// failure is returned as 'plugin_runtime_config_failed:<name>:<reason>'.
+fn vjsx_plugin_runtime_config(name string, cfg PluginConfig) !VjsxRuntimeFacadeConfig {
+	resolved := resolve_embedded_host_runtime_config([]string{}, EmbeddedHostRuntimeConfig{
+		app_entry:         plugin_config_app_entry(cfg)
+		module_root:       cfg.module_root
+		build_root:        cfg.build_root
+		signature_root:    cfg.signature_root
+		signature_include: cfg.signature_include.clone()
+		signature_exclude: cfg.signature_exclude.clone()
+		runtime_profile:   cfg.runtime_profile
+		lane_count:        cfg.thread_count
+		max_requests:      cfg.max_requests
+		enable_fs:         cfg.enable_fs
+		enable_process:    cfg.enable_process
+		enable_network:    cfg.enable_network
+	}, EmbeddedHostCliOverrides{}) or { return error('plugin_runtime_config_failed:${name}:${err.msg()}') }
+	return VjsxRuntimeFacadeConfig{
+		app_entry:         resolved.app_entry
+		module_root:       resolved.module_root
+		build_root:        resolved.build_root
+		signature_root:    resolved.signature_root
+		signature_include: resolved.signature_include.clone()
+		signature_exclude: resolved.signature_exclude.clone()
+		runtime_profile:   resolved.runtime_profile
+		thread_count:      resolved.lane_count
+		max_requests:      resolved.max_requests
+		enable_fs:         resolved.enable_fs
+		enable_process:    resolved.enable_process
+		enable_network:    resolved.enable_network
+	}
+}
+
+// Builds an executor for each plugin whose kind is blank or 'vjsx'; unresolvable ones are logged and skipped.
+fn build_vjsx_plugin_runtimes(configs map[string]PluginConfig) map[string]InProcVjsxExecutor {
+	mut executors := map[string]InProcVjsxExecutor{}
+	for name, cfg in configs {
+		if cfg.kind.trim_space().to_lower() in ['', 'vjsx'] {
+			if resolved := vjsx_plugin_runtime_config(name, cfg) {
+				executors[name] = new_inproc_vjsx_executor(resolved)
+			} else {
+				log.warn('[vhttpd] plugin runtime unavailable name=${name} kind=${cfg.kind} entry=${plugin_config_app_entry(cfg)} error=${err.msg()}')
+			}
+		}
+	}
+	return executors
+}
+
+// Calls close() on every executor in app.plugin_vjsx, then replaces the map with an
+// empty one.
+fn (mut app App) close_all_plugins() {
+	for _, plugin_runtime in app.plugin_vjsx { plugin_runtime.close() }
+	app.plugin_vjsx = map[string]InProcVjsxExecutor{}
+}
+
+// Routes req to the vjsx executor registered under the trimmed req.plugin name, or returns a plugin_* error.
+fn (mut app App) call_plugin(req PluginCallRequest) !PluginCallResponse {
+	name := req.plugin.trim_space()
+	if name.len == 0 { return error('plugin_missing_name') }
+	cfg := app.plugin_configs[name] or { return error('plugin_not_configured:${name}') }
+	kind := cfg.kind.trim_space().to_lower()
+	if kind != '' && kind != 'vjsx' {
+		return error('plugin_unsupported_kind:${name}:${cfg.kind}')
+	}
+	executor := app.plugin_vjsx[name] or { return error('plugin_runtime_unavailable:${name}') }
+	return executor.call_plugin(mut app, req)
+}
+
+// Streaming twin of call_plugin: same lookup and error codes, with on_frame passed on to the executor.
+fn (mut app App) call_plugin_stream(req PluginCallRequest, on_frame PluginStreamFrameFn) !PluginStreamCallResponse {
+	name := req.plugin.trim_space()
+	if name.len == 0 { return error('plugin_missing_name') }
+	cfg := app.plugin_configs[name] or { return error('plugin_not_configured:${name}') }
+	kind := cfg.kind.trim_space().to_lower()
+	if kind != '' && kind != 'vjsx' {
+		return error('plugin_unsupported_kind:${name}:${cfg.kind}')
+	}
+	executor := app.plugin_vjsx[name] or { return error('plugin_runtime_unavailable:${name}') }
+	return executor.call_plugin_stream(mut app, req, on_frame)
+}
diff --git a/src/server_shutdown_hooks.v b/src/server_shutdown_hooks.v
index 9483659..f58a472 100644
--- a/src/server_shutdown_hooks.v
+++ b/src/server_shutdown_hooks.v
@@ -8,6 +8,7 @@ fn shutdown_app_runtime(mut app App, runtime_cfg ServerRuntimeConfig) {
 	})
 	runtime_cfg.executor_plan.lifecycle.stop(mut app)
 	app.logic_executor.close()
+	app.close_all_plugins()
 	// Graceful provider shutdown is now spec/runtime-driven.
 	app.stop_all_providers()
 	os.rm(runtime_cfg.internal_admin_socket) or {}
diff --git a/src/worker_backend_transport.v b/src/worker_backend_transport.v
index 1b82402..2d5c629 100644
--- a/src/worker_backend_transport.v
+++ b/src/worker_backend_transport.v
@@ -10,7 +10,8 @@ import time
 
 fn write_frame(mut conn unix.StreamConn, payload string) ! {
 	size := payload.len
-	header := [u8((size >> 24) & 0xff), u8((size >> 16) & 0xff), u8((size >> 8) & 0xff), u8(size & 0xff)]
+	header := [u8((size >> 24) & 0xff), u8((size >> 16) & 0xff), u8((size >> 8) & 0xff),
+		u8(size & 0xff)]
 	conn.write_ptr(&header[0], 4)!
 	conn.write_string(payload)!
 }
@@ -99,7 +100,8 @@ fn try_decode_stream_start(raw string) ?WorkerStreamFrame {
 
 fn try_decode_upstream_plan(raw string) ?WorkerUpstreamPlanFrame {
 	frame := json.decode(WorkerUpstreamPlanFrame, raw) or { return none }
-	if ((frame.mode == 'stream' && frame.strategy == 'upstream_plan') || frame.mode == 'upstream_plan') && frame.event == 'start' {
+	if ((frame.mode == 'stream' && frame.strategy == 'upstream_plan')
+		|| frame.mode == 'upstream_plan') && frame.event == 'start' {
 		return frame
 	}
 	return none
@@ -198,7 +200,7 @@ fn read_websocket_upstream_response(mut conn unix.StreamConn) !WorkerWebSocketUp
 
 fn (mut app App) worker_backend_dispatch_websocket_upstream(req WorkerWebSocketUpstreamDispatchRequest) !WorkerWebSocketUpstreamDispatchResponse {
 	socket, mut conn := app.worker_backend_connect_selected()!
-	
+
 	app.on_worker_request_started(socket)
 	defer {
 		app.on_worker_request_finished(socket)
@@ -238,7 +240,7 @@ fn status_reason_phrase(status int) string {
 	}
 }
 
-fn write_http_stream_headers_conn(mut conn net.TcpConn, status int, content_type string, extra_headers map[string]string, chunked bool) ! {
+fn write_http_stream_headers_conn_with_close(mut conn net.TcpConn, status int, content_type string, extra_headers map[string]string, chunked bool, close_conn bool) ! {
 	mut code := status
 	if code <= 0 {
 		code = 200
@@ -246,7 +248,9 @@ fn write_http_stream_headers_conn(mut conn net.TcpConn, status int, content_type
 	mut sb := strings.new_builder(512)
 	sb.write_string('HTTP/1.1 ${code} ${status_reason_phrase(code)}\r\n')
 	sb.write_string('Server: vhttpd\r\n')
-	sb.write_string('Connection: close\r\n')
+	if close_conn {
+		sb.write_string('Connection: close\r\n')
+	}
 	if chunked {
 		sb.write_string('Transfer-Encoding: chunked\r\n')
 	}
@@ -265,8 +269,14 @@ fn write_http_stream_headers_conn(mut conn net.TcpConn, status int, content_type
 	conn.write_string(sb.str())!
 }
 
+fn write_http_stream_headers_conn(mut conn net.TcpConn, status int, content_type string, extra_headers map[string]string, chunked bool) ! {
+	write_http_stream_headers_conn_with_close(mut conn, status, content_type, extra_headers,
+		chunked, true)! // close_conn = true, so the 'Connection: close' header is written
+}
+
 fn write_http_stream_headers(mut ctx Context, status int, content_type string, extra_headers map[string]string, chunked bool) ! {
-	write_http_stream_headers_conn(mut ctx.conn, status, content_type, extra_headers, chunked)!
+	write_http_stream_headers_conn(mut ctx.conn, status, content_type, extra_headers,
+		chunked)!
 }
 
 fn write_chunk(mut conn net.TcpConn, data string) ! {
@@ -278,6 +288,10 @@ fn write_chunk(mut conn net.TcpConn, data string) ! {
 	conn.write_string('\r\n')!
 }
 
+fn write_final_chunk(mut conn net.TcpConn) ! {
+	conn.write_string('0\r\n\r\n')! // zero-length chunk followed by the blank line
+}
+
 fn write_sse_message(mut conn net.TcpConn, frame WorkerStreamFrame) ! {
 	mut sb := strings.new_builder(256)
 	if frame.sse_id != '' {
@@ -357,8 +371,8 @@ fn (mut app App) execute_websocket_dispatch_commands_result(commands []WorkerWeb
 	}
 	return WorkerWebSocketDispatchCommandsResult{
 		close_frame: close_frame
-		has_close: has_close
-		failures: failures
+		has_close:   has_close
+		failures:    failures
 	}
 }
 
diff --git a/v.mod b/v.mod
index 2e7aa75..a113253 100644
--- a/v.mod
+++ b/v.mod
@@ -1,5 +1,6 @@
 Module {
 	name: 'vhttpd'
+	base_url: 'src'
 	description: 'V HTTP daemon and provider runtime toolkit'
 	version: '0.0.1'
 }

From e369e78d2f82d108518c0c1d95b3b5bdb0cbd2e3 Mon Sep 17 00:00:00 2001
From: weigang <guweigang@bullsoft.org>
Date: Wed, 6 May 2026 09:59:04 +0800
Subject: [PATCH 03/10] openai: add gateway integration coverage

---
 src/openai_gateway_integration_test.v | 1525 +++++++++++++++++++++++++
 1 file changed, 1525 insertions(+)
 create mode 100644 src/openai_gateway_integration_test.v

diff --git a/src/openai_gateway_integration_test.v b/src/openai_gateway_integration_test.v
new file mode 100644
index 0000000..ef740f2
--- /dev/null
+++ b/src/openai_gateway_integration_test.v
@@ -0,0 +1,1525 @@
+module main
+
+import net
+import net.http
+import os
+import time
+import veb
+
+fn openai_integration_free_port_pair() (int, int) { // returns (p, p + 1) after binding and releasing both on 127.0.0.1
+	start := int((time.now().unix_milli() + os.getpid()) % 10000)
+	for attempt := 0; attempt < 1000; attempt++ {
+		port := 30000 + ((start + attempt) % 20000)
+		mut lower := net.listen_tcp(.ip, '127.0.0.1:${port}') or { continue }
+		mut upper := net.listen_tcp(.ip, '127.0.0.1:${port + 1}') or {
+			lower.close() or {}
+			continue
+		}
+		lower.close() or {}
+		upper.close() or {}
+		return port, port + 1
+	}
+	panic('openai integration could not find free TCP port pair')
+}
+
+// Retries a GET on url up to 80 times, 25 ms apart; returns on the first success or when retries run out.
+fn openai_integration_wait_for_http(url string) {
+	for _ in 0 .. 80 {
+		if _ := http.fetch(url: url, method: .get) {
+			return
+		}
+		time.sleep(25 * time.millisecond)
+	}
+}
+
+// Polls every 25 ms, for at most 80 rounds, until path exists; gives up silently afterwards.
+fn openai_integration_wait_for_file(path string) {
+	mut remaining := 80
+	for remaining > 0 && !os.exists(path) {
+		time.sleep(25 * time.millisecond)
+		remaining--
+	}
+}
+
+// Reads one HTTP request from conn. Keeps reading (at most 80 reads) until the header
+// terminator has arrived and the body is at least Content-Length bytes long, or until a
+// read fails or returns no data. Returns everything that was read, headers included.
+fn openai_integration_read_http_request(mut conn net.TcpConn) string {
+	mut request := ''
+	mut chunk := []u8{len: 4096}
+	for _ in 0 .. 80 {
+		got := conn.read(mut chunk) or { 0 }
+		if got <= 0 {
+			break
+		}
+		request += chunk[..got].bytestr()
+		head_end := request.index('\r\n\r\n') or { continue }
+		mut declared := 0
+		for field in request[..head_end].split('\r\n') {
+			if field.to_lower().starts_with('content-length:') {
+				declared = field.all_after(':').trim_space().int()
+			}
+		}
+		received := request.len - head_end - 4
+		if received >= declared {
+			break
+		}
+	}
+	return request
+}
+
+// Collects data from conn (2 s read timeout, at most 80 reads) until marker appears or a read yields nothing.
+fn openai_integration_read_http_response_until(mut conn net.TcpConn, marker string) string {
+	conn.set_read_timeout(2 * time.second)
+	mut collected := ''
+	mut chunk := []u8{len: 4096}
+	for _ in 0 .. 80 {
+		got := conn.read(mut chunk) or { 0 }
+		if got > 0 {
+			collected += chunk[..got].bytestr()
+		}
+		if got <= 0 || collected.contains(marker) {
+			break
+		}
+	}
+	return collected
+}
+
+// Single-shot mock provider for the gateway integration tests. Binds 127.0.0.1:port, writes
+// 'ready' to ready_file, accepts exactly one connection, stores the raw request in
+// request_log, then answers with the canned response selected by mode. Any mode not listed
+// in the match gets a plain chat.completion JSON body. Write errors are ignored throughout.
+fn openai_integration_mock_upstream(port int, mode string, request_log string, ready_file string) {
+	mut server := net.listen_tcp(.ip, '127.0.0.1:${port}') or { panic(err) }
+	defer {
+		server.close() or {}
+	}
+	// The ready marker is written only after the listener is bound.
+	os.write_file(ready_file, 'ready') or {}
+	mut client := server.accept() or { return }
+	defer {
+		client.close() or {}
+	}
+	request := openai_integration_read_http_request(mut client)
+	os.write_file(request_log, request) or {}
+	match mode {
+		'stream' {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\nConnection: close\r\n\r\n') or {}
+			client.write_string('data: {"id":"chunk-1","choices":[{"delta":{"content":"hello"}}]}\n\n') or {}
+			client.write_string('data: [DONE]\n\n') or {}
+		}
+		'stream_keepalive_after_done' {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\nConnection: keep-alive\r\n\r\n') or {}
+			client.write_string('data: {"id":"chunk-keepalive","choices":[{"delta":{"content":"hello"}}]}\n\n') or {}
+			client.write_string('data: [DONE]\n\n') or {}
+			// Keep the socket open for 6 s after [DONE] before the deferred close runs.
+			time.sleep(6 * time.second)
+		}
+		'stream_chunked' {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\nTransfer-Encoding: chunked\r\nConnection: close\r\n\r\n') or {}
+			frame1 := 'data: {"id":"chunked-1","choices":[{"delta":{"content":"hello"}}]}\n\n'
+			frame2 := 'data: [DONE]\n\n'
+			// Each frame is prefixed with its length in hex; a zero-length chunk ends the body.
+			client.write_string('${frame1.len:x}\r\n${frame1}\r\n') or {}
+			client.write_string('${frame2.len:x}\r\n${frame2}\r\n') or {}
+			client.write_string('0\r\n\r\n') or {}
+		}
+		'responses_stream' {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\nConnection: close\r\n\r\n') or {}
+			client.write_string('event: response.created\ndata: {"type":"response.created","response":{"id":"resp_mock","object":"response","status":"in_progress"},"sequence_number":1}\n\n') or {}
+			client.write_string('event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello","sequence_number":2}\n\n') or {}
+			client.write_string('event: response.completed\ndata: {"type":"response.completed","response":{"id":"resp_mock","object":"response","status":"completed"},"sequence_number":3}\n\n') or {}
+		}
+		'responses_json' {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {}
+			client.write_string('{"id":"resp_mock","object":"response","status":"completed","output":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"response ok"}]}]}') or {}
+		}
+		'responses_stateful' {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {}
+			// Only the cancel endpoint reports status "cancelled".
+			if request.starts_with('POST /v1/responses/resp_123/cancel HTTP/') {
+				client.write_string('{"id":"resp_123","object":"response","status":"cancelled"}') or {}
+			} else {
+				client.write_string('{"id":"resp_123","object":"response","status":"completed"}') or {}
+			}
+		}
+		'ollama_ndjson' {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n') or {}
+			client.write_string('{"message":{"role":"assistant","content":"你"},"done":false}\n') or {}
+			client.write_string('{"message":{"role":"assistant","content":"好"},"done":false}\n') or {}
+			client.write_string('{"done":true}\n') or {}
+		}
+		'custom_ndjson' {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n') or {}
+			client.write_string('{"delta":"plugin-","finished":false}\n') or {}
+			client.write_string('{"delta":"mapped","finished":false}\n') or {}
+			client.write_string('{"finished":true}\n') or {}
+		}
+		'tool_call_ndjson' {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n') or {}
+			client.write_string('{"message":{"role":"assistant","tool_calls":[{"index":0,"id":"call_search","type":"function","function":{"name":"search","arguments":"{\\"q\\":\\"vh"}}]},"done":false}\n') or {}
+			client.write_string('{"message":{"role":"assistant","tool_calls":[{"index":0,"function":{"arguments":"ttpd\\"}"}}]},"done":false}\n') or {}
+			client.write_string('{"done":true}\n') or {}
+		}
+		'usage_ndjson' {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n') or {}
+			client.write_string('{"message":{"role":"assistant","content":"usage ok"},"done":false}\n') or {}
+			client.write_string('{"done":true,"prompt_eval_count":5,"eval_count":9}\n') or {}
+		}
+		'json_error' {
+			client.write_string('HTTP/1.1 429 Too Many Requests\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {}
+			client.write_string('{"error":{"message":"provider quota exceeded","type":"rate_limit_error","code":"rate_limit_exceeded"}}') or {}
+		}
+		'stream_error' {
+			client.write_string('HTTP/1.1 503 Service Unavailable\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {}
+			client.write_string('{"error":{"message":"provider overloaded","type":"server_error","code":"provider_overloaded"}}') or {}
+		}
+		else {
+			client.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {}
+			client.write_string('{"id":"cmpl-mock","object":"chat.completion","choices":[{"message":{"role":"assistant","content":"ok"}}]}') or {}
+		}
+	}
+}
+
+// Two-connection mock provider: the first request always gets a 503 "primary overloaded" error.
+// The second is answered by its request line (mapped NDJSON, SSE, or JSON) and gets its own 503
+// when it targets neither /v1/fallback/ path. request_log is written once both have been read.
+fn openai_integration_mock_fallback_upstream(port int, request_log string, ready_file string) {
+	mut server := net.listen_tcp(.ip, '127.0.0.1:${port}') or { panic(err) }
+	defer {
+		server.close() or {}
+	}
+	os.write_file(ready_file, 'ready') or {}
+	mut primary := server.accept() or { return }
+	primary_request := openai_integration_read_http_request(mut primary)
+	primary.write_string('HTTP/1.1 503 Service Unavailable\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {}
+	primary.write_string('{"error":{"message":"primary overloaded","type":"server_error","code":"primary_overloaded"}}') or {}
+	primary.close() or {}
+	mut retry := server.accept() or { return }
+	defer {
+		retry.close() or {}
+	}
+	retry_request := openai_integration_read_http_request(mut retry)
+	os.write_file(request_log, primary_request + '\n---SECOND---\n' + retry_request) or {}
+	if retry_request.starts_with('POST /v1/fallback/api/chat HTTP/') {
+		retry.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n') or {}
+		retry.write_string('{"message":{"role":"assistant","content":"mapped "},"done":false}\n') or {}
+		retry.write_string('{"message":{"role":"assistant","content":"fallback"},"done":false}\n') or {}
+		retry.write_string('{"done":true}\n') or {}
+	} else if retry_request.starts_with('POST /v1/fallback/chat/completions HTTP/') {
+		if retry_request.contains('"stream":true') {
+			retry.write_string('HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\nConnection: close\r\n\r\n') or {}
+			retry.write_string('data: {"id":"chunk-fallback","choices":[{"delta":{"content":"fallback stream"}}]}\n\n') or {}
+			retry.write_string('data: [DONE]\n\n') or {}
+		} else {
+			retry.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {}
+			retry.write_string('{"id":"cmpl-fallback","object":"chat.completion","choices":[{"message":{"role":"assistant","content":"fallback ok"}}]}') or {}
+		}
+	} else {
+		retry.write_string('HTTP/1.1 503 Service Unavailable\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {}
+		retry.write_string('{"error":{"message":"fallback was not used","type":"server_error","code":"fallback_not_used"}}') or {}
+	}
+}
+
+fn openai_integration_start_gateway(port int, upstream_port int, plugin_file string) { // builds a gateway App (OpenAI enabled under /v1) and runs it on 127.0.0.1:port
+	plugins := if plugin_file.trim_space() == '' { // blank plugin_file: no plugin is configured
+		map[string]PluginConfig{}
+	} else {
+		{
+			'planner': PluginConfig{
+				kind:            'vjsx'
+				app_entry:       plugin_file
+				runtime_profile: 'node'
+				thread_count:    1
+			}
+		}
+	}
+	mut app := App{
+		event_log:                  ''
+		started_at_unix:            time.now().unix()
+		openai_enabled:             true
+		openai_base_path:           '/v1'
+		openai_plugin:              if plugin_file.trim_space() == '' { '' } else { 'planner' }
+		openai_default_backend:     'mock'
+		openai_endpoints:           OpenAIEndpointsConfig{}
+		openai_backends:            {
+			'mock':   OpenAIBackendConfig{
+				base_url: 'http://127.0.0.1:${upstream_port}/v1'
+			}
+			'backup': OpenAIBackendConfig{
+				base_url: 'http://127.0.0.1:${upstream_port}/v1' // same upstream as 'mock'
+			}
+			'exec':   OpenAIBackendConfig{
+				kind:     'executor'
+				executor: 'planner'
+			}
+		}
+		openai_routes:              {
+			'public': OpenAIRouteConfig{
+				models:         ['public-model']
+				backend:        'mock'
+				upstream_model: 'builtin-upstream-model'
+			}
+		}
+		plugin_configs:             plugins
+		plugin_vjsx:                build_vjsx_plugin_runtimes(plugins)
+		openai_responses:           new_memory_state_store[OpenAIResponseRecord]()
+		upstream_sessions:          map[string]UpstreamRuntimeSession{}
+		mcp_sessions:               map[string]McpSession{}
+		ws_hub_conns:               map[string]HubConn{}
+		ws_hub_room_members:        map[string]map[string]bool{}
+		ws_hub_conn_rooms:          map[string]map[string]bool{}
+		ws_hub_conn_meta:           map[string]map[string]string{}
+		ws_hub_pending:             map[string][]HubPendingMessage{}
+		feishu_runtime:             map[string]FeishuProviderRuntime{}
+		websocket_upstream_started: map[string]bool{}
+		providers:                  ProviderHost{
+			registry: map[string]Provider{}
+			specs:    map[string]ProviderSpec{}
+		}
+		fixture_websocket_runtime:  map[string]FixtureWebSocketUpstreamRuntime{}
+		provider_instance_specs:    map[string]ProviderInstanceSpec{}
+		codex_instances:            map[string]CodexProviderRuntime{}
+		feishu_buffers:             map[string]FeishuStreamBuffer{}
+	}
+	veb.run_at[App, Context](mut app,
+		host:                 '127.0.0.1'
+		port:                 port
+		family:               .ip
+		show_startup_message: false
+	) or {} // an error from run_at is discarded here
+}
+
+// Writes 'openai-planner.mts' into temp_dir and returns its path. On 'chat.route' the script swaps
+// the model for 'plugin-upstream-model' and plans a passthrough POST to backend 'mock'.
+fn openai_integration_write_plugin(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-planner.mts')
+	os.write_file(script_path, "
+export function openai(req) {
+  if (req.op !== 'chat.route') return { not_handled: true };
+  const payload = JSON.parse(req.payload);
+  const body = JSON.parse(payload.body);
+  body.model = 'plugin-upstream-model';
+  return {
+    backend: 'mock',
+    method: 'POST',
+    path: '/chat/completions',
+    headers: { 'x-plugin-plan': 'yes' },
+    body: JSON.stringify(body),
+    stream_mode: 'passthrough',
+  };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// Writes 'openai-bad-planner.mts' into temp_dir and returns its path. On 'chat.route' the
+// script returns a plan whose method is 'TRACE'.
+fn openai_integration_write_bad_plugin(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-bad-planner.mts')
+	os.write_file(script_path, "
+export function openai(req) {
+  if (req.op !== 'chat.route') return { not_handled: true };
+  return {
+    backend: 'mock',
+    method: 'TRACE',
+    path: '/chat/completions',
+    body: '{}',
+    stream_mode: 'passthrough',
+  };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// Writes 'openai-ollama-planner.mts' into temp_dir and returns its path. On 'chat.route' the script
+// plans a POST to /api/chat with model 'qwen2.5', mapped from NDJSON to openai.chat.completion.
+fn openai_integration_write_ollama_plugin(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-ollama-planner.mts')
+	os.write_file(script_path, "
+export function openai(req) {
+  if (req.op !== 'chat.route') return { not_handled: true };
+  const payload = JSON.parse(req.payload);
+  const body = JSON.parse(payload.body);
+  return {
+    backend: 'mock',
+    method: 'POST',
+    path: '/api/chat',
+    headers: { 'x-plugin-plan': 'ollama' },
+    body: JSON.stringify({
+      model: 'qwen2.5',
+      messages: body.messages,
+      stream: body.stream === true,
+    }),
+    stream_mode: 'mapped',
+    response_codec: 'ndjson',
+    output_protocol: 'openai.chat.completion',
+  };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// Writes 'openai-tool-call-planner.mts' into temp_dir and returns its path. On 'chat.route' the
+// script plans a mapped NDJSON POST to /api/chat whose body is {stream: true}.
+fn openai_integration_write_tool_call_plugin(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-tool-call-planner.mts')
+	os.write_file(script_path, "
+export function openai(req) {
+  if (req.op !== 'chat.route') return { not_handled: true };
+  return {
+    backend: 'mock',
+    method: 'POST',
+    path: '/api/chat',
+    body: JSON.stringify({ stream: true }),
+    stream_mode: 'mapped',
+    response_codec: 'ndjson',
+    output_protocol: 'openai.chat.completion',
+  };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// Writes 'openai-usage-planner.mts' into temp_dir and returns its path. On 'chat.route' the
+// script plans a mapped NDJSON POST to /api/chat whose body is {stream: false}.
+fn openai_integration_write_usage_plugin(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-usage-planner.mts')
+	os.write_file(script_path, "
+export function openai(req) {
+  if (req.op !== 'chat.route') return { not_handled: true };
+  return {
+    backend: 'mock',
+    method: 'POST',
+    path: '/api/chat',
+    body: JSON.stringify({ stream: false }),
+    stream_mode: 'mapped',
+    response_codec: 'ndjson',
+    output_protocol: 'openai.chat.completion',
+  };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// Writes 'openai-executor-planner.mts' into temp_dir and returns its path. The script routes chat
+// and responses requests to backend 'exec' and answers the matching *.execute ops itself.
+fn openai_integration_write_executor_plugin(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-executor-planner.mts')
+	os.write_file(script_path, "
+async function* executorFrames(body) {
+  yield { content: 'executor ', done: false };
+  yield { content: body.messages?.[0]?.content ?? 'ok', done: false };
+  yield { usage: { prompt_tokens: 3, completion_tokens: 4, total_tokens: 7 }, done: true };
+}
+
+async function* responseEvents(body) {
+  const input = Array.isArray(body.input) ? body.input.map((item) => item.content || '').join(' ') : String(body.input || 'ok');
+  yield { type: 'response.created', response: { id: 'resp_exec', object: 'response', status: 'in_progress' }, sequence_number: 1 };
+  yield { type: 'response.output_text.delta', delta: 'executor ' + input, sequence_number: 2 };
+  yield { type: 'response.completed', response: { id: 'resp_exec', object: 'response', status: 'completed' }, sequence_number: 3 };
+}
+
+export function openai(req) {
+  if (req.op === 'chat.route') {
+    return {
+      backend: 'exec',
+      method: 'POST',
+      path: '/executor/chat',
+      body: JSON.parse(req.payload).body,
+      stream_mode: 'executor',
+    };
+  }
+  if (req.op === 'responses.route') {
+    return {
+      backend: 'exec',
+      method: 'POST',
+      path: '/executor/responses',
+      body: JSON.parse(req.payload).body,
+      stream_mode: 'executor',
+      output_protocol: 'openai.response',
+    };
+  }
+  if (req.op === 'chat.execute') {
+    const payload = JSON.parse(req.payload);
+    const body = JSON.parse(payload.body);
+    if (payload.stream) {
+      return executorFrames(body);
+    }
+    return {
+      content: 'executor ' + (body.messages?.[0]?.content ?? 'ok'),
+      usage: { prompt_tokens: 3, completion_tokens: 4, total_tokens: 7 },
+      done: true,
+    };
+  }
+  if (req.op === 'responses.execute') {
+    const payload = JSON.parse(req.payload);
+    const body = JSON.parse(payload.body);
+    const input = Array.isArray(body.input) ? body.input.map((item) => item.content || '').join(' ') : String(body.input || 'ok');
+    if (payload.stream) {
+      return responseEvents(body);
+    }
+    return {
+      id: 'resp_exec',
+      object: 'response',
+      status: 'completed',
+      output: [{ type: 'message', role: 'assistant', content: [{ type: 'output_text', text: 'executor ' + input }] }],
+    };
+  }
+  return { not_handled: true };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// Writes 'openai-frame-mapper.mts' into temp_dir and returns its path. The script plans a mapped
+// stream with mapper 'plugin' and, on 'chat.map_frame', returns frame.delta upper-cased as content.
+fn openai_integration_write_frame_mapper_plugin(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-frame-mapper.mts')
+	os.write_file(script_path, "
+export function openai(req) {
+  if (req.op === 'chat.route') {
+    const payload = JSON.parse(req.payload);
+    const body = JSON.parse(payload.body);
+    return {
+      backend: 'mock',
+      method: 'POST',
+      path: '/custom/stream',
+      body: JSON.stringify({ prompt: body.messages?.[0]?.content ?? '', stream: true }),
+      stream_mode: 'mapped',
+      response_codec: 'ndjson',
+      output_protocol: 'openai.chat.completion',
+      mapper: 'plugin',
+    };
+  }
+  if (req.op === 'chat.map_frame') {
+    const payload = JSON.parse(req.payload);
+    const frame = JSON.parse(payload.frame);
+    return {
+      content: frame.delta ? frame.delta.toUpperCase() : '',
+      done: frame.finished === true,
+    };
+  }
+  return { not_handled: true };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// Writes 'openai-plugin-tool-call-mapper.mts' into temp_dir and returns its path. On 'chat.map_frame'
+// the script turns each unfinished frame into a 'lookup' tool call carrying frame.delta as arguments.
+fn openai_integration_write_plugin_tool_call_mapper(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-plugin-tool-call-mapper.mts')
+	os.write_file(script_path, "
+export function openai(req) {
+  if (req.op === 'chat.route') {
+    return {
+      backend: 'mock',
+      method: 'POST',
+      path: '/custom/stream',
+      body: JSON.stringify({ stream: true }),
+      stream_mode: 'mapped',
+      response_codec: 'ndjson',
+      output_protocol: 'openai.chat.completion',
+      mapper: 'plugin',
+    };
+  }
+  if (req.op === 'chat.map_frame') {
+    const payload = JSON.parse(req.payload);
+    const frame = JSON.parse(payload.frame);
+    if (frame.finished) return { done: true };
+    return {
+      tool_calls: [{
+        index: 0,
+        id: 'call_plugin',
+        type: 'function',
+        function: { name: 'lookup', arguments: frame.delta },
+      }],
+      finish_reason: 'tool_calls',
+    };
+  }
+  return { not_handled: true };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// Writes 'openai-mapper-error.mts' into temp_dir and returns its path. The script plans a mapped
+// stream with mapper 'plugin' and answers every 'chat.map_frame' with an error object.
+fn openai_integration_write_mapper_error_plugin(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-mapper-error.mts')
+	os.write_file(script_path, "
+export function openai(req) {
+  if (req.op === 'chat.route') {
+    return {
+      backend: 'mock',
+      method: 'POST',
+      path: '/custom/stream',
+      body: JSON.stringify({ stream: true }),
+      stream_mode: 'mapped',
+      response_codec: 'ndjson',
+      output_protocol: 'openai.chat.completion',
+      mapper: 'plugin',
+    };
+  }
+  if (req.op === 'chat.map_frame') {
+    return { error: { message: 'mapper refused frame' } };
+  }
+  return { not_handled: true };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// Writes 'openai-fallback-planner.mts' into temp_dir and returns its path. The script routes chat to
+// backend 'mock' and, on 'chat.fallback' after a 503 from 'mock', re-plans against backend 'backup'.
+fn openai_integration_write_fallback_plugin(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-fallback-planner.mts')
+	os.write_file(script_path, "
+export function openai(req) {
+  if (req.op === 'chat.route') {
+    const payload = JSON.parse(req.payload);
+    return {
+      backend: 'mock',
+      method: 'POST',
+      path: '/primary/chat/completions',
+      body: payload.body,
+      stream_mode: 'passthrough',
+    };
+  }
+  if (req.op === 'chat.fallback') {
+    const payload = JSON.parse(req.payload);
+    if (payload.failed_backend !== 'mock' || payload.status_code !== 503) {
+      return { not_handled: true };
+    }
+    return {
+      backend: 'backup',
+      method: 'POST',
+      path: '/fallback/chat/completions',
+      body: payload.body,
+      stream_mode: 'passthrough',
+    };
+  }
+  return { not_handled: true };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// Writes 'openai-mapped-fallback-planner.mts' into temp_dir and returns its path. Both the primary
+// plan and the 'chat.fallback' plan (after a 503 from 'mock') are mapped NDJSON /api/chat requests.
+fn openai_integration_write_mapped_fallback_plugin(temp_dir string) string {
+	script_path := os.join_path(temp_dir, 'openai-mapped-fallback-planner.mts')
+	os.write_file(script_path, "
+export function openai(req) {
+  if (req.op === 'chat.route') {
+    const payload = JSON.parse(req.payload);
+    const body = JSON.parse(payload.body);
+    return {
+      backend: 'mock',
+      method: 'POST',
+      path: '/primary/api/chat',
+      body: JSON.stringify({ model: 'primary-local', messages: body.messages, stream: true }),
+      stream_mode: 'mapped',
+      response_codec: 'ndjson',
+      output_protocol: 'openai.chat.completion',
+    };
+  }
+  if (req.op === 'chat.fallback') {
+    const payload = JSON.parse(req.payload);
+    if (payload.failed_backend !== 'mock' || payload.status_code !== 503) {
+      return { not_handled: true };
+    }
+    return {
+      backend: 'backup',
+      method: 'POST',
+      path: '/fallback/api/chat',
+      body: JSON.stringify({ model: 'backup-local', stream: true }),
+      stream_mode: 'mapped',
+      response_codec: 'ndjson',
+      output_protocol: 'openai.chat.completion',
+    };
+  }
+  return { not_handled: true };
+}
+") or { panic(err) }
+	return script_path
+}
+
+// End-to-end check of a non-streaming chat completion: the planner plugin's header and model
+// rewrite must reach the mock upstream, and the upstream's JSON body must reach the client.
+fn test_openai_gateway_plugin_non_stream_passthrough_hits_mock_upstream() {
+	workdir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_non_stream_integration_test')
+	os.rmdir_all(workdir) or {}
+	os.mkdir_all(workdir) or { panic(err) }
+	defer {
+		os.rmdir_all(workdir) or {}
+	}
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	captured_request := os.join_path(workdir, 'upstream.request.txt')
+	ready_marker := os.join_path(workdir, 'upstream.ready')
+	planner_script := openai_integration_write_plugin(workdir)
+	spawn openai_integration_mock_upstream(upstream_port, 'json', captured_request, ready_marker)
+	openai_integration_wait_for_file(ready_marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, planner_script)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	reply := http.fetch(
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		method: .post
+		header: http.new_header_from_map({http.CommonHeader.content_type: 'application/json'})
+		data:   '{"model":"public-model","messages":[{"role":"user","content":"hi"}]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"cmpl-mock"')
+	forwarded := os.read_file(captured_request) or { panic(err) }
+	assert forwarded.starts_with('POST /v1/chat/completions HTTP/')
+	assert forwarded.to_lower().contains('x-plugin-plan: yes')
+	assert forwarded.contains('"model":"plugin-upstream-model"')
+}
+
+// Streaming chat with an empty plugin path: checks SSE frames and the logged upstream body.
+fn test_openai_gateway_stream_passthrough_forwards_sse_bytes() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'stream', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, '')
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('data: {"id":"chunk-1"')
+	assert reply.body.contains('data: [DONE]')
+	captured := os.read_file(req_log) or { panic(err) }
+	assert captured.contains('"model":"builtin-upstream-model"')
+	assert captured.contains('"stream":true')
+}
+
+// Streaming chat against the 'stream_chunked' mock upstream: the body seen by the client must hold
+// both SSE frames intact and none of the chunk-size lines or the terminating zero chunk.
+fn test_openai_gateway_stream_passthrough_dechunks_upstream_sse() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_chunked_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'stream_chunked', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, '')
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}'
+	) or { panic(err) }
+	frame1 := 'data: {"id":"chunked-1","choices":[{"delta":{"content":"hello"}}]}\n\n'
+	frame2 := 'data: [DONE]\n\n'
+	assert reply.status_code == 200
+	assert reply.body.contains(frame1)
+	assert reply.body.contains(frame2)
+	assert !reply.body.contains('${frame1.len:x}\r\n')
+	assert !reply.body.contains('${frame2.len:x}\r\n')
+	assert !reply.body.contains('\r\n0\r\n')
+}
+
+// Streaming chat over a raw TCP connection: the response must be chunked, end with the zero chunk,
+// omit 'connection: close', and leave the same socket usable for a follow-up GET /health.
+fn test_openai_gateway_stream_passthrough_writes_chunked_response_boundary() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_response_chunked_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'stream', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, '')
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	body := '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}'
+	chat_request := 'POST /v1/chat/completions HTTP/1.1\r\nHost: 127.0.0.1:${gateway_port}\r\nContent-Type: application/json\r\nAccept: text/event-stream\r\nContent-Length: ${body.len}\r\n\r\n${body}'
+	mut conn := net.dial_tcp('127.0.0.1:${gateway_port}') or { panic(err) }
+	defer {
+		conn.close() or {}
+	}
+	conn.write_string(chat_request) or { panic(err) }
+	wire := openai_integration_read_http_response_until(mut conn, '\r\n0\r\n\r\n')
+	assert wire.starts_with('HTTP/1.1 200 OK')
+	assert wire.to_lower().contains('transfer-encoding: chunked')
+	assert !wire.to_lower().contains('connection: close')
+	assert wire.contains('data: {"id":"chunk-1"')
+	assert wire.contains('data: [DONE]')
+	assert wire.contains('\r\n0\r\n\r\n')
+	health_request := 'GET /health HTTP/1.1\r\nHost: 127.0.0.1:${gateway_port}\r\n\r\n'
+	conn.write_string(health_request) or { panic(err) }
+	followup := openai_integration_read_http_response_until(mut conn, '\r\n\r\nOK')
+	assert followup.starts_with('HTTP/1.1 200 OK')
+	assert followup.ends_with('\r\n\r\nOK')
+}
+
+// Raw-TCP streaming against 'stream_keepalive_after_done': response must end with the zero chunk.
+fn test_openai_gateway_stream_passthrough_finishes_on_done_before_upstream_close() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_done_boundary_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'stream_keepalive_after_done',
+		req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, '')
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	body := '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}'
+	chat_request := 'POST /v1/chat/completions HTTP/1.1\r\nHost: 127.0.0.1:${gateway_port}\r\nContent-Type: application/json\r\nAccept: text/event-stream\r\nContent-Length: ${body.len}\r\n\r\n${body}'
+	mut conn := net.dial_tcp('127.0.0.1:${gateway_port}') or { panic(err) }
+	defer {
+		conn.close() or {}
+	}
+	conn.write_string(chat_request) or { panic(err) }
+	wire := openai_integration_read_http_response_until(mut conn, '\r\n0\r\n\r\n')
+	assert wire.starts_with('HTTP/1.1 200 OK')
+	assert wire.to_lower().contains('transfer-encoding: chunked')
+	assert wire.contains('data: {"id":"chunk-keepalive"')
+	assert wire.contains('data: [DONE]')
+	assert wire.contains('\r\n0\r\n\r\n')
+}
+
+// Streaming chat with the ollama plugin and the 'ollama_ndjson' mock: the reply must be OpenAI
+// chat.completion.chunk SSE ending in [DONE]; the logged upstream request must be the plugin's plan.
+fn test_openai_gateway_mapped_ollama_ndjson_stream_outputs_openai_sse() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_ollama_stream_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_ollama_plugin(work_dir)
+	spawn openai_integration_mock_upstream(upstream_port, 'ollama_ndjson', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('data: {"id":"chatcmpl-')
+	assert reply.body.contains('"object":"chat.completion.chunk"')
+	assert reply.body.contains('"content":"你"')
+	assert reply.body.contains('"content":"好"')
+	assert reply.body.contains('data: [DONE]')
+	captured := os.read_file(req_log) or { panic(err) }
+	assert captured.starts_with('POST /v1/api/chat HTTP/')
+	assert captured.to_lower().contains('x-plugin-plan: ollama')
+	assert captured.contains('"model":"qwen2.5"')
+	assert captured.contains('"stream":true')
+}
+
+// Streaming chat with the tool-call plugin and the 'tool_call_ndjson' mock: the SSE reply must carry
+// tool_calls with id and name, the arguments in two fragments, finish_reason tool_calls and [DONE].
+fn test_openai_gateway_mapped_ndjson_tool_calls_output_openai_delta() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_tool_call_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_tool_call_plugin(work_dir)
+	spawn openai_integration_mock_upstream(upstream_port, 'tool_call_ndjson', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"tool_calls"')
+	assert reply.body.contains('"id":"call_search"')
+	assert reply.body.contains('"name":"search"')
+	assert reply.body.contains('"arguments":"{\\"q\\":\\"vh"')
+	assert reply.body.contains('"arguments":"ttpd\\"}"')
+	assert reply.body.contains('"finish_reason":"tool_calls"')
+	assert reply.body.contains('data: [DONE]')
+}
+
+// Non-stream chat with the tool-call plugin and the 'tool_call_ndjson' mock: the reply must be one
+// chat.completion object whose tool call has the full arguments string, with no 'data:' framing.
+fn test_openai_gateway_mapped_ndjson_tool_calls_aggregate_non_stream() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_tool_call_once_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_tool_call_plugin(work_dir)
+	spawn openai_integration_mock_upstream(upstream_port, 'tool_call_ndjson', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	headers := http.new_header(key: .content_type, value: 'application/json')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","stream":false,"messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"object":"chat.completion"')
+	assert reply.body.contains('"message"')
+	assert reply.body.contains('"tool_calls"')
+	assert reply.body.contains('"id":"call_search"')
+	assert reply.body.contains('"name":"search"')
+	assert reply.body.contains('"arguments":"{\\"q\\":\\"vhttpd\\"}"')
+	assert reply.body.contains('"finish_reason":"tool_calls"')
+	assert !reply.body.contains('data:')
+}
+
+// Non-stream chat with the usage plugin and the 'usage_ndjson' mock: the reply must carry the
+// content "usage ok" and usage counts 5/9/14, with no 'data:' framing.
+fn test_openai_gateway_mapped_ndjson_usage_aggregates_non_stream() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_usage_once_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_usage_plugin(work_dir)
+	spawn openai_integration_mock_upstream(upstream_port, 'usage_ndjson', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	headers := http.new_header(key: .content_type, value: 'application/json')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","stream":false,"messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"content":"usage ok"')
+	assert reply.body.contains('"usage"')
+	assert reply.body.contains('"prompt_tokens":5')
+	assert reply.body.contains('"completion_tokens":9')
+	assert reply.body.contains('"total_tokens":14')
+	assert !reply.body.contains('data:')
+}
+
+// Streaming chat with the usage plugin and the 'usage_ndjson' mock: the SSE reply must include the
+// content, an empty-choices chunk and usage counts 5/9/14, with "usage" appearing before [DONE].
+fn test_openai_gateway_mapped_ndjson_usage_outputs_stream_final_chunk() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_usage_stream_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_usage_plugin(work_dir)
+	spawn openai_integration_mock_upstream(upstream_port, 'usage_ndjson', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"content":"usage ok"')
+	assert reply.body.contains('"choices":[]')
+	assert reply.body.contains('"usage"')
+	assert reply.body.contains('"prompt_tokens":5')
+	assert reply.body.contains('"completion_tokens":9')
+	assert reply.body.contains('"total_tokens":14')
+	assert reply.body.contains('data: [DONE]')
+	assert reply.body.index('"usage"') or { -1 } < reply.body.index('data: [DONE]') or { -1 }
+}
+
+// Non-stream chat with the executor plugin and no mock upstream: checks the completion and usage.
+fn test_openai_gateway_executor_backend_non_stream_uses_vjsx_app() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_executor_once_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_executor_plugin(work_dir)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	headers := http.new_header(key: .content_type, value: 'application/json')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","messages":[{"role":"user","content":"handled"}]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"object":"chat.completion"')
+	assert reply.body.contains('"content":"executor handled"')
+	assert reply.body.contains('"usage"')
+	assert reply.body.contains('"total_tokens":7')
+}
+
+// Streaming chat with the executor plugin and no mock upstream: checks deltas, usage and [DONE].
+fn test_openai_gateway_executor_backend_stream_uses_vjsx_frames() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_executor_stream_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_executor_plugin(work_dir)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"stream"}]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"content":"executor "')
+	assert reply.body.contains('"content":"stream"')
+	assert reply.body.contains('"choices":[]')
+	assert reply.body.contains('"total_tokens":7')
+	assert reply.body.contains('data: [DONE]')
+}
+
+// Non-stream POST /v1/responses with an empty plugin path and the 'responses_json' mock: checks the
+// response object in the reply and the request line and model in the logged upstream request.
+fn test_openai_gateway_responses_passthrough_non_stream() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_once_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'responses_json', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, '')
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	headers := http.new_header(key: .content_type, value: 'application/json')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/responses'
+		header: headers
+		data:   '{"model":"public-model","input":"hello"}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"object":"response"')
+	assert reply.body.contains('response ok')
+	captured := os.read_file(req_log) or { panic(err) }
+	assert captured.starts_with('POST /v1/responses HTTP/')
+	assert captured.contains('"model":"builtin-upstream-model"')
+}
+
+// Streaming POST /v1/responses with an empty plugin path and the 'responses_stream' mock: checks the
+// created/delta/completed event names in the reply and the logged upstream request line.
+fn test_openai_gateway_responses_passthrough_stream() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_stream_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'responses_stream', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, '')
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/responses'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"input":"hello"}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('event: response.created')
+	assert reply.body.contains('response.output_text.delta')
+	assert reply.body.contains('response.completed')
+	captured := os.read_file(req_log) or { panic(err) }
+	assert captured.starts_with('POST /v1/responses HTTP/')
+}
+
+// Streaming /v1/responses with the executor plugin: checks the event names and streamed text.
+fn test_openai_gateway_responses_executor_stream_uses_async_iterable_events() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_executor_stream_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_executor_plugin(work_dir)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/responses'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"input":"stream"}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('event: response.created')
+	assert reply.body.contains('response.output_text.delta')
+	assert reply.body.contains('executor stream')
+	assert reply.body.contains('response.completed')
+}
+
+// Non-stream /v1/responses with the executor plugin: the created response must be retrievable by id.
+fn test_openai_gateway_responses_executor_non_stream_registers_response() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_executor_registry_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_executor_plugin(work_dir)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	headers := http.new_header(key: .content_type, value: 'application/json')
+	created := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/responses'
+		header: headers
+		data:   '{"model":"public-model","input":"remember me"}'
+	) or { panic(err) }
+	assert created.status_code == 200
+	assert created.body.contains('"id":"resp_exec"')
+	fetched := http.fetch(
+		method: .get
+		url:    'http://127.0.0.1:${gateway_port}/v1/responses/resp_exec'
+	) or { panic(err) }
+	assert fetched.status_code == 200
+	assert fetched.body.contains('"id":"resp_exec"')
+	assert fetched.body.contains('executor remember me')
+}
+
+// Streaming /v1/responses with the executor plugin: the response must be retrievable as completed.
+fn test_openai_gateway_responses_executor_stream_registers_completed_response() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_executor_stream_registry_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_executor_plugin(work_dir)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	streamed := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/responses'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"input":"stream"}'
+	) or { panic(err) }
+	assert streamed.status_code == 200
+	assert streamed.body.contains('response.completed')
+	fetched := http.fetch(
+		method: .get
+		url:    'http://127.0.0.1:${gateway_port}/v1/responses/resp_exec'
+	) or { panic(err) }
+	assert fetched.status_code == 200
+	assert fetched.body.contains('"id":"resp_exec"')
+	assert fetched.body.contains('"status":"completed"')
+}
+
+// GET /v1/responses/{id} with a query: the upstream request line must keep the encoded query.
+fn test_openai_gateway_responses_retrieve_preserves_query() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_retrieve_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'responses_stateful', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, '')
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	reply := http.fetch(
+		method: .get
+		url:    'http://127.0.0.1:${gateway_port}/v1/responses/resp_123?include[]=output_text'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"id":"resp_123"')
+	captured := os.read_file(req_log) or { panic(err) }
+	assert captured.starts_with('GET /v1/responses/resp_123?include%5B%5D=output_text HTTP/')
+}
+
+// POST /v1/responses/{id}/cancel: checks the cancelled reply and the logged upstream request line.
+fn test_openai_gateway_responses_cancel_passthrough() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_cancel_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'responses_stateful', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, '')
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/responses/resp_123/cancel'
+		data:   '{}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"status":"cancelled"')
+	captured := os.read_file(req_log) or { panic(err) }
+	assert captured.starts_with('POST /v1/responses/resp_123/cancel HTTP/')
+}
+
+// Streaming chat with the frame-mapper plugin and the 'custom_ndjson' mock: the SSE reply must carry
+// the "PLUGIN-" and "MAPPED" deltas and [DONE]; the upstream must see POST /v1/custom/stream.
+fn test_openai_gateway_plugin_frame_mapper_outputs_openai_sse() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_plugin_mapper_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_frame_mapper_plugin(work_dir)
+	spawn openai_integration_mock_upstream(upstream_port, 'custom_ndjson', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"content":"PLUGIN-"')
+	assert reply.body.contains('"content":"MAPPED"')
+	assert reply.body.contains('data: [DONE]')
+	captured := os.read_file(req_log) or { panic(err) }
+	assert captured.starts_with('POST /v1/custom/stream HTTP/')
+	assert captured.contains('"prompt":"hi"')
+}
+
+// Streaming chat with the plugin tool-call mapper and the 'custom_ndjson' mock: the SSE reply must
+// carry tool_calls for call_plugin/lookup with two argument fragments, finish_reason and [DONE].
+fn test_openai_gateway_plugin_frame_mapper_can_emit_tool_calls() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_plugin_tool_call_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_plugin_tool_call_mapper(work_dir)
+	spawn openai_integration_mock_upstream(upstream_port, 'custom_ndjson', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	mut headers := http.new_header(key: .content_type, value: 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"tool_calls"')
+	assert reply.body.contains('"id":"call_plugin"')
+	assert reply.body.contains('"name":"lookup"')
+	assert reply.body.contains('"arguments":"plugin-"')
+	assert reply.body.contains('"arguments":"mapped"')
+	assert reply.body.contains('"finish_reason":"tool_calls"')
+	assert reply.body.contains('data: [DONE]')
+}
+
+// Non-stream chat against the 'json_error' mock: expects HTTP 429 with rate-limit error fields.
+fn test_openai_gateway_non_stream_upstream_error_is_openai_error() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_json_error_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'json_error', req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, '')
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	headers := http.new_header(key: .content_type, value: 'application/json')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 429
+	assert reply.body.contains('"message":"provider quota exceeded"')
+	assert reply.body.contains('"type":"rate_limit_error"')
+	assert reply.body.contains('"code":"rate_limit_exceeded"')
+}
+
+// Non-stream chat with the fallback plugin: expects the fallback reply and both upstream requests logged.
+fn test_openai_gateway_non_stream_plugin_fallback_retries_backup_plan() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_fallback_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	req_log := os.join_path(work_dir, 'upstream.request.txt')
+	marker := os.join_path(work_dir, 'upstream.ready')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	plugin_path := openai_integration_write_fallback_plugin(work_dir)
+	spawn openai_integration_mock_fallback_upstream(upstream_port, req_log, marker)
+	openai_integration_wait_for_file(marker)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	headers := http.new_header(key: .content_type, value: 'application/json')
+	reply := http.fetch(
+		method: .post
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		header: headers
+		data:   '{"model":"public-model","messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"cmpl-fallback"')
+	assert reply.body.contains('fallback ok')
+	captured := os.read_file(req_log) or { panic(err) }
+	assert captured.contains('POST /v1/primary/chat/completions HTTP/')
+	assert captured.contains('POST /v1/fallback/chat/completions HTTP/')
+}
+
+fn test_openai_gateway_stream_plugin_fallback_retries_before_sse_headers() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_fallback_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	ready_flag := os.join_path(work_dir, 'upstream.ready')
+	capture_log := os.join_path(work_dir, 'upstream.request.txt')
+	plugin_path := openai_integration_write_fallback_plugin(work_dir)
+	mut headers := http.new_header()
+	headers.add(.content_type, 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_fallback_upstream(upstream_port, capture_log, ready_flag)
+	openai_integration_wait_for_file(ready_flag)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	reply := http.fetch(
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		method: .post
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('data: {"id":"chunk-fallback"')
+	assert reply.body.contains('fallback stream')
+	assert reply.body.contains('data: [DONE]')
+	seen := os.read_file(capture_log) or { panic(err) }
+	assert seen.contains('POST /v1/primary/chat/completions HTTP/')
+	assert seen.contains('POST /v1/fallback/chat/completions HTTP/')
+}
+
+fn test_openai_gateway_mapped_stream_plugin_fallback_retries_before_sse_headers() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_mapped_fallback_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	ready_flag := os.join_path(work_dir, 'upstream.ready')
+	capture_log := os.join_path(work_dir, 'upstream.request.txt')
+	plugin_path := openai_integration_write_mapped_fallback_plugin(work_dir)
+	mut headers := http.new_header()
+	headers.add(.content_type, 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_fallback_upstream(upstream_port, capture_log, ready_flag)
+	openai_integration_wait_for_file(ready_flag)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	reply := http.fetch(
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		method: .post
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('"object":"chat.completion.chunk"')
+	assert reply.body.contains('"content":"mapped "')
+	assert reply.body.contains('"content":"fallback"')
+	assert reply.body.contains('data: [DONE]')
+	seen := os.read_file(capture_log) or { panic(err) }
+	assert seen.contains('POST /v1/primary/api/chat HTTP/')
+	assert seen.contains('POST /v1/fallback/api/chat HTTP/')
+}
+
+fn test_openai_gateway_stream_upstream_error_is_openai_error() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_error_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	ready_flag := os.join_path(work_dir, 'upstream.ready')
+	capture_log := os.join_path(work_dir, 'upstream.request.txt')
+	mut headers := http.new_header()
+	headers.add(.content_type, 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'stream_error', capture_log,
+		ready_flag)
+	openai_integration_wait_for_file(ready_flag)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, '')
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	reply := http.fetch(
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		method: .post
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 503
+	assert reply.body.contains('"message":"provider overloaded"')
+	assert reply.body.contains('"code":"provider_overloaded"')
+	assert !reply.body.contains('data:')
+}
+
+fn test_openai_gateway_plugin_mapper_error_is_openai_sse_error() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_mapper_error_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	ready_flag := os.join_path(work_dir, 'upstream.ready')
+	capture_log := os.join_path(work_dir, 'upstream.request.txt')
+	plugin_path := openai_integration_write_mapper_error_plugin(work_dir)
+	mut headers := http.new_header()
+	headers.add(.content_type, 'application/json')
+	headers.add(.accept, 'text/event-stream')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_mock_upstream(upstream_port, 'custom_ndjson', capture_log,
+		ready_flag)
+	openai_integration_wait_for_file(ready_flag)
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	reply := http.fetch(
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		method: .post
+		header: headers
+		data:   '{"model":"public-model","stream":true,"messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 200
+	assert reply.body.contains('data: {"error":')
+	assert reply.body.contains('"message":"mapper refused frame"')
+	assert reply.body.contains('"code":"mapper_error"')
+	assert reply.body.contains('data: [DONE]')
+}
+
+fn test_openai_gateway_invalid_plugin_plan_returns_openai_error() {
+	work_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_bad_plan_integration_test')
+	os.rmdir_all(work_dir) or {}
+	os.mkdir_all(work_dir) or { panic(err) }
+	defer {
+		os.rmdir_all(work_dir) or {}
+	}
+	plugin_path := openai_integration_write_bad_plugin(work_dir)
+	mut headers := http.new_header()
+	headers.add(.content_type, 'application/json')
+	upstream_port, gateway_port := openai_integration_free_port_pair()
+	spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_path)
+	openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health')
+	reply := http.fetch(
+		url:    'http://127.0.0.1:${gateway_port}/v1/chat/completions'
+		method: .post
+		header: headers
+		data:   '{"model":"public-model","messages":[]}'
+	) or { panic(err) }
+	assert reply.status_code == 502
+	assert reply.body.contains('"code":"openai_plugin_plan_invalid_method"')
+	assert reply.body.contains('unsupported upstream method TRACE')
+}

From 24197257c61b956fb5e4cf3435a303c3d75b333d Mon Sep 17 00:00:00 2001
From: weigang <guweigang@bullsoft.org>
Date: Wed, 6 May 2026 09:59:11 +0800
Subject: [PATCH 04/10] docs: add OpenAI gateway examples

---
 docs/OPENAI_AGGREGATION_GATEWAY_PLAN.md       | 646 ++++++++++++++++++
 .../openai-gateway-dashscope-coding.toml      |  45 ++
 examples/config/openai-gateway.toml           |  64 ++
 .../vjsx/openai-dashscope-coding-plugin.mts   |  74 ++
 examples/vjsx/openai-executor-app.mts         | 101 +++
 examples/vjsx/openai-gateway-plugin.mts       | 212 ++++++
 6 files changed, 1142 insertions(+)
 create mode 100644 docs/OPENAI_AGGREGATION_GATEWAY_PLAN.md
 create mode 100644 examples/config/openai-gateway-dashscope-coding.toml
 create mode 100644 examples/config/openai-gateway.toml
 create mode 100644 examples/vjsx/openai-dashscope-coding-plugin.mts
 create mode 100644 examples/vjsx/openai-executor-app.mts
 create mode 100644 examples/vjsx/openai-gateway-plugin.mts

diff --git a/docs/OPENAI_AGGREGATION_GATEWAY_PLAN.md b/docs/OPENAI_AGGREGATION_GATEWAY_PLAN.md
new file mode 100644
index 0000000..df46a4d
--- /dev/null
+++ b/docs/OPENAI_AGGREGATION_GATEWAY_PLAN.md
@@ -0,0 +1,646 @@
+# OpenAI Aggregation Gateway Plan
+
+## Goal
+
+Build vhttpd into an OpenAI-compatible aggregation gateway.
+
+The important boundary is:
+
+- vhttpd owns network execution, stream lifecycle, SSE writing, upstream HTTP,
+  timeout/cancellation, tracing, auth envelope, and backpressure.
+- vjsx owns protocol intelligence: OpenAI compatibility mapping, model routing,
+  backend-specific request/response shaping, validation, and policy.
+
+In short, vhttpd should keep the data plane. vjsx should act as a protocol
+plugin and planning/mapping layer.
+
+## Why This Shape
+
+OpenAI-compatible aggregation has two very different responsibilities.
+
+The first is physical IO: accepting client HTTP requests, holding long-running
+connections, reading upstream streams, detecting disconnects, enforcing
+timeouts, and writing SSE frames. This belongs in vhttpd because it is closer to
+the server runtime and existing stream/upstream machinery.
+
+The second is protocol adaptation: deciding which backend should serve a model,
+converting OpenAI requests into upstream-specific requests, normalizing provider
+quirks, and mapping chunks back into OpenAI-compatible responses. This is a good
+fit for vjsx because TypeScript has mature libraries and schemas for this
+ecosystem, and protocol logic can evolve faster outside the core server.
+
+## Runtime Boundary
+
+```text
+client
+  -> vhttpd /v1/*
+  -> vhttpd parses request, auth, trace, lifecycle
+  -> vjsx protocol plugin returns a declarative plan
+  -> vhttpd executes upstream HTTP/executor plan
+  -> vhttpd decodes frames: sse | ndjson | json | text
+  -> optional vjsx frame mapper
+  -> vhttpd writes OpenAI-compatible JSON/SSE
+```
+
+vjsx should not own sockets for this feature. It should return plans and mapping
+decisions. vhttpd should own the actual fetch, stream read, and client write.
+
+## Responsibilities
+
+### vhttpd Owns
+
+- `/v1/*` HTTP dispatch surface.
+- Client connection takeover and SSE/chunked response writing.
+- Upstream HTTP execution.
+- Upstream stream decoding at the transport/framing layer.
+- Cancellation when the client disconnects.
+- Timeout and retry hooks.
+- Request ids, trace ids, response headers, access logs, and admin snapshots.
+- Fast-path passthrough for already OpenAI-compatible upstream streams.
+
+### vjsx Owns
+
+- Model alias and route selection.
+- Backend-specific request construction.
+- OpenAI request normalization and validation.
+- Upstream response and error mapping.
+- Provider-specific quirks.
+- Optional policy: fallback, tenant routing, capability selection.
+
+## Configuration Shape
+
+Prefer named map sections, matching the existing vhttpd style.
+
+```toml
+[openai]
+enabled = true
+base_path = "/v1"
+plugin = "openai-gateway"
+
+[openai.endpoints]
+models = true
+chat_completions = true
+responses = true
+embeddings = false
+
+[openai.routes.gpt4omini]
+models = ["gpt-4o-mini", "gpt-4o-mini-*"]
+backend = "openai-main"
+upstream_model = "gpt-4o-mini"
+
+[openai.routes.local-chat]
+models = ["llama3.1", "qwen2.5"]
+backend = "ollama-local"
+
+[openai.routes.agent]
+models = ["my-agent", "company-assistant"]
+backend = "agent-vjsx"
+
+[openai.backends.openai-main]
+kind = "openai_http"
+base_url = "https://api.openai.com/v1"
+api_key_env = "OPENAI_API_KEY"
+stream_mode = "passthrough"
+
+[openai.backends.ollama-local]
+kind = "http"
+base_url = "http://127.0.0.1:11434"
+stream_mode = "mapped"
+protocol_plugin = "openai_ollama"
+
+[openai.backends.agent-vjsx]
+kind = "executor"
+executor = "agent-vjsx"
+stream_mode = "vhttpd_sse"
+
+[plugins.agent-vjsx]
+kind = "vjsx"
+entry = "plugins/agent-executor.mts"
+runtime_profile = "node"
+enable_network = true
+
+[plugins.openai-gateway]
+kind = "vjsx"
+entry = "plugins/openai-gateway.mts"
+runtime_profile = "node"
+thread_count = 1
+enable_network = false
+```
+
+Site-level overrides should be allowed later:
+
+```toml
+[sites.ai_gateway]
+host = "127.0.0.1"
+port = 19890
+openai.enabled = true
+openai.base_path = "/v1"
+```
+
+## Protocol Plugin Contract
+
+The plugin should return declarative plans, not perform network IO.
+
+TypeScript shape:
+
+```ts
+type OpenAIPluginRequest = {
+  plugin: string;
+  capability: "openai";
+  op:
+    | "models"
+    | "chat.route"
+    | "chat.execute"
+    | "chat.fallback"
+    | "chat.map_frame"
+    | "responses.route"
+    | "responses.execute"
+    | string;
+  request_id: string;
+  trace_id: string;
+  payload: string;
+  metadata: Record<string, string>;
+};
+
+type OpenAIModelsResult =
+  | { not_handled: true }
+  | { models: string[] }
+  | { data: Array<{ id: string }> };
+
+type OpenAIChatRoutePlan =
+  | { not_handled: true }
+  | {
+      backend: string;
+      method?: "GET" | "POST" | "PUT" | "PATCH" | "DELETE" | "HEAD";
+      path?: `/${string}`;
+      headers?: Record<string, string>;
+      body?: string;
+      upstream_model?: string;
+      stream_mode?: "passthrough" | "mapped" | "executor";
+      response_codec?: "sse" | "json" | "ndjson" | "text";
+      output_protocol?: "openai.chat.completion";
+      mapper?: "builtin" | "plugin";
+    };
+```
+
+Example:
+
+```ts
+export function openai(req) {
+  switch (req.op) {
+    case "models":
+      return { models: ["gpt-4o-mini"] };
+
+    case "chat.route": {
+      const payload = JSON.parse(req.payload);
+      return {
+        backend: "openai-main",
+        method: "POST",
+        path: "/chat/completions",
+        headers: {},
+        body: payload.body,
+        stream_mode: "passthrough",
+      };
+    }
+
+    default:
+      return { not_handled: true };
+  }
+}
+```
+
+Example upstream plan:
+
+```ts
+return {
+  backend: "ollama-local",
+  method: "POST",
+  path: "/api/chat",
+  headers: {},
+  body: JSON.stringify({ model: "llama3.1", messages, stream: true }),
+  stream_mode: "mapped",
+  response_codec: "ndjson",
+  output_protocol: "openai.chat.completion",
+  mapper: "builtin",
+};
+```
+
+vhttpd executes the plan and exposes framed upstream data back to the plugin
+only when mapping is required.
+
+Plugin frame mapper example:
+
+```ts
+export function openai(req) {
+  if (req.op === "chat.map_frame") {
+    const payload = JSON.parse(req.payload);
+    const frame = JSON.parse(payload.frame);
+    return {
+      content: frame.delta ?? "",
+      tool_calls: frame.tool_calls ?? undefined,
+      finish_reason: frame.tool_calls ? "tool_calls" : undefined,
+      done: frame.finished === true,
+    };
+  }
+  return { not_handled: true };
+}
+```
+
+Plugin fallback example:
+
+```ts
+export function openai(req) {
+  if (req.op === "chat.fallback") {
+    const payload = JSON.parse(req.payload);
+    if (payload.failed_backend !== "primary" || payload.status_code < 500) {
+      return { not_handled: true };
+    }
+    return {
+      backend: "backup",
+      method: "POST",
+      path: "/chat/completions",
+      body: payload.body,
+      stream_mode: "passthrough",
+    };
+  }
+  return { not_handled: true };
+}
+```
+
+## Executor Backend Contract
+
+An executor backend is used when vhttpd should not directly call the upstream
+HTTP API. This is the escape hatch for private SDKs, non-OpenAI-compatible
+protocols, multi-step agent logic, or provider-specific network behavior.
+
+Configuration:
+
+```toml
+[openai.backends.custom_executor]
+kind = "executor"
+executor = "custom_executor"
+
+[plugins.custom_executor]
+kind = "vjsx"
+entry = "examples/vjsx/openai-executor-app.mts"
+runtime_profile = "node"
+enable_network = true
+```
+
+The OpenAI routing plugin can select this backend:
+
+```ts
+return {
+  backend: "custom_executor",
+  method: "POST",
+  path: "/executor/chat",
+  body: payload.body,
+  stream_mode: "executor",
+};
+```
+
+The executor app only needs to implement the `openai(req)` entry and handle
+`req.op === "chat.execute"` for Chat Completions or
+`req.op === "responses.execute"` for Responses.
+
+Request shape:
+
+```ts
+type OpenAIExecutorRequest = {
+  plugin: string;
+  capability: "openai";
+  op: "chat.execute" | "responses.execute";
+  request_id: string;
+  trace_id: string;
+  payload: string;
+  metadata: {
+    model?: string;
+    backend?: string;
+  };
+};
+
+type OpenAIExecutorPayload = {
+  method: string;
+  path: string;
+  model: string;
+  stream: boolean;
+  body: string;
+  backend: string;
+  request_id: string;
+  trace_id: string;
+  response_codec?: string;
+  output_protocol?: "openai.chat.completion" | "openai.response";
+};
+```
+
+Minimal executor:
+
+```ts
+export async function openai(req) {
+  if (req.op !== "chat.execute") {
+    return { not_handled: true };
+  }
+  const payload = JSON.parse(req.payload);
+  const body = JSON.parse(payload.body);
+
+  // Call a private SDK or non-OpenAI HTTP API here.
+  if (payload.stream) {
+    return {
+      frames: [
+        { content: "hello", done: false },
+        { usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, done: true },
+      ],
+    };
+  }
+
+  return {
+    content: "hello",
+    usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+    done: true,
+  };
+}
+```
+
+Non-stream normalized result:
+
+```ts
+return {
+  content: "hello",
+  usage: {
+    prompt_tokens: 10,
+    completion_tokens: 3,
+    total_tokens: 13,
+  },
+  done: true,
+};
+```
+
+vhttpd turns that into an OpenAI `chat.completion` response.
+
+Non-stream full OpenAI body:
+
+```ts
+return {
+  body: JSON.stringify({
+    id: "chatcmpl-custom",
+    object: "chat.completion",
+    choices: [
+      {
+        index: 0,
+        message: { role: "assistant", content: "hello" },
+        finish_reason: "stop",
+      },
+    ],
+  }),
+};
+```
+
+Stream result:
+
+```ts
+return {
+  frames: [
+    { content: "hello ", done: false },
+    { content: "world", done: false },
+    {
+      usage: {
+        prompt_tokens: 10,
+        completion_tokens: 2,
+        total_tokens: 12,
+      },
+      done: true,
+    },
+  ],
+};
+```
+
+vhttpd writes these frames as OpenAI SSE and appends `data: [DONE]`.
+
+Tool call frame:
+
+```ts
+return {
+  frames: [
+    {
+      tool_calls: [
+        {
+          index: 0,
+          id: "call_1",
+          type: "function",
+          function: {
+            name: "search",
+            arguments: "{\"q\":\"vhttpd\"}",
+          },
+        },
+      ],
+      finish_reason: "tool_calls",
+      done: true,
+    },
+  ],
+};
+```
+
+Error result:
+
+```ts
+return {
+  error: {
+    message: "custom provider failed",
+  },
+};
+```
+
+Current executor behavior:
+
+- Executor apps may perform network access when their plugin config enables it.
+- vhttpd still owns the client-facing OpenAI HTTP/SSE response.
+- Non-stream executor results are normalized into OpenAI JSON unless `body` is
+  returned.
+- Stream executor results can return either buffered `frames: [...]` or an async
+  iterable. Async iterable results are pulled by vhttpd through vjsx
+  `RuntimeSession.stream_value(...)`, so each yielded frame is written as SSE
+  before the next frame is requested.
+
+Current plan validation:
+
+- `backend` is required and must name a configured backend.
+- `method` defaults to `POST` and must be one of `GET`, `POST`, `PUT`,
+  `PATCH`, `DELETE`, `HEAD`.
+- `path` defaults to `/chat/completions`, must start with `/`, and must not
+  contain newlines.
+- `stream_mode` defaults to `passthrough`; `mapped` is supported for OpenAI
+  chat completion mapping.
+- `mapped` currently supports `response_codec = "ndjson"` for streaming and
+  `response_codec = "ndjson" | "json"` for non-stream aggregation.
+- `output_protocol` defaults to `openai.chat.completion`.
+- `mapper` defaults to `builtin`; `plugin` calls `openai(req)` with
+  `req.op = "chat.map_frame"` per decoded upstream frame.
+- hop-by-hop and other transport-level plan headers such as `Connection`,
+  `Content-Length`, `Transfer-Encoding`, `Host`, and `Upgrade` are ignored.
+
+## Stream Modes
+
+### passthrough
+
+For OpenAI-compatible upstreams. vhttpd forwards the request upstream and writes
+the upstream response back to the client with minimal intervention.
+
+Useful for:
+
+- OpenAI official API.
+- OpenAI-compatible providers.
+- Other aggregation gateways.
+
+vjsx is used for route/build-start/error hooks, not per-token mapping.
+
+### mapped
+
+For non-OpenAI upstreams such as Ollama NDJSON. vhttpd decodes the upstream
+framing and calls vjsx or a built-in mapper to emit OpenAI-compatible chunks.
+
+Useful for:
+
+- Ollama `/api/chat` NDJSON.
+- custom JSONL/NDJSON model servers.
+- providers with incompatible stream shape.
+
+### vhttpd_sse
+
+For executor backends: PHP/vjsx returns normalized events or frames, while
+vhttpd still owns the client-facing SSE writer.
+
+Useful for:
+
+- inproc vjsx agent executors.
+- PHP application executors.
+- local business logic pretending to be an OpenAI model.
+
+## Initial MVP
+
+1. Add OpenAI config structs and admin snapshot fields.
+2. Add `ProviderRouteKind.openai`.
+3. Add `/v1/models` and `/v1/chat/completions` dispatch behind `[openai]`.
+4. Implement `openai_http` backend with non-stream and SSE passthrough.
+5. Add vjsx hook for route/buildUpstream.
+6. Add built-in OpenAI SSE writer:
+   - `data: {...}\n\n`
+   - `data: [DONE]\n\n`
+7. Add fixture tests for OpenAI-compatible mock upstream.
+
+## Current Implementation Slice
+
+The first slice keeps the network path in vhttpd and implements the
+OpenAI-compatible passthrough path directly:
+
+- `[openai]` config, named `[openai.backends.*]`, and named
+  `[openai.routes.*]`.
+- `[plugins.*]` config for capability plugins that do not replace the site
+  executor.
+- `/v1/models` generated from configured route models, or from the
+  OpenAI plugin `models` operation.
+- `/v1/chat/completions` routed by request `model`, or planned by the OpenAI
+  plugin `chat.route` operation.
+- `openai_http` upstream backend with configured `base_url` and API key from
+  `api_key` or `api_key_env`.
+- non-stream request passthrough, with optional model rewrite when
+  `upstream_model` is configured.
+- stream request passthrough where vhttpd takes over the client connection and
+  forwards upstream SSE bytes.
+- mapped Ollama-style NDJSON streams where vhttpd decodes upstream JSON lines
+  and emits OpenAI chat completion SSE chunks.
+- mapped non-stream `ndjson`/`json` responses aggregated into an OpenAI chat
+  completion response.
+- executor backends using a vjsx app via `chat.execute`, for providers that
+  need custom SDK/network logic outside vhttpd's HTTP/mapped fetch path.
+- `/v1/responses` create endpoint with non-stream and stream passthrough. The
+  built-in route resolver reuses `[openai.routes.*]` and sends upstream traffic
+  to `/responses`.
+- Responses stateful passthrough for paths under `/v1/responses/*`, including
+  retrieve, cancel, input item listing, and future upstream-defined subroutes.
+  vhttpd preserves the query string and still applies backend auth, trace
+  headers, and error normalization.
+- Responses executor backends using `responses.execute`. Non-stream executors
+  may return a Response object or `{ body }`; stream executors may return an
+  async iterable of typed Responses events.
+- In-memory Responses registry for executor-owned responses. vhttpd stores
+  completed executor Responses in a TTL-backed `MemoryStateStore` and serves
+  `GET /v1/responses/{id}` locally when the id is known; unknown ids continue
+  to upstream passthrough.
+- plugin frame mapper hook for provider-specific stream frames that the
+  built-in mapper does not understand.
+- upstream non-2xx responses normalized into OpenAI error envelopes.
+- streaming upstream non-2xx responses normalized before SSE headers are
+  written.
+- plugin frame mapper errors normalized into OpenAI-style SSE error frames.
+- `chat.fallback` plugin hook: vhttpd retries once with a fallback plan when
+  upstream fetch fails or returns non-2xx.
+- stream-safe fallback for passthrough streams: fallback is allowed only before
+  client SSE headers are written; after streaming begins, vhttpd sends an
+  OpenAI-style SSE error instead of switching backend.
+- stream-safe fallback for mapped NDJSON streams using the same pre-SSE-header
+  boundary.
+- tool call chunk normalization for mapped streams: built-in and plugin mappers
+  can emit OpenAI-compatible `delta.tool_calls` and `finish_reason =
+  "tool_calls"`.
+- non-stream mapped NDJSON tool calls are aggregated into final
+  `message.tool_calls`, including incremental `function.arguments` chunks.
+- non-stream mapped usage normalization from OpenAI-style `usage` or
+  Ollama-style `prompt_eval_count`/`eval_count` into OpenAI
+  `usage.prompt_tokens`, `usage.completion_tokens`, and `usage.total_tokens`.
+- stream mapped usage emits a final OpenAI-compatible chunk with `choices: []`
+  and `usage` before `data: [DONE]` when upstream usage is available.
+- optional vjsx OpenAI plugin hook through a single `openai(req)` entry. vhttpd
+  passes `req.op` values such as `models`, `chat.route`, and
+  `responses.route`; `{ not_handled: true }` falls back to built-in config
+  behavior.
+
+The plugin hook is intentionally scoped: it can route/build the upstream plan,
+but it does not own sockets, fetch, or client streaming.
+
+## Second Phase
+
+1. Add provider-specific error code taxonomy.
+2. Add retry/fallback policy limits and observability fields.
+3. Add provider-specific Responses routing examples beyond passthrough and
+   executor.
+
+## Later Phases
+
+- durable persistence for Responses objects when executor state must survive
+  process restart or be shared across vhttpd instances.
+- embeddings
+- tool call chunk normalization (mapped streams are already covered above)
+- usage aggregation (mapped responses are already covered above)
+- tenant-aware routing
+- weighted routing and health checks
+- per-key quota hooks
+- request/response audit events
+- admin UI/runtime snapshots
+
+## Testing Strategy
+
+Default tests should avoid npm and network dependencies.
+
+Use local mock upstreams in V tests for:
+
+- OpenAI-compatible JSON response.
+- OpenAI-compatible SSE response.
+- upstream disconnect.
+- malformed SSE.
+- timeout/cancellation.
+- route miss.
+
+Use optional vjsx/npm integration fixtures for:
+
+- `openai` SDK non-stream and stream.
+- AI SDK `generateText`.
+- AI SDK `streamText`.
+
+The optional fixture should not run in the default `v test` suite.
+
+## Key Design Rule
+
+Do not let protocol plugins own the socket.
+
+The plugin can decide, normalize, and map. vhttpd should execute, stream, cancel,
+observe, and write.
diff --git a/examples/config/openai-gateway-dashscope-coding.toml b/examples/config/openai-gateway-dashscope-coding.toml
new file mode 100644
index 0000000..95e6652
--- /dev/null
+++ b/examples/config/openai-gateway-dashscope-coding.toml
@@ -0,0 +1,45 @@
+[server]
+host = "127.0.0.1"
+port = 18082
+
+[paths]
+root = "../.."
+
+[openai]
+enabled = true
+base_path = "/v1"
+default_backend = "bailian_coding"
+plugin = "openai_gateway"
+
+[plugins.openai_gateway]
+kind = "vjsx"
+entry = "${paths.root}/examples/vjsx/openai-dashscope-coding-plugin.mts"
+runtime_profile = "node"
+thread_count = 1
+enable_network = false
+
+[openai.backends.bailian_coding]
+kind = "openai_http"
+base_url = "https://coding.dashscope.aliyuncs.com/v1"
+api_key_env = "BAILIAN_CODING_API_KEY"
+timeout_ms = 60000
+
+[openai.backends.ollama]
+kind = "http"
+base_url = "http://127.0.0.1:11434"
+timeout_ms = 60000
+
+[openai.routes.bailian_coding_models]
+models = [
+  "qwen3.6-plus",
+  "qwen3.5-plus",
+  "qwen3-coder-plus",
+  "glm-5",
+  "kimi-k2.5",
+  "MiniMax-M2.5",
+]
+backend = "bailian_coding"
+
+[openai.routes.minimax_m2]
+models = ["minimax-m2:cloud"]
+backend = "ollama"
diff --git a/examples/config/openai-gateway.toml b/examples/config/openai-gateway.toml
new file mode 100644
index 0000000..35ee5ef
--- /dev/null
+++ b/examples/config/openai-gateway.toml
@@ -0,0 +1,64 @@
+[server]
+host = "127.0.0.1"
+port = 18081
+
+[openai]
+enabled = true
+base_path = "/v1"
+default_backend = "openai"
+plugin = "openai_gateway"
+
+[plugins.openai_gateway]
+kind = "vjsx"
+entry = "${paths.root}/examples/vjsx/openai-gateway-plugin.mts"
+runtime_profile = "node"
+thread_count = 1
+enable_network = false
+
+[plugins.custom_executor]
+kind = "vjsx"
+entry = "${paths.root}/examples/vjsx/openai-executor-app.mts"
+runtime_profile = "node"
+thread_count = 2
+enable_network = true
+
+[openai.backends.openai]
+kind = "openai_http"
+base_url = "https://api.openai.com/v1"
+api_key_env = "OPENAI_API_KEY"
+timeout_ms = 60000
+
+[openai.backends.ollama]
+kind = "http"
+base_url = "http://127.0.0.1:11434"
+timeout_ms = 60000
+
+[openai.backends.custom]
+kind = "http"
+base_url = "http://127.0.0.1:19090"
+timeout_ms = 60000
+
+[openai.backends.custom_executor]
+kind = "executor"
+executor = "custom_executor"
+timeout_ms = 60000
+
+[openai.routes.gpt_4o_mini]
+models = ["gpt-4o-mini"]
+backend = "openai"
+
+[openai.routes.gpt_4_1_mini]
+models = ["gpt-4.1-mini"]
+backend = "openai"
+
+[openai.routes.llama3_1]
+models = ["llama3.1"]
+backend = "ollama"
+
+[openai.routes.custom_agent]
+models = ["custom-agent"]
+backend = "custom"
+
+[openai.routes.executor_agent]
+models = ["executor-agent"]
+backend = "custom_executor"
diff --git a/examples/vjsx/openai-dashscope-coding-plugin.mts b/examples/vjsx/openai-dashscope-coding-plugin.mts
new file mode 100644
index 0000000..e2bff9e
--- /dev/null
+++ b/examples/vjsx/openai-dashscope-coding-plugin.mts
@@ -0,0 +1,74 @@
+type PluginRequest = {
+  payload: string; // JSON text; decoded by payload()
+  op: string; // operation name dispatched on in openai()
+};
+
+type ChatPayload = {
+  body: string; // JSON text of the chat request; decoded by chatBody()
+  model: string; // takes precedence over body.model in routeChat()
+  stream: boolean; // copied into the Ollama request as `stream`
+};
+
+function payload<T>(req: PluginRequest): T {
+  return JSON.parse(req.payload ? req.payload : "{}") as T;
+}
+
+function chatBody(input: ChatPayload): Record<string, any> {
+  return JSON.parse(input.body ? input.body : "{}");
+}
+
+function routeChat(req: PluginRequest) {
+  const chat = payload<ChatPayload>(req);
+  const parsed = chatBody(chat);
+  const requested = chat.model || parsed.model || "";
+  const viaOllama = requested === "llama3.1" || requested === "minimax-m2:cloud";
+  if (!viaOllama) {
+    // Every other model: forward the body to bailian_coding, only `model` reset.
+    parsed.model = requested || parsed.model;
+    return {
+      backend: "bailian_coding",
+      method: "POST",
+      path: "/chat/completions",
+      body: JSON.stringify(parsed),
+      stream_mode: "passthrough",
+    };
+  }
+  // Ollama-served models: build an /api/chat request with an NDJSON mapped plan.
+  return {
+    backend: "ollama",
+    method: "POST",
+    path: "/api/chat",
+    body: JSON.stringify({
+      model: requested === "minimax-m2:cloud" ? "minimax_m2" : "llama3.1",
+      messages: parsed.messages || [],
+      tools: parsed.tools,
+      stream: chat.stream,
+    }),
+    stream_mode: "mapped",
+    response_codec: "ndjson",
+    output_protocol: "openai.chat.completion",
+    mapper: "builtin",
+  };
+}
+
+export function openai(req: PluginRequest) {
+  // Serves `chat.route` and `models`; any other op is reported as not handled.
+  if (req.op === "chat.route") {
+    return routeChat(req);
+  }
+  if (req.op !== "models") {
+    return { not_handled: true };
+  }
+  return {
+    models: [
+      "qwen3.6-plus",
+      "qwen3.5-plus",
+      "qwen3-coder-plus",
+      "glm-5",
+      "kimi-k2.5",
+      "MiniMax-M2.5",
+      "llama3.1",
+      "minimax-m2:cloud",
+    ],
+  };
+}
diff --git a/examples/vjsx/openai-executor-app.mts b/examples/vjsx/openai-executor-app.mts
new file mode 100644
index 0000000..dffa75e
--- /dev/null
+++ b/examples/vjsx/openai-executor-app.mts
@@ -0,0 +1,130 @@
+// Example vhttpd executor app: handles the "chat.execute" and
+// "responses.execute" ops in-process and replies with "executor: <prompt>".
+
+// Request envelope handed to the app entry point.
+type PluginRequest = {
+  op: string; // operation name
+  payload: string; // JSON-encoded, operation-specific payload
+};
+
+// Decodes the JSON payload of a request; an empty payload decodes to {}.
+function payload(req: PluginRequest): Record<string, any> {
+  return JSON.parse(req.payload || "{}");
+}
+
+// Flattens a message "content" value to text. OpenAI-style content may be a
+// plain string or an array of parts; only the "text" of each part is kept.
+function contentText(content: any): string {
+  if (content == null) {
+    return "";
+  }
+  if (Array.isArray(content)) {
+    return content
+      .map((part: any) => (typeof part === "string" ? part : (part && part.text) || ""))
+      .join("");
+  }
+  return String(content);
+}
+
+// Extracts the prompt from a request body. Chat requests carry "messages";
+// Responses API requests carry "input", which is a string or a list of items.
+function promptText(body: Record<string, any>): string {
+  const source = body.messages || body.input;
+  if (typeof source === "string") {
+    return source;
+  }
+  if (!Array.isArray(source)) {
+    return "";
+  }
+  return source.map((item: any) => contentText(item && item.content)).join("\n");
+}
+
+// Token usage reported by this example; counts are derived from the prompt
+// length rather than from a real tokenizer.
+function usageFor(prompt: string) {
+  const promptTokens = Math.max(1, prompt.length);
+  return {
+    prompt_tokens: promptTokens,
+    completion_tokens: 2,
+    total_tokens: promptTokens + 2,
+  };
+}
+
+// Streams the chat reply as normalized frames; the final frame carries the
+// usage and done: true.
+async function* streamFrames(prompt: string) {
+  yield { content: "executor: ", done: false };
+  yield { content: prompt || "ok", done: false };
+  yield { usage: usageFor(prompt), done: true };
+}
+
+// Streams a Responses API reply as created / output_text.delta / completed
+// events with increasing sequence numbers.
+async function* responseEvents(prompt: string) {
+  const response = (status: string) => ({
+    id: "resp_vhttpd_executor",
+    object: "response",
+    status,
+  });
+
+  yield { type: "response.created", response: response("in_progress"), sequence_number: 1 };
+  yield {
+    type: "response.output_text.delta",
+    delta: `executor: ${prompt || "ok"}`,
+    output_index: 0,
+    content_index: 0,
+    sequence_number: 2,
+  };
+  yield { type: "response.completed", response: response("completed"), sequence_number: 3 };
+}
+
+// App entry point. Resolves to { not_handled: true } for ops other than
+// "chat.execute" and "responses.execute"; otherwise returns either a complete
+// result object or, when the payload asks for streaming, an async generator.
+export async function openai(req: PluginRequest) {
+  if (req.op !== "chat.execute" && req.op !== "responses.execute") {
+    return { not_handled: true };
+  }
+
+  const p = payload(req);
+  const body = JSON.parse(p.body || "{}");
+  const prompt = promptText(body);
+
+  if (req.op === "responses.execute") {
+    if (p.stream) {
+      return responseEvents(prompt);
+    }
+
+    return {
+      id: "resp_vhttpd_executor",
+      object: "response",
+      status: "completed",
+      model: p.model,
+      output: [{
+        id: "msg_vhttpd_executor",
+        type: "message",
+        status: "completed",
+        role: "assistant",
+        content: [{
+          type: "output_text",
+          text: `executor: ${prompt || "ok"}`,
+          annotations: [],
+        }],
+      }],
+    };
+  }
+
+  // This is where a real executor app can call a private SDK or a
+  // non-OpenAI-compatible HTTP service. The result returned to vhttpd is
+  // normalized frames/data; vhttpd still owns the client-facing OpenAI response.
+
+  if (p.stream) {
+    return streamFrames(prompt);
+  }
+
+  return {
+    content: `executor: ${prompt || "ok"}`,
+    usage: usageFor(prompt),
+    done: true,
+  };
+}
diff --git a/examples/vjsx/openai-gateway-plugin.mts b/examples/vjsx/openai-gateway-plugin.mts
new file mode 100644
index 0000000..d54f8eb
--- /dev/null
+++ b/examples/vjsx/openai-gateway-plugin.mts
@@ -0,0 +1,227 @@
+// Example vhttpd OpenAI gateway plugin: advertises public models, routes chat
+// and Responses API requests to a backend, maps frames of the "custom" backend,
+// and falls back to the "ollama" backend when the "openai" backend fails.
+
+// Request envelope handed to the plugin entry point.
+type PluginRequest = {
+  op: string; // operation name, e.g. "models" or "chat.route"
+  payload: string; // JSON-encoded, operation-specific payload
+  request_id?: string;
+  trace_id?: string;
+  metadata?: Record<string, string>;
+};
+
+// Decoded payload of the "chat.route" and "responses.route" ops.
+type ChatPayload = {
+  model: string;
+  stream: boolean;
+  body: string; // JSON-encoded client request body
+};
+
+// Decoded payload of the "chat.fallback" op.
+type FallbackPayload = {
+  body: string; // JSON-encoded client request body
+  failed_backend: string;
+  status_code: number;
+  error_code: string;
+  error_message: string;
+};
+
+// Decoded payload of the "chat.map_frame" op.
+type MapFramePayload = {
+  frame: string; // JSON-encoded backend frame
+};
+
+// Model names returned for the "models" op.
+const publicModels = [
+  "gpt-4o-mini",
+  "llama3.1",
+  "custom-agent",
+  "executor-agent",
+];
+
+// Decodes the JSON payload of a request; an empty payload decodes to {}.
+function jsonPayload<T>(req: PluginRequest): T {
+  return JSON.parse(req.payload || "{}") as T;
+}
+
+// Decodes the JSON request body inside a payload; an empty body decodes to {}.
+function chatBody(payload: ChatPayload): Record<string, any> {
+  return JSON.parse(payload.body || "{}");
+}
+
+// Forwards the chat request body to the "openai" backend without mapping.
+function openaiPassthrough(payload: ChatPayload) {
+  return {
+    backend: "openai",
+    method: "POST",
+    path: "/chat/completions",
+    body: JSON.stringify(chatBody(payload)),
+    stream_mode: "passthrough",
+  };
+}
+
+// Builds the route to the "ollama" backend for an OpenAI-style chat body.
+// Shared by primary routing and fallback so both forward messages and tools.
+function ollamaRoute(body: Record<string, any>, stream: unknown) {
+  return {
+    backend: "ollama",
+    method: "POST",
+    path: "/api/chat",
+    body: JSON.stringify({
+      model: "llama3.1",
+      messages: body.messages || [],
+      tools: body.tools, // omitted from the JSON when undefined
+      // Ollama streams by default when "stream" is omitted, so always send
+      // an explicit boolean.
+      stream: stream === true,
+    }),
+    stream_mode: "mapped",
+    response_codec: "ndjson",
+    output_protocol: "openai.chat.completion",
+    mapper: "builtin",
+  };
+}
+
+// Routes a chat request to the "ollama" backend using the built-in mapper.
+function ollamaMapped(payload: ChatPayload) {
+  return ollamaRoute(chatBody(payload), payload.stream);
+}
+
+// Routes a chat request to the "custom" backend as a single flattened prompt.
+// NOTE(review): mapper "plugin" presumably triggers the "chat.map_frame" op - confirm.
+function customMapped(payload: ChatPayload) {
+  const messages: any[] = chatBody(payload).messages || [];
+  return {
+    backend: "custom",
+    method: "POST",
+    path: "/chat",
+    body: JSON.stringify({
+      prompt: messages.map((m: any) => m.content).join("\n"),
+      stream: payload.stream,
+    }),
+    stream_mode: "mapped",
+    response_codec: "ndjson",
+    output_protocol: "openai.chat.completion",
+    mapper: "plugin",
+  };
+}
+
+// Picks the backend for a "chat.route" request from the requested model.
+function routeChat(req: PluginRequest) {
+  const payload = jsonPayload<ChatPayload>(req);
+  // The payload-level model takes precedence over the one in the body.
+  const model = payload.model || chatBody(payload).model || "";
+
+  switch (model) {
+    case "llama3.1":
+      return ollamaMapped(payload);
+    case "custom-agent":
+      return customMapped(payload);
+    case "executor-agent":
+      return {
+        backend: "custom_executor",
+        method: "POST",
+        path: "/executor/chat",
+        body: payload.body,
+        stream_mode: "executor",
+      };
+    default:
+      return openaiPassthrough(payload);
+  }
+}
+
+// Picks the backend for a "responses.route" request from the requested model.
+function routeResponses(req: PluginRequest) {
+  const payload = jsonPayload<ChatPayload>(req);
+  const body = chatBody(payload);
+  const model = payload.model || body.model || "";
+
+  if (model === "executor-agent") {
+    return {
+      backend: "custom_executor",
+      method: "POST",
+      path: "/executor/responses",
+      body: payload.body,
+      stream_mode: "executor",
+      output_protocol: "openai.response",
+    };
+  }
+
+  return {
+    backend: "openai",
+    method: "POST",
+    path: "/responses",
+    body: JSON.stringify(body),
+    stream_mode: "passthrough",
+    output_protocol: "openai.response",
+  };
+}
+
+// Maps one frame of the "custom" backend to a normalized result: an error, a
+// single tool call, or a content delta.
+function mapCustomFrame(req: PluginRequest) {
+  const payload = jsonPayload<MapFramePayload>(req);
+  const frame = JSON.parse(payload.frame || "{}");
+  const done = frame.done === true;
+
+  if (frame.error) {
+    return { error: { message: String(frame.error) } };
+  }
+
+  if (frame.tool_call) {
+    const call = frame.tool_call;
+    return {
+      tool_calls: [{
+        index: call.index || 0,
+        id: call.id,
+        type: "function",
+        function: {
+          name: call.name,
+          arguments: call.arguments || "",
+        },
+      }],
+      finish_reason: "tool_calls",
+      done,
+    };
+  }
+
+  return {
+    content: frame.delta || frame.text || "",
+    usage: frame.usage,
+    done,
+  };
+}
+
+// Handles "chat.fallback": when the "openai" backend failed with a status of
+// 500 or above, re-routes the original request to "ollama"; otherwise declines.
+function fallback(req: PluginRequest) {
+  const payload = jsonPayload<FallbackPayload>(req);
+
+  if (payload.failed_backend === "openai" && payload.status_code >= 500) {
+    const original = JSON.parse(payload.body || "{}");
+    // Reuse the primary Ollama route so tool definitions survive the fallback.
+    return ollamaRoute(original, original.stream);
+  }
+
+  return { not_handled: true };
+}
+
+// Plugin entry point: dispatches on req.op; unknown ops are reported as
+// not handled.
+export function openai(req: PluginRequest) {
+  switch (req.op) {
+    case "models":
+      return { models: publicModels };
+    case "chat.route":
+      return routeChat(req);
+    case "responses.route":
+      return routeResponses(req);
+    case "chat.map_frame":
+      return mapCustomFrame(req);
+    case "chat.fallback":
+      return fallback(req);
+    default:
+      return { not_handled: true };
+  }
+}

From a3e05eacc65889b172c68f4e25429116e846ee0d Mon Sep 17 00:00:00 2001
From: weigang <guweigang@bullsoft.org>
Date: Sun, 10 May 2026 17:36:30 +0800
Subject: [PATCH 05/10] state-store: avoid generic map value wrappers

---
 src/state_store.v      | 76 ++++++++++++++++++++++++++----------------
 src/state_store_test.v | 20 +++++++++++
 2 files changed, 67 insertions(+), 29 deletions(-)

diff --git a/src/state_store.v b/src/state_store.v
index 0f794ba..83fadf3 100644
--- a/src/state_store.v
+++ b/src/state_store.v
@@ -1,7 +1,9 @@
 module main
 
+import json
 import sync
 import time
+import x.json2
 
 pub interface StateStore[T] {
 mut:
@@ -17,23 +19,23 @@ mut:
 	clear()
 }
 
-struct StoredValue[T] {
+struct StoredValue {
 mut:
-	value          T
-	created_at_ms  i64
-	updated_at_ms  i64
-	expires_at_ms  i64
+	value         json2.Any
+	created_at_ms i64
+	updated_at_ms i64
+	expires_at_ms i64
 }
 
 pub struct MemoryStateStore[T] {
 mut:
 	mu   sync.Mutex
-	data map[string]StoredValue[T]
+	data map[string]StoredValue
 }
 
 pub fn new_memory_state_store[T]() MemoryStateStore[T] {
 	return MemoryStateStore[T]{
-		data: map[string]StoredValue[T]{}
+		data: map[string]StoredValue{}
 	}
 }
 
@@ -48,10 +50,24 @@ fn state_store_expires_at_ms(ttl time.Duration) i64 {
 	return state_store_now_ms() + ttl.milliseconds()
 }
 
-fn state_store_is_expired[T](record StoredValue[T], now_ms i64) bool {
+fn state_store_is_expired(record StoredValue, now_ms i64) bool {
 	return record.expires_at_ms > 0 && record.expires_at_ms <= now_ms
 }
 
+fn state_store_encode_value[T](val T) !json2.Any { // convert a typed value into the json2.Any form kept in StoredValue
+	$if T is string {
+		return json2.Any(val) // strings are wrapped directly
+	} $else $if T is $struct {
+		return json2.Any(json2.map_from[T](val)) // structs go through json2.map_from
+	} $else {
+		return json2.decode[json2.Any](json.encode(val))! // other types: encode with json, re-parse with json2; errors propagate
+	}
+}
+
+fn state_store_decode_value[T](val json2.Any) !T { // rebuild a T from the stored json2.Any
+	return json2.decode[T](val.json_str())! // serialise to JSON text, then decode as T; decode errors propagate
+}
+
 pub fn (mut store MemoryStateStore[T]) get(key string) !T {
 	store.mu.@lock()
 	defer {
@@ -59,11 +75,11 @@ pub fn (mut store MemoryStateStore[T]) get(key string) !T {
 	}
 	if record := store.data[key] {
 		now_ms := state_store_now_ms()
-		if state_store_is_expired[T](record, now_ms) {
+		if state_store_is_expired(record, now_ms) {
 			store.data.delete(key)
 			return error('state_store_key_expired:${key}')
 		}
-		return record.value
+		return state_store_decode_value[T](record.value)!
 	}
 	return error('state_store_key_missing:${key}')
 }
@@ -79,15 +95,15 @@ pub fn (mut store MemoryStateStore[T]) set_with_ttl(key string, val T, ttl time.
 		store.mu.unlock()
 	}
 	if existing := store.data[key] {
-		store.data[key] = StoredValue[T]{
-			value:         val
+		store.data[key] = StoredValue{
+			value:         state_store_encode_value[T](val)!
 			created_at_ms: existing.created_at_ms
 			updated_at_ms: now_ms
 			expires_at_ms: state_store_expires_at_ms(ttl)
 		}
 	} else {
-		store.data[key] = StoredValue[T]{
-			value:         val
+		store.data[key] = StoredValue{
+			value:         state_store_encode_value[T](val)!
 			created_at_ms: now_ms
 			updated_at_ms: now_ms
 			expires_at_ms: state_store_expires_at_ms(ttl)
@@ -110,7 +126,7 @@ pub fn (mut store MemoryStateStore[T]) exists(key string) bool {
 	}
 	if record := store.data[key] {
 		now_ms := state_store_now_ms()
-		if state_store_is_expired[T](record, now_ms) {
+		if state_store_is_expired(record, now_ms) {
 			store.data.delete(key)
 			return false
 		}
@@ -128,7 +144,7 @@ pub fn (mut store MemoryStateStore[T]) keys() []string {
 	mut keys := []string{}
 	mut expired := []string{}
 	for key, record in store.data {
-		if state_store_is_expired[T](record, now_ms) {
+		if state_store_is_expired(record, now_ms) {
 			expired << key
 			continue
 		}
@@ -150,11 +166,11 @@ pub fn (mut store MemoryStateStore[T]) list() []T {
 	mut values := []T{}
 	mut expired := []string{}
 	for key, record in store.data {
-		if state_store_is_expired[T](record, now_ms) {
+		if state_store_is_expired(record, now_ms) {
 			expired << key
 			continue
 		}
-		values << record.value
+		values << state_store_decode_value[T](record.value) or { continue }
 	}
 	for key in expired {
 		store.data.delete(key)
@@ -169,11 +185,13 @@ pub fn (mut store MemoryStateStore[T]) patch(key string, updater fn (mut T) !) !
 	}
 	if mut record := store.data[key] {
 		now_ms := state_store_now_ms()
-		if state_store_is_expired[T](record, now_ms) {
+		if state_store_is_expired(record, now_ms) {
 			store.data.delete(key)
 			return error('state_store_key_expired:${key}')
 		}
-		updater(mut record.value)!
+		mut value := state_store_decode_value[T](record.value)!
+		updater(mut value)!
+		record.value = state_store_encode_value[T](value)!
 		record.updated_at_ms = now_ms
 		store.data[key] = record
 		return
@@ -189,7 +207,7 @@ pub fn (mut store MemoryStateStore[T]) prune_expired() int {
 	now_ms := state_store_now_ms()
 	mut expired := []string{}
 	for key, record in store.data {
-		if state_store_is_expired[T](record, now_ms) {
+		if state_store_is_expired(record, now_ms) {
 			expired << key
 		}
 	}
@@ -214,7 +232,7 @@ pub fn (mut store MemoryStateStore[string]) compare_and_swap_set_with_ttl(key st
 		store.mu.unlock()
 	}
 	if mut existing := store.data[key] {
-		if state_store_is_expired[string](existing, now_ms) {
+		if state_store_is_expired(existing, now_ms) {
 			store.data.delete(key)
 			if expected_found {
 				return false
@@ -223,11 +241,11 @@ pub fn (mut store MemoryStateStore[string]) compare_and_swap_set_with_ttl(key st
 			if !expected_found {
 				return false
 			}
-			if existing.value != expected_value {
+			if state_store_decode_value[string](existing.value)! != expected_value {
 				return false
 			}
-			store.data[key] = StoredValue[string]{
-				value:         next_value
+			store.data[key] = StoredValue{
+				value:         state_store_encode_value[string](next_value)!
 				created_at_ms: existing.created_at_ms
 				updated_at_ms: now_ms
 				expires_at_ms: state_store_expires_at_ms(ttl)
@@ -238,8 +256,8 @@ pub fn (mut store MemoryStateStore[string]) compare_and_swap_set_with_ttl(key st
 	if expected_found {
 		return false
 	}
-	store.data[key] = StoredValue[string]{
-		value:         next_value
+	store.data[key] = StoredValue{
+		value:         state_store_encode_value[string](next_value)!
 		created_at_ms: now_ms
 		updated_at_ms: now_ms
 		expires_at_ms: state_store_expires_at_ms(ttl)
@@ -254,14 +272,14 @@ pub fn (mut store MemoryStateStore[string]) compare_and_swap_delete(key string,
 		store.mu.unlock()
 	}
 	if existing := store.data[key] {
-		if state_store_is_expired[string](existing, now_ms) {
+		if state_store_is_expired(existing, now_ms) {
 			store.data.delete(key)
 			return !expected_found
 		}
 		if !expected_found {
 			return false
 		}
-		if existing.value != expected_value {
+		if state_store_decode_value[string](existing.value)! != expected_value {
 			return false
 		}
 		store.data.delete(key)
diff --git a/src/state_store_test.v b/src/state_store_test.v
index 96ba2fb..7a08569 100644
--- a/src/state_store_test.v
+++ b/src/state_store_test.v
@@ -2,6 +2,12 @@ module main
 
 import time
 
+struct MemoryStateStoreTestRecord { // struct-typed value stored by the struct round-trip test
+	id     string
+	status string
+	count  int
+}
+
 fn test_memory_state_store_set_get_and_keys() {
 	mut store := new_memory_state_store[string]()
 	store.set('alpha', 'a') or { panic(err) }
@@ -14,6 +20,20 @@ fn test_memory_state_store_set_get_and_keys() {
 	assert store.list().len == 2
 }
 
+fn test_memory_state_store_roundtrips_struct_values() {
+	mut records := new_memory_state_store[MemoryStateStoreTestRecord]()
+	records.set('resp', MemoryStateStoreTestRecord{
+		id:     'resp_1'
+		status: 'completed'
+		count:  2
+	}) or { panic(err) }
+	// Every field must come back unchanged after set() followed by get().
+	loaded := records.get('resp') or { panic(err) }
+	assert loaded.count == 2
+	assert loaded.status == 'completed'
+	assert loaded.id == 'resp_1'
+}
+
 fn test_memory_state_store_ttl_expiry_and_prune() {
 	mut store := new_memory_state_store[string]()
 	store.set_with_ttl('short', 'x', 20 * time.millisecond) or { panic(err) }

From 9b6e00d7396246186bdc46bd6c938c9ce5c47325 Mon Sep 17 00:00:00 2001
From: weigang <guweigang@bullsoft.org>
Date: Sun, 10 May 2026 17:55:01 +0800
Subject: [PATCH 06/10] docs: clarify embedded vjsx runtime assets

---
 README.md                 | 9 +++++----
 scripts/doctor.sh         | 4 ++--
 scripts/runtime_doctor.sh | 6 +++---
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index f48fb28..8be840e 100644
--- a/README.md
+++ b/README.md
@@ -212,7 +212,7 @@ Practical summary:
 `vhttpd` now splits local dependencies into install profiles so you do not need to guess the full system package list up front.
 
 - `core`: base build dependencies for `vhttpd`
-- `vjsx`: embedded runtime support, including the required QuickJS archive placement under `~/.vmodules/vjsx/libs/`
+- `vjsx`: embedded runtime build support, including the required QuickJS archive placement under `~/.vmodules/vjsx/libs/`
 - `db`: alias for the default DB-capable dependency surface
 - `full`: everything above
 
@@ -242,15 +242,16 @@ What these targets do:
 
 - `make deps-core`: installs `openssl`, `Boehm GC`, `pkg-config`, and basic build tooling
 - `make deps-core` also installs SQLite/MySQL/PostgreSQL client development packages because default `vhttpd` builds now include DB support
-- `make deps-vjsx`: ensures `~/.vmodules/vjsx` exists and, on Linux, builds QuickJS and places it at `~/.vmodules/vjsx/libs/qjs_linux_x64.a`
+- `make deps-vjsx`: ensures the local `vjsx` module checkout exists for builds and, on Linux, builds QuickJS and places it at `~/.vmodules/vjsx/libs/qjs_linux_x64.a`
 - `make deps-db`: alias for the same default DB-capable build dependency set
 - `make doctor`: checks the current machine for the required commands, `pkg-config` entries, and `vjsx` QuickJS archive placement
 
-Important `vjsx` note:
+Important `vjsx` build note:
 
 - On Linux, `vjsx` does not just need "QuickJS installed somewhere".
 - The archive must exist at [~/.vmodules/vjsx/libs/qjs_linux_x64.a](/Users/guweigang/.vmodules/vjsx/libs/qjs_linux_x64.a).
 - `make deps-vjsx` is the supported way to prepare that path locally.
+- This is a build-time module layout. Packaged `vhttpd` binaries do not need a local `~/.vmodules/vjsx` checkout just to load embedded JavaScript or TypeScript runtime assets.
 
 ## Build
 
@@ -344,7 +345,7 @@ The binary is rewritten during packaging so it prefers these bundled copies:
 
 That means end users no longer need to preinstall MySQL/PostgreSQL/OpenSSL/Boehm runtime packages just to launch the release binary.
 
-`vjsx` runtime JS assets are embedded into the `vjsx` binary integration, so packaged `vhttpd` releases no longer need to ship `runtime/vjsx` JS files. `VJSX_ASSET_ROOT` is still supported as an explicit development override when you need to test a local replacement for built-in `vjsx` runtime JS.
+`vjsx` runtime assets, including the TypeScript compiler runtime under `thirdparty/typescript/lib`, are embedded into the `vjsx` binary integration. Packaged `vhttpd` releases no longer need to ship `runtime/vjsx` JS files or a local `~/.vmodules/vjsx/thirdparty/typescript` tree. `VJSX_ASSET_ROOT` is still supported as an explicit development override when you need to test a local replacement for built-in `vjsx` runtime assets.
 
 After installation, users can verify the machine with:
 
diff --git a/scripts/doctor.sh b/scripts/doctor.sh
index 631e4bf..0d6e056 100755
--- a/scripts/doctor.sh
+++ b/scripts/doctor.sh
@@ -68,9 +68,9 @@ check_pkg openssl
 check_pkg bdw-gc
 
 if [ -e "$vjsx_dir" ]; then
-  ok "vjsx module path ${vjsx_dir}"
+  ok "vjsx build module path ${vjsx_dir}"
 else
-  warn "vjsx module path ${vjsx_dir} is absent; run ./scripts/install_deps.sh vjsx if you need embedded runtime support"
+  warn "vjsx build module path ${vjsx_dir} is absent; run ./scripts/install_deps.sh vjsx before building embedded runtime support"
 fi
 
 if [ -e "$vjsx_dir" ]; then
diff --git a/scripts/runtime_doctor.sh b/scripts/runtime_doctor.sh
index 22fba10..3978f1b 100755
--- a/scripts/runtime_doctor.sh
+++ b/scripts/runtime_doctor.sh
@@ -109,13 +109,13 @@ fi
 if [ -n "$vjsx_asset_root_override" ]; then
   ok "vjsx runtime asset override ${vjsx_asset_root_override}"
 else
-  ok "vjsx runtime assets are embedded; VJSX_ASSET_ROOT is unset"
+  ok "vjsx runtime assets, including TypeScript runtime assets, are embedded; VJSX_ASSET_ROOT is unset"
 fi
 
 if [ -L "${HOME}/.vmodules/vjsx" ]; then
-  warn "legacy vjsx compatibility symlink ${HOME}/.vmodules/vjsx is present but no longer required"
+  warn "legacy vjsx compatibility symlink ${HOME}/.vmodules/vjsx is present but no longer required at runtime"
 else
-  ok "legacy vjsx compatibility symlink ${HOME}/.vmodules/vjsx is absent"
+  ok "legacy vjsx compatibility symlink ${HOME}/.vmodules/vjsx is absent; runtime assets are embedded"
 fi
 
 if [ "$status" -ne 0 ]; then

From c263a2193f696c50ff2db8677724c744c92a6058 Mon Sep 17 00:00:00 2001
From: weigang <guweigang@bullsoft.org>
Date: Sun, 10 May 2026 17:55:07 +0800
Subject: [PATCH 07/10] test: isolate vjsx TypeScript asset root

---
 src/inproc_vjsx_executor_test.v | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/inproc_vjsx_executor_test.v b/src/inproc_vjsx_executor_test.v
index f21ef53..a06e755 100644
--- a/src/inproc_vjsx_executor_test.v
+++ b/src/inproc_vjsx_executor_test.v
@@ -862,13 +862,22 @@ fn test_inproc_vjsx_executor_dispatch_http_supports_redirect_helper() {
 
 fn test_inproc_vjsx_executor_dispatch_http_supports_typescript_module_entry() {
 	temp_dir := os.join_path(os.temp_dir(), 'vhttpd_vjsx_executor_ts_test')
+	asset_root := os.join_path(temp_dir, 'empty-asset-root')
 	os.mkdir_all(temp_dir) or { panic(err) }
+	os.mkdir_all(asset_root) or { panic(err) }
 	app_file := os.join_path(temp_dir, 'handler.mts')
 	os.write_file(app_file, 'function handler(ctx) { return { status: 206, headers: { "content-type": "application/json; charset=utf-8" }, body: JSON.stringify({ ok: true, message: "hello " + ctx.queryParam("name", "guest"), laneId: ctx.runtime.laneId }) }; }\nglobalThis.__vhttpd_handle = handler;\nexport default handler;\n') or {
 		panic(err)
 	}
+	old_asset_root := os.getenv('VJSX_ASSET_ROOT')
+	os.setenv('VJSX_ASSET_ROOT', asset_root, true)
 	defer {
-		os.rm(app_file) or {}
+		if old_asset_root == '' {
+			os.unsetenv('VJSX_ASSET_ROOT')
+		} else {
+			os.setenv('VJSX_ASSET_ROOT', old_asset_root, true)
+		}
+		os.rmdir_all(temp_dir) or {}
 	}
 	mut executor := new_inproc_vjsx_executor(VjsxRuntimeFacadeConfig{
 		thread_count:    1

From c8fe8556455e3eb72ac2ad8eb68db3b0901eb69e Mon Sep 17 00:00:00 2001
From: weigang <guweigang@bullsoft.org>
Date: Sun, 10 May 2026 18:02:31 +0800
Subject: [PATCH 08/10] ci: build V from latest master

---
 .github/workflows/vhttpd-binaries.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/vhttpd-binaries.yml b/.github/workflows/vhttpd-binaries.yml
index e15898f..3d21e8a 100644
--- a/.github/workflows/vhttpd-binaries.yml
+++ b/.github/workflows/vhttpd-binaries.yml
@@ -29,7 +29,6 @@ jobs:
     env:
       VHTTPD_VJSX_ROOT: /usr/local/share/vhttpd/vjsx
       V_REPO: https://github.com/guweigang/vlang
-      V_REF: 06438457b7fed78397588e7c0797b9e0d7483257
     strategy:
       fail-fast: false
       matrix:
@@ -96,7 +95,6 @@ jobs:
           unzip -q /tmp/v-bootstrap.zip -d /tmp/v-bootstrap
           rm -rf /tmp/v
           git clone "$V_REPO" /tmp/v
-          git -C /tmp/v checkout "$V_REF"
           chmod +x /tmp/v-bootstrap/v/v
           (
             cd /tmp/v

From dd0f9ff8e37eb72718ea042b59f181f79b018f71 Mon Sep 17 00:00:00 2001
From: weigang <guweigang@bullsoft.org>
Date: Sun, 10 May 2026 20:32:45 +0800
Subject: [PATCH 09/10] build: use cc for V compilation

---
 .github/workflows/vhttpd-binaries.yml |  4 ++--
 Makefile                              | 17 +++++++++--------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/vhttpd-binaries.yml b/.github/workflows/vhttpd-binaries.yml
index 3d21e8a..70f459f 100644
--- a/.github/workflows/vhttpd-binaries.yml
+++ b/.github/workflows/vhttpd-binaries.yml
@@ -98,8 +98,8 @@ jobs:
           chmod +x /tmp/v-bootstrap/v/v
           (
             cd /tmp/v
-            /tmp/v-bootstrap/v/v -o v cmd/v
-            ./v -o v cmd/v
+            /tmp/v-bootstrap/v/v -cc cc -o v cmd/v
+            ./v -cc cc -o v cmd/v
           )
           chmod +x /tmp/v/v
           echo "/tmp/v" >> "$GITHUB_PATH"
diff --git a/Makefile b/Makefile
index bb619dc..12efd00 100644
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,7 @@
 ROOT := $(CURDIR)
 SRC_DIR := $(ROOT)/src
 VHTTPD_BIN ?= $(ROOT)/vhttpd
+V_CC ?= cc
 VPHP_V_GC ?= auto
 VPHP_V_GC_STRIPPED := $(strip $(VPHP_V_GC))
 RESOLVED_VPHP_V_GC := $(shell if [ -n "$(VPHP_V_GC_STRIPPED)" ] && [ "$(VPHP_V_GC_STRIPPED)" != "auto" ]; then printf "%s" "$(VPHP_V_GC_STRIPPED)"; elif pkg-config --exists bdw-gc 2>/dev/null; then printf boehm; else printf none; fi)
@@ -79,12 +80,12 @@ ifeq ($(WITH_DB),1)
 endif
 
 build: prepare-build-src
-	v $(V_FLAGS) $(V_DB_FLAGS) $(V_GC_FLAG) -o $(VHTTPD_BIN) $(BUILD_STAGE_DIR)
+	v -cc $(V_CC) $(V_FLAGS) $(V_DB_FLAGS) $(V_GC_FLAG) -o $(VHTTPD_BIN) $(BUILD_STAGE_DIR)
 
 vhttpd: build
 
 prod: prepare-build-src
-	v $(V_FLAGS) $(V_DB_FLAGS) $(V_GC_FLAG) $(V_PROD_FLAGS) $(V_NOCACHE_FLAGS) -o $(VHTTPD_BIN) $(BUILD_STAGE_DIR)
+	v -cc $(V_CC) $(V_FLAGS) $(V_DB_FLAGS) $(V_GC_FLAG) $(V_PROD_FLAGS) $(V_NOCACHE_FLAGS) -o $(VHTTPD_BIN) $(BUILD_STAGE_DIR)
 
 build-prod: prod
 
@@ -127,22 +128,22 @@ psr-matrix:
 test: test-fast
 
 test-fast:
-	v test $(FAST_TEST_FILES)
+	v -cc $(V_CC) test $(FAST_TEST_FILES)
 
 test-inproc:
-	v test $(INPROC_TEST_FILES)
+	v -cc $(V_CC) test $(INPROC_TEST_FILES)
 
 test-codexbot:
-	v test $(CODEXBOT_TEST_FILES)
+	v -cc $(V_CC) test $(CODEXBOT_TEST_FILES)
 
 test-codexbot-fast:
-	v test $(CODEXBOT_FAST_TEST_FILES)
+	v -cc $(V_CC) test $(CODEXBOT_FAST_TEST_FILES)
 
 test-codexbot-lifecycle:
-	v test $(CODEXBOT_LIFECYCLE_TEST_FILES)
+	v -cc $(V_CC) test $(CODEXBOT_LIFECYCLE_TEST_FILES)
 
 test-profile-codexbot:
 	@/bin/zsh $(ROOT)/tools/profile_codexbot_tests.sh $(ROOT)
 
 test-all:
-	v test $(SRC_DIR)
+	v -cc $(V_CC) test $(SRC_DIR)

From 76247e8d1edc69e7fdd8860a0442982be4e6d1ca Mon Sep 17 00:00:00 2001
From: weigang <guweigang@bullsoft.org>
Date: Sun, 10 May 2026 20:34:04 +0800
Subject: [PATCH 10/10] ci: pass cc to production build

---
 .github/workflows/vhttpd-binaries.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/vhttpd-binaries.yml b/.github/workflows/vhttpd-binaries.yml
index 70f459f..9ae848b 100644
--- a/.github/workflows/vhttpd-binaries.yml
+++ b/.github/workflows/vhttpd-binaries.yml
@@ -156,7 +156,7 @@ jobs:
         shell: bash
         run: |
           set -euo pipefail
-          make prod VPHP_V_GC=boehm WITH_DB=1
+          make prod V_CC=cc VPHP_V_GC=boehm WITH_DB=1
 
       - name: Smoke test
         shell: bash