From 96ce95c7c09dcde6e794bffd2a844d0936839e21 Mon Sep 17 00:00:00 2001 From: weigang Date: Wed, 6 May 2026 09:58:45 +0800 Subject: [PATCH 01/10] config: add OpenAI gateway settings --- examples/paseo-relay/paseo-relay.toml | 5 + src/config.v | 595 +++++++++++++++++++++----- src/multi_server_runtime_config.v | 46 +- src/provider_spec.v | 1 + src/server_logic_test.v | 4 +- 5 files changed, 546 insertions(+), 105 deletions(-) diff --git a/examples/paseo-relay/paseo-relay.toml b/examples/paseo-relay/paseo-relay.toml index ddb2ebf..320cd61 100644 --- a/examples/paseo-relay/paseo-relay.toml +++ b/examples/paseo-relay/paseo-relay.toml @@ -24,6 +24,11 @@ port = 19901 root = "${paths.root}/examples/paseo-relay" executor = "vjsx" app = "${paths.vjsx_app}" +websocket_affinity.enabled = false +websocket_affinity.source = "app" +websocket_affinity.key = "serverId" +websocket_affinity.scope = "lane" +websocket_affinity.fallback = "reject" websocket_actor.enabled = true websocket_actor.fallback = "unkeyed" websocket_actor.queue_timeout_ms = 30000 diff --git a/src/config.v b/src/config.v index 99b09c5..4ef59ed 100644 --- a/src/config.v +++ b/src/config.v @@ -70,6 +70,24 @@ mut: enable_network bool @[toml: 'enable_network'] } +struct PluginConfig { +mut: + kind string = 'vjsx' + entry string + app_entry string @[toml: 'app_entry'] + module_root string @[toml: 'module_root'] + build_root string @[toml: 'build_root'] + signature_root string @[toml: 'signature_root'] + signature_include []string @[toml: 'signature_include'] + signature_exclude []string @[toml: 'signature_exclude'] + runtime_profile string = 'script' @[toml: 'runtime_profile'] + thread_count int = 1 @[toml: 'thread_count'] + max_requests int @[toml: 'max_requests'] + enable_fs bool @[toml: 'enable_fs'] + enable_process bool @[toml: 'enable_process'] + enable_network bool @[toml: 'enable_network'] +} + struct WebSocketAffinityConfig { mut: enabled bool @@ -88,12 +106,12 @@ mut: struct WebSocketActorConfig { mut: - 
enabled bool - sources []WebSocketActorSourceConfig - fallback string - queue_timeout_ms int @[toml: 'queue_timeout_ms'] - max_queue_per_key int @[toml: 'max_queue_per_key'] - events []string + enabled bool + sources []WebSocketActorSourceConfig + fallback string + queue_timeout_ms int @[toml: 'queue_timeout_ms'] + max_queue_per_key int @[toml: 'max_queue_per_key'] + events []string } struct AdminConfig { @@ -157,6 +175,43 @@ mut: flush_interval_ms int = 400 @[toml: 'flush_interval_ms'] } +struct OpenAIEndpointsConfig { +mut: + models bool = true @[toml: 'models'] + chat_completions bool = true @[toml: 'chat_completions'] + responses bool = true @[toml: 'responses'] + embeddings bool @[toml: 'embeddings'] +} + +struct OpenAIBackendConfig { +mut: + kind string = 'openai_http' + base_url string @[toml: 'base_url'] + executor string + api_key string @[toml: 'api_key'] + api_key_env string = 'OPENAI_API_KEY' @[toml: 'api_key_env'] + timeout_ms int = 60000 @[toml: 'timeout_ms'] +} + +struct OpenAIRouteConfig { +mut: + model string + models []string + backend string + upstream_model string @[toml: 'upstream_model'] +} + +struct OpenAIConfig { +mut: + enabled bool + base_path string = '/v1' @[toml: 'base_path'] + default_backend string @[toml: 'default_backend'] + plugin string + endpoints OpenAIEndpointsConfig + backends map[string]OpenAIBackendConfig + routes map[string]OpenAIRouteConfig +} + struct BridgeConfig { mut: enabled bool @@ -204,47 +259,51 @@ mut: struct SiteConfig { mut: - project_root string @[toml: 'project_root'] - host string = '127.0.0.1' - port int - app string - worker_entry string - paths PathsConfig - worker WorkerConfig - executor ExecutorConfig - php PhpConfig - vjsx VjsxConfig + project_root string @[toml: 'project_root'] + host string = '127.0.0.1' + port int + app string + worker_entry string + paths PathsConfig + worker WorkerConfig + executor ExecutorConfig + php PhpConfig + vjsx VjsxConfig + plugins map[string]PluginConfig websocket_affinity 
WebSocketAffinityConfig @[toml: 'websocket_affinity'] - websocket_actor WebSocketActorConfig @[toml: 'websocket_actor'] - assets AssetsConfig - runtime RuntimeConfig - mcp McpConfig - feishu FeishuConfig - codex CodexConfig - db DbConfig + websocket_actor WebSocketActorConfig @[toml: 'websocket_actor'] + assets AssetsConfig + runtime RuntimeConfig + mcp McpConfig + feishu FeishuConfig + codex CodexConfig + openai OpenAIConfig + db DbConfig } struct VhttpdConfig { mut: - server ServerConfig - files FilesConfig - paths PathsConfig - worker WorkerConfig - executor ExecutorConfig - php PhpConfig - vjsx VjsxConfig + server ServerConfig + files FilesConfig + paths PathsConfig + worker WorkerConfig + executor ExecutorConfig + php PhpConfig + vjsx VjsxConfig + plugins map[string]PluginConfig websocket_affinity WebSocketAffinityConfig @[toml: 'websocket_affinity'] - websocket_actor WebSocketActorConfig @[toml: 'websocket_actor'] - admin AdminConfig - assets AssetsConfig - runtime RuntimeConfig - mcp McpConfig - feishu FeishuConfig - codex CodexConfig - db DbConfig - listeners map[string]ListenerConfig - sites map[string]SiteConfig - config_path string + websocket_actor WebSocketActorConfig @[toml: 'websocket_actor'] + admin AdminConfig + assets AssetsConfig + runtime RuntimeConfig + mcp McpConfig + feishu FeishuConfig + codex CodexConfig + openai OpenAIConfig + db DbConfig + listeners map[string]ListenerConfig + sites map[string]SiteConfig + config_path string } fn default_vhttpd_config() VhttpdConfig { @@ -346,11 +405,14 @@ fn load_vhttpd_config(args []string) !VhttpdConfig { doc := toml.parse_text(text)! decode_paths_config(doc, mut cfg)! decode_feishu_config(doc, mut cfg)! + decode_openai_root_config(doc, mut cfg)! + decode_plugins_root_config(doc, mut cfg)! 
if root_any := doc.value_opt('bridge') { root := root_any.as_map() - if cfg.feishu.bridge.ws_url.trim_space() == '' && cfg.feishu.bridge.client_id.trim_space() == '' - && cfg.feishu.bridge.token.trim_space() == '' && cfg.feishu.bridge.target_id.trim_space() == '' - && !cfg.feishu.bridge.enabled { + if cfg.feishu.bridge.ws_url.trim_space() == '' + && cfg.feishu.bridge.client_id.trim_space() == '' + && cfg.feishu.bridge.token.trim_space() == '' + && cfg.feishu.bridge.target_id.trim_space() == '' && !cfg.feishu.bridge.enabled { cfg.feishu.bridge = decode_bridge_config_map(root) } } @@ -424,6 +486,20 @@ fn decode_feishu_config(doc toml.Doc, mut cfg VhttpdConfig) ! { cfg.feishu.apps = apps.clone() } +fn decode_openai_root_config(doc toml.Doc, mut cfg VhttpdConfig) ! { + if root_any := doc.value_opt('openai') { + root := root_any.as_map() + cfg.openai = decode_openai_config_map(root) + } +} + +fn decode_plugins_root_config(doc toml.Doc, mut cfg VhttpdConfig) ! { + if root_any := doc.value_opt('plugins') { + root := root_any.as_map() + cfg.plugins = decode_plugins_config_map(root) + } +} + fn toml_string_from_map(entry map[string]toml.Any, key string, default_val string) string { return (entry[key] or { toml.Any(default_val) }).string() } @@ -595,6 +671,62 @@ fn decode_vjsx_config_map(entry map[string]toml.Any) VjsxConfig { return cfg } +fn decode_plugin_config_map(entry map[string]toml.Any) PluginConfig { + mut cfg := PluginConfig{} + if 'kind' in entry { + cfg.kind = toml_string_from_map(entry, 'kind', cfg.kind) + } + if 'entry' in entry { + cfg.entry = toml_string_from_map(entry, 'entry', cfg.entry) + } + if 'app_entry' in entry { + cfg.app_entry = toml_string_from_map(entry, 'app_entry', cfg.app_entry) + } + if cfg.app_entry.trim_space() == '' && cfg.entry.trim_space() != '' { + cfg.app_entry = cfg.entry + } + if 'module_root' in entry { + cfg.module_root = toml_string_from_map(entry, 'module_root', cfg.module_root) + } + if 'build_root' in entry { + cfg.build_root = 
toml_string_from_map(entry, 'build_root', cfg.build_root) + } + if 'signature_root' in entry { + cfg.signature_root = toml_string_from_map(entry, 'signature_root', cfg.signature_root) + } + cfg.signature_include = toml_string_list_from_map(entry, 'signature_include') + cfg.signature_exclude = toml_string_list_from_map(entry, 'signature_exclude') + if 'runtime_profile' in entry { + cfg.runtime_profile = toml_string_from_map(entry, 'runtime_profile', cfg.runtime_profile) + } + if 'thread_count' in entry { + cfg.thread_count = toml_int_from_map(entry, 'thread_count', cfg.thread_count) + } + if 'max_requests' in entry { + cfg.max_requests = toml_int_from_map(entry, 'max_requests', cfg.max_requests) + } + if 'enable_fs' in entry { + cfg.enable_fs = toml_bool_from_map(entry, 'enable_fs', cfg.enable_fs) + } + if 'enable_process' in entry { + cfg.enable_process = toml_bool_from_map(entry, 'enable_process', cfg.enable_process) + } + if 'enable_network' in entry { + cfg.enable_network = toml_bool_from_map(entry, 'enable_network', cfg.enable_network) + } + return cfg +} + +fn decode_plugins_config_map(entry map[string]toml.Any) map[string]PluginConfig { + mut plugins := map[string]PluginConfig{} + for name, value in entry { + if value is map[string]toml.Any { + plugins[name] = decode_plugin_config_map(value) + } + } + return plugins +} + fn decode_websocket_affinity_config_map(entry map[string]toml.Any) WebSocketAffinityConfig { mut cfg := WebSocketAffinityConfig{} if 'enabled' in entry { @@ -793,6 +925,112 @@ fn decode_codex_config_map(entry map[string]toml.Any) CodexConfig { return cfg } +fn decode_openai_endpoints_config_map(entry map[string]toml.Any) OpenAIEndpointsConfig { + mut cfg := OpenAIEndpointsConfig{} + if 'models' in entry { + cfg.models = toml_bool_from_map(entry, 'models', cfg.models) + } + if 'chat_completions' in entry { + cfg.chat_completions = toml_bool_from_map(entry, 'chat_completions', cfg.chat_completions) + } + if 'responses' in entry { + 
cfg.responses = toml_bool_from_map(entry, 'responses', cfg.responses) + } + if 'embeddings' in entry { + cfg.embeddings = toml_bool_from_map(entry, 'embeddings', cfg.embeddings) + } + return cfg +} + +fn decode_openai_backend_config_map(entry map[string]toml.Any) OpenAIBackendConfig { + mut cfg := OpenAIBackendConfig{} + if 'kind' in entry { + cfg.kind = toml_string_from_map(entry, 'kind', cfg.kind) + } + if 'base_url' in entry { + cfg.base_url = toml_string_from_map(entry, 'base_url', cfg.base_url) + } + if 'executor' in entry { + cfg.executor = toml_string_from_map(entry, 'executor', cfg.executor) + } + if 'api_key' in entry { + cfg.api_key = toml_string_from_map(entry, 'api_key', cfg.api_key) + } + if 'api_key_env' in entry { + cfg.api_key_env = toml_string_from_map(entry, 'api_key_env', cfg.api_key_env) + } + if 'timeout_ms' in entry { + cfg.timeout_ms = toml_int_from_map(entry, 'timeout_ms', cfg.timeout_ms) + } + return cfg +} + +fn decode_openai_route_config_map(entry map[string]toml.Any) OpenAIRouteConfig { + mut cfg := OpenAIRouteConfig{} + if 'model' in entry { + cfg.model = toml_string_from_map(entry, 'model', cfg.model) + } + cfg.models = toml_string_list_from_map(entry, 'models') + if 'backend' in entry { + cfg.backend = toml_string_from_map(entry, 'backend', cfg.backend) + } + if 'upstream_model' in entry { + cfg.upstream_model = toml_string_from_map(entry, 'upstream_model', cfg.upstream_model) + } + return cfg +} + +fn decode_openai_config_map(entry map[string]toml.Any) OpenAIConfig { + mut cfg := OpenAIConfig{} + if 'enabled' in entry { + cfg.enabled = toml_bool_from_map(entry, 'enabled', cfg.enabled) + } + if 'base_path' in entry { + cfg.base_path = toml_string_from_map(entry, 'base_path', cfg.base_path) + } + if 'default_backend' in entry { + cfg.default_backend = toml_string_from_map(entry, 'default_backend', cfg.default_backend) + } + if 'plugin' in entry { + cfg.plugin = toml_string_from_map(entry, 'plugin', cfg.plugin) + } + if endpoints_any := 
entry['endpoints'] { + if endpoints_any is map[string]toml.Any { + cfg.endpoints = decode_openai_endpoints_config_map(endpoints_any) + } + } + mut backends := map[string]OpenAIBackendConfig{} + if backends_any := entry['backends'] { + if backends_any is map[string]toml.Any { + for name, value in backends_any { + if value is map[string]toml.Any { + backends[name] = decode_openai_backend_config_map(value) + } + } + } + } + cfg.backends = backends.clone() + mut routes := map[string]OpenAIRouteConfig{} + if routes_any := entry['routes'] { + if routes_any is map[string]toml.Any { + for name, value in routes_any { + if value is map[string]toml.Any { + mut route := decode_openai_route_config_map(value) + if route.model.trim_space() == '' { + route.model = name + } + if route.models.len == 0 && route.model.trim_space() != '' { + route.models = [route.model] + } + routes[name] = route + } + } + } + } + cfg.routes = routes.clone() + return cfg +} + fn decode_bridge_config_map(entry map[string]toml.Any) BridgeConfig { mut cfg := BridgeConfig{} if 'enabled' in entry { @@ -874,6 +1112,11 @@ fn decode_site_config_map(entry map[string]toml.Any) SiteConfig { cfg.vjsx = decode_vjsx_config_map(vjsx_any) } } + if plugins_any := entry['plugins'] { + if plugins_any is map[string]toml.Any { + cfg.plugins = decode_plugins_config_map(plugins_any) + } + } if websocket_affinity_any := entry['websocket_affinity'] { if websocket_affinity_any is map[string]toml.Any { cfg.websocket_affinity = decode_websocket_affinity_config_map(websocket_affinity_any) @@ -909,6 +1152,11 @@ fn decode_site_config_map(entry map[string]toml.Any) SiteConfig { cfg.codex = decode_codex_config_map(codex_any) } } + if openai_any := entry['openai'] { + if openai_any is map[string]toml.Any { + cfg.openai = decode_openai_config_map(openai_any) + } + } return cfg } @@ -942,8 +1190,8 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! { for _ in 0 .. 
max_passes { mut changed := false mut vars := build_config_variable_map(cfg) - cfg.paths.root, changed = expand_config_string(cfg.paths.root, 'paths', vars, env_map, - changed)! + cfg.paths.root, changed = expand_config_string(cfg.paths.root, 'paths', vars, + env_map, changed)! vars['paths.root'] = resolve_config_path(base_dir, cfg.paths.root) mut next_paths := map[string]string{} for key, value in cfg.paths.values { @@ -954,30 +1202,30 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! { } } cfg.paths.values = next_paths.clone() - cfg.server.host, changed = expand_config_string(cfg.server.host, 'server', vars, env_map, - changed)! - cfg.files.event_log, changed = expand_config_string(cfg.files.event_log, 'files', vars, + cfg.server.host, changed = expand_config_string(cfg.server.host, 'server', vars, env_map, changed)! - cfg.files.pid_file, changed = expand_config_string(cfg.files.pid_file, 'files', vars, env_map, - changed)! - cfg.worker.cmd, changed = expand_config_string(cfg.worker.cmd, 'worker', vars, env_map, - changed)! - cfg.worker.socket, changed = expand_config_string(cfg.worker.socket, 'worker', vars, env_map, - changed)! - cfg.worker.socket_prefix, changed = expand_config_string(cfg.worker.socket_prefix, 'worker', + cfg.files.event_log, changed = expand_config_string(cfg.files.event_log, 'files', vars, env_map, changed)! - cfg.executor.kind, changed = expand_config_string(cfg.executor.kind, 'executor', vars, env_map, - changed)! - cfg.vjsx.app_entry, changed = expand_config_string(cfg.vjsx.app_entry, 'vjsx', vars, env_map, - changed)! - cfg.vjsx.module_root, changed = expand_config_string(cfg.vjsx.module_root, 'vjsx', vars, - env_map, changed)! - cfg.vjsx.build_root, changed = expand_config_string(cfg.vjsx.build_root, 'vjsx', vars, + cfg.files.pid_file, changed = expand_config_string(cfg.files.pid_file, 'files', + vars, env_map, changed)! 
+ cfg.worker.cmd, changed = expand_config_string(cfg.worker.cmd, 'worker', vars, env_map, changed)! - cfg.vjsx.signature_root, changed = expand_config_string(cfg.vjsx.signature_root, 'vjsx', + cfg.worker.socket, changed = expand_config_string(cfg.worker.socket, 'worker', + vars, env_map, changed)! + cfg.worker.socket_prefix, changed = expand_config_string(cfg.worker.socket_prefix, + 'worker', vars, env_map, changed)! + cfg.executor.kind, changed = expand_config_string(cfg.executor.kind, 'executor', + vars, env_map, changed)! + cfg.vjsx.app_entry, changed = expand_config_string(cfg.vjsx.app_entry, 'vjsx', vars, env_map, changed)! - cfg.vjsx.runtime_profile, changed = expand_config_string(cfg.vjsx.runtime_profile, 'vjsx', + cfg.vjsx.module_root, changed = expand_config_string(cfg.vjsx.module_root, 'vjsx', vars, env_map, changed)! + cfg.vjsx.build_root, changed = expand_config_string(cfg.vjsx.build_root, 'vjsx', + vars, env_map, changed)! + cfg.vjsx.signature_root, changed = expand_config_string(cfg.vjsx.signature_root, + 'vjsx', vars, env_map, changed)! + cfg.vjsx.runtime_profile, changed = expand_config_string(cfg.vjsx.runtime_profile, + 'vjsx', vars, env_map, changed)! for i, raw in cfg.vjsx.signature_include { next, c := expand_config_string(raw, 'vjsx', vars, env_map, false)! if c { @@ -992,6 +1240,60 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! { changed = true } } + mut next_plugins := map[string]PluginConfig{} + for name, plugin in cfg.plugins { + entry, entry_changed := expand_config_string(plugin.entry, 'plugins.${name}', + vars, env_map, false)! + app_entry, app_entry_changed := expand_config_string(plugin.app_entry, 'plugins.${name}', + vars, env_map, false)! + module_root, module_root_changed := expand_config_string(plugin.module_root, + 'plugins.${name}', vars, env_map, false)! + build_root, build_root_changed := expand_config_string(plugin.build_root, + 'plugins.${name}', vars, env_map, false)! 
+ signature_root, signature_root_changed := expand_config_string(plugin.signature_root, + 'plugins.${name}', vars, env_map, false)! + runtime_profile, runtime_profile_changed := expand_config_string(plugin.runtime_profile, + 'plugins.${name}', vars, env_map, false)! + mut signature_include := plugin.signature_include.clone() + for i, raw in signature_include { + next, c := expand_config_string(raw, 'plugins.${name}', vars, env_map, + false)! + if c { + signature_include[i] = next + changed = true + } + } + mut signature_exclude := plugin.signature_exclude.clone() + for i, raw in signature_exclude { + next, c := expand_config_string(raw, 'plugins.${name}', vars, env_map, + false)! + if c { + signature_exclude[i] = next + changed = true + } + } + next_plugins[name] = PluginConfig{ + kind: plugin.kind + entry: entry + app_entry: app_entry + module_root: module_root + build_root: build_root + signature_root: signature_root + signature_include: signature_include + signature_exclude: signature_exclude + runtime_profile: runtime_profile + thread_count: plugin.thread_count + max_requests: plugin.max_requests + enable_fs: plugin.enable_fs + enable_process: plugin.enable_process + enable_network: plugin.enable_network + } + if entry_changed || app_entry_changed || module_root_changed || build_root_changed + || signature_root_changed || runtime_profile_changed { + changed = true + } + } + cfg.plugins = next_plugins.clone() for i, raw in cfg.worker.sockets { next, c := expand_config_string(raw, 'worker', vars, env_map, false)! if c { @@ -999,11 +1301,12 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! { changed = true } } - cfg.php.bin, changed = expand_config_string(cfg.php.bin, 'php', vars, env_map, changed)! - cfg.php.worker_entry, changed = expand_config_string(cfg.php.worker_entry, 'php', vars, - env_map, changed)! 
- cfg.php.app_entry, changed = expand_config_string(cfg.php.app_entry, 'php', vars, env_map, + cfg.php.bin, changed = expand_config_string(cfg.php.bin, 'php', vars, env_map, changed)! + cfg.php.worker_entry, changed = expand_config_string(cfg.php.worker_entry, 'php', + vars, env_map, changed)! + cfg.php.app_entry, changed = expand_config_string(cfg.php.app_entry, 'php', vars, + env_map, changed)! for i, raw in cfg.php.extensions { next, c := expand_config_string(raw, 'php', vars, env_map, false)! if c { @@ -1027,26 +1330,26 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! { } } cfg.worker.env = next_env.clone() - cfg.admin.host, changed = expand_config_string(cfg.admin.host, 'admin', vars, env_map, - changed)! - cfg.admin.token, changed = expand_config_string(cfg.admin.token, 'admin', vars, env_map, - changed)! - cfg.assets.prefix, changed = expand_config_string(cfg.assets.prefix, 'assets', vars, env_map, - changed)! - cfg.assets.root, changed = expand_config_string(cfg.assets.root, 'assets', vars, env_map, - changed)! - cfg.assets.cache_control, changed = expand_config_string(cfg.assets.cache_control, 'assets', + cfg.admin.host, changed = expand_config_string(cfg.admin.host, 'admin', vars, + env_map, changed)! + cfg.admin.token, changed = expand_config_string(cfg.admin.token, 'admin', vars, + env_map, changed)! + cfg.assets.prefix, changed = expand_config_string(cfg.assets.prefix, 'assets', vars, env_map, changed)! - cfg.runtime.timezone, changed = expand_config_string(cfg.runtime.timezone, 'runtime', vars, + cfg.assets.root, changed = expand_config_string(cfg.assets.root, 'assets', vars, env_map, changed)! - cfg.feishu.open_base_url, changed = expand_config_string(cfg.feishu.open_base_url, 'feishu', + cfg.assets.cache_control, changed = expand_config_string(cfg.assets.cache_control, + 'assets', vars, env_map, changed)! + cfg.runtime.timezone, changed = expand_config_string(cfg.runtime.timezone, 'runtime', vars, env_map, changed)! 
+ cfg.feishu.open_base_url, changed = expand_config_string(cfg.feishu.open_base_url, + 'feishu', vars, env_map, changed)! mut next_apps := map[string]FeishuAppConfig{} for name, app_cfg in cfg.feishu.apps { - app_id, app_id_changed := expand_config_string(app_cfg.app_id, 'feishu.${name}', vars, env_map, - false)! - app_secret, app_secret_changed := expand_config_string(app_cfg.app_secret, 'feishu.${name}', + app_id, app_id_changed := expand_config_string(app_cfg.app_id, 'feishu.${name}', vars, env_map, false)! + app_secret, app_secret_changed := expand_config_string(app_cfg.app_secret, + 'feishu.${name}', vars, env_map, false)! next_apps[name] = FeishuAppConfig{ app_id: app_id app_secret: app_secret @@ -1058,27 +1361,86 @@ fn resolve_config_variables(mut cfg VhttpdConfig, config_path string) ! { cfg.feishu.apps = next_apps.clone() // codex - cfg.codex.url, changed = expand_config_string(cfg.codex.url, 'codex', vars, env_map, changed)! - cfg.codex.model, changed = expand_config_string(cfg.codex.model, 'codex', vars, env_map, - changed)! - cfg.codex.effort, changed = expand_config_string(cfg.codex.effort, 'codex', vars, env_map, - changed)! - cfg.codex.cwd, changed = expand_config_string(cfg.codex.cwd, 'codex', vars, env_map, changed)! - cfg.codex.approval_policy, changed = expand_config_string(cfg.codex.approval_policy, 'codex', - vars, env_map, changed)! - cfg.codex.sandbox, changed = expand_config_string(cfg.codex.sandbox, 'codex', vars, env_map, - changed)! - cfg.feishu.bridge.ws_url, changed = expand_config_string(cfg.feishu.bridge.ws_url, 'feishu.bridge', vars, env_map, + cfg.codex.url, changed = expand_config_string(cfg.codex.url, 'codex', vars, env_map, changed)! - cfg.feishu.bridge.client_id, changed = expand_config_string(cfg.feishu.bridge.client_id, 'feishu.bridge', vars, + cfg.codex.model, changed = expand_config_string(cfg.codex.model, 'codex', vars, env_map, changed)! 
- cfg.feishu.bridge.token, changed = expand_config_string(cfg.feishu.bridge.token, 'feishu.bridge', vars, env_map, - changed)! - cfg.feishu.bridge.target_id, changed = expand_config_string(cfg.feishu.bridge.target_id, 'feishu.bridge', vars, - env_map, changed)! - - cfg.feishu.bridge.target_id, changed = expand_config_string(cfg.feishu.bridge.target_id, 'feishu.bridge', vars, + cfg.codex.effort, changed = expand_config_string(cfg.codex.effort, 'codex', vars, env_map, changed)! + cfg.codex.cwd, changed = expand_config_string(cfg.codex.cwd, 'codex', vars, env_map, + changed)! + cfg.codex.approval_policy, changed = expand_config_string(cfg.codex.approval_policy, + 'codex', vars, env_map, changed)! + cfg.codex.sandbox, changed = expand_config_string(cfg.codex.sandbox, 'codex', + vars, env_map, changed)! + cfg.openai.base_path, changed = expand_config_string(cfg.openai.base_path, 'openai', + vars, env_map, changed)! + cfg.openai.default_backend, changed = expand_config_string(cfg.openai.default_backend, + 'openai', vars, env_map, changed)! + cfg.openai.plugin, changed = expand_config_string(cfg.openai.plugin, 'openai', + vars, env_map, changed)! + mut next_openai_backends := map[string]OpenAIBackendConfig{} + for name, backend in cfg.openai.backends { + base_url, base_url_changed := expand_config_string(backend.base_url, 'openai.backends.${name}', + vars, env_map, false)! + api_key, api_key_changed := expand_config_string(backend.api_key, 'openai.backends.${name}', + vars, env_map, false)! + executor, executor_changed := expand_config_string(backend.executor, 'openai.backends.${name}', + vars, env_map, false)! + api_key_env, api_key_env_changed := expand_config_string(backend.api_key_env, + 'openai.backends.${name}', vars, env_map, false)! 
+ next_openai_backends[name] = OpenAIBackendConfig{ + kind: backend.kind + base_url: base_url + executor: executor + api_key: api_key + api_key_env: api_key_env + timeout_ms: backend.timeout_ms + } + if base_url_changed || executor_changed || api_key_changed || api_key_env_changed { + changed = true + } + } + cfg.openai.backends = next_openai_backends.clone() + mut next_openai_routes := map[string]OpenAIRouteConfig{} + for name, route in cfg.openai.routes { + model, model_changed := expand_config_string(route.model, 'openai.routes.${name}', + vars, env_map, false)! + backend, backend_changed := expand_config_string(route.backend, 'openai.routes.${name}', + vars, env_map, false)! + upstream_model, upstream_model_changed := expand_config_string(route.upstream_model, + 'openai.routes.${name}', vars, env_map, false)! + mut models := route.models.clone() + for i, raw in models { + next, c := expand_config_string(raw, 'openai.routes.${name}', vars, env_map, + false)! + if c { + models[i] = next + changed = true + } + } + next_openai_routes[name] = OpenAIRouteConfig{ + model: model + models: models + backend: backend + upstream_model: upstream_model + } + if model_changed || backend_changed || upstream_model_changed { + changed = true + } + } + cfg.openai.routes = next_openai_routes.clone() + cfg.feishu.bridge.ws_url, changed = expand_config_string(cfg.feishu.bridge.ws_url, + 'feishu.bridge', vars, env_map, changed)! + cfg.feishu.bridge.client_id, changed = expand_config_string(cfg.feishu.bridge.client_id, + 'feishu.bridge', vars, env_map, changed)! + cfg.feishu.bridge.token, changed = expand_config_string(cfg.feishu.bridge.token, + 'feishu.bridge', vars, env_map, changed)! + cfg.feishu.bridge.target_id, changed = expand_config_string(cfg.feishu.bridge.target_id, + 'feishu.bridge', vars, env_map, changed)! + + cfg.feishu.bridge.target_id, changed = expand_config_string(cfg.feishu.bridge.target_id, + 'feishu.bridge', vars, env_map, changed)! 
if !changed { resolve_config_paths(mut cfg, config_path) @@ -1149,6 +1511,14 @@ fn resolve_config_paths(mut cfg VhttpdConfig, config_path string) { cfg.vjsx.module_root = resolve_config_path(cfg.paths.root, cfg.vjsx.module_root) cfg.vjsx.build_root = resolve_config_path(cfg.paths.root, cfg.vjsx.build_root) cfg.vjsx.signature_root = resolve_config_path(cfg.paths.root, cfg.vjsx.signature_root) + for name, mut plugin in cfg.plugins { + plugin.entry = resolve_config_path(cfg.paths.root, plugin.entry) + plugin.app_entry = resolve_config_path(cfg.paths.root, plugin.app_entry) + plugin.module_root = resolve_config_path(cfg.paths.root, plugin.module_root) + plugin.build_root = resolve_config_path(cfg.paths.root, plugin.build_root) + plugin.signature_root = resolve_config_path(cfg.paths.root, plugin.signature_root) + cfg.plugins[name] = plugin + } cfg.assets.root = resolve_config_path(cfg.paths.root, cfg.assets.root) cfg.codex.cwd = resolve_config_path(cfg.paths.root, cfg.codex.cwd) } @@ -1199,6 +1569,10 @@ fn build_config_variable_map(cfg VhttpdConfig) map[string]string { 'feishu.bridge.client_id': cfg.feishu.bridge.client_id 'feishu.bridge.token': cfg.feishu.bridge.token 'feishu.bridge.target_id': cfg.feishu.bridge.target_id + 'openai.enabled': '${cfg.openai.enabled}' + 'openai.base_path': cfg.openai.base_path + 'openai.default_backend': cfg.openai.default_backend + 'openai.plugin': cfg.openai.plugin } for key, value in cfg.paths.values { vars['paths.${key}'] = value @@ -1213,6 +1587,25 @@ fn build_config_variable_map(cfg VhttpdConfig) map[string]string { vars['feishu.${name}.app_id'] = app_cfg.app_id vars['feishu.${name}.app_secret'] = app_cfg.app_secret } + for name, backend in cfg.openai.backends { + vars['openai.backends.${name}.kind'] = backend.kind + vars['openai.backends.${name}.base_url'] = backend.base_url + vars['openai.backends.${name}.api_key_env'] = backend.api_key_env + } + for name, route in cfg.openai.routes { + vars['openai.routes.${name}.model'] = 
route.model + vars['openai.routes.${name}.backend'] = route.backend + vars['openai.routes.${name}.upstream_model'] = route.upstream_model + } + for name, plugin in cfg.plugins { + vars['plugins.${name}.kind'] = plugin.kind + vars['plugins.${name}.entry'] = plugin.entry + vars['plugins.${name}.app_entry'] = plugin.app_entry + vars['plugins.${name}.module_root'] = plugin.module_root + vars['plugins.${name}.build_root'] = plugin.build_root + vars['plugins.${name}.signature_root'] = plugin.signature_root + vars['plugins.${name}.runtime_profile'] = plugin.runtime_profile + } return vars } diff --git a/src/multi_server_runtime_config.v b/src/multi_server_runtime_config.v index 8bbc1fb..ed783d6 100644 --- a/src/multi_server_runtime_config.v +++ b/src/multi_server_runtime_config.v @@ -166,6 +166,13 @@ fn merge_vjsx_config(base VjsxConfig, override VjsxConfig) VjsxConfig { return cfg } +fn merge_plugins_config(base map[string]PluginConfig, override map[string]PluginConfig) map[string]PluginConfig { + if override.len == 0 { + return base.clone() + } + return override.clone() +} + fn merge_websocket_affinity_config(base WebSocketAffinityConfig, override WebSocketAffinityConfig) WebSocketAffinityConfig { defaults := default_vhttpd_config().websocket_affinity mut cfg := base @@ -337,6 +344,39 @@ fn merge_bridge_config(base BridgeConfig, override BridgeConfig) BridgeConfig { return cfg } +fn merge_openai_config(base OpenAIConfig, override OpenAIConfig) OpenAIConfig { + defaults := default_vhttpd_config().openai + mut cfg := base + if override.enabled != defaults.enabled { + cfg.enabled = override.enabled + } + if override.base_path != defaults.base_path { + cfg.base_path = override.base_path + } + if override.default_backend != defaults.default_backend { + cfg.default_backend = override.default_backend + } + if override.endpoints.models != defaults.endpoints.models { + cfg.endpoints.models = override.endpoints.models + } + if override.endpoints.chat_completions != 
defaults.endpoints.chat_completions { + cfg.endpoints.chat_completions = override.endpoints.chat_completions + } + if override.endpoints.responses != defaults.endpoints.responses { + cfg.endpoints.responses = override.endpoints.responses + } + if override.endpoints.embeddings != defaults.endpoints.embeddings { + cfg.endpoints.embeddings = override.endpoints.embeddings + } + if override.backends.len > 0 { + cfg.backends = override.backends.clone() + } + if override.routes.len > 0 { + cfg.routes = override.routes.clone() + } + return cfg +} + fn site_config_as_vhttpd_config(global_cfg VhttpdConfig, site_cfg SiteConfig) VhttpdConfig { mut cfg := global_cfg cfg.listeners = map[string]ListenerConfig{} @@ -348,6 +388,7 @@ fn site_config_as_vhttpd_config(global_cfg VhttpdConfig, site_cfg SiteConfig) Vh env_map := map[string]string{} project_root, _ = expand_config_string(project_root, '', global_vars, env_map, false) or { site_cfg.project_root, false } + project_root = resolve_config_path(global_cfg.paths.root, project_root) cfg.paths = PathsConfig{ root: project_root values: cfg.paths.values.clone() @@ -357,10 +398,10 @@ fn site_config_as_vhttpd_config(global_cfg VhttpdConfig, site_cfg SiteConfig) Vh cfg.executor = merge_executor_config(global_cfg.executor, site_cfg.executor, site_cfg) cfg.php = merge_php_config(global_cfg.php, site_cfg.php) cfg.vjsx = merge_vjsx_config(global_cfg.vjsx, site_cfg.vjsx) + cfg.plugins = merge_plugins_config(global_cfg.plugins, site_cfg.plugins) cfg.websocket_affinity = merge_websocket_affinity_config(global_cfg.websocket_affinity, site_cfg.websocket_affinity) - cfg.websocket_actor = merge_websocket_actor_config(global_cfg.websocket_actor, - site_cfg.websocket_actor) + cfg.websocket_actor = merge_websocket_actor_config(global_cfg.websocket_actor, site_cfg.websocket_actor) if site_cfg.worker_entry.trim_space() != '' && cfg.executor.kind == 'php' && cfg.php.worker_entry.trim_space() == '' { cfg.php.worker_entry = site_cfg.worker_entry @@ 
-381,6 +422,7 @@ fn site_config_as_vhttpd_config(global_cfg VhttpdConfig, site_cfg SiteConfig) Vh cfg.mcp = merge_mcp_config(global_cfg.mcp, site_cfg.mcp) cfg.feishu = merge_feishu_config(global_cfg.feishu, site_cfg.feishu) cfg.codex = merge_codex_config(global_cfg.codex, site_cfg.codex) + cfg.openai = merge_openai_config(global_cfg.openai, site_cfg.openai) cfg.feishu.bridge = merge_bridge_config(global_cfg.feishu.bridge, site_cfg.feishu.bridge) cfg.config_path = global_cfg.config_path return cfg diff --git a/src/provider_spec.v b/src/provider_spec.v index eb966eb..d60d113 100644 --- a/src/provider_spec.v +++ b/src/provider_spec.v @@ -3,6 +3,7 @@ module main pub enum ProviderRouteKind { codex feishu + openai ollama generic } diff --git a/src/server_logic_test.v b/src/server_logic_test.v index be70e3c..18db287 100644 --- a/src/server_logic_test.v +++ b/src/server_logic_test.v @@ -1940,7 +1940,7 @@ fn test_paseo_relay_example_config_enables_websocket_dispatch() { config_path := os.join_path(os.dir(@FILE), '..', 'examples', 'paseo-relay', 'paseo-relay.toml') cfg := load_vhttpd_config(['--config', config_path]) or { panic(err) } assert cfg.worker.websocket_dispatch - assert cfg.sites['paseo_relay'].websocket_affinity.enabled + assert !cfg.sites['paseo_relay'].websocket_affinity.enabled assert cfg.sites['paseo_relay'].websocket_affinity.source == 'app' assert cfg.sites['paseo_relay'].websocket_affinity.key == 'serverId' assert cfg.sites['paseo_relay'].websocket_affinity.fallback == 'reject' @@ -1954,7 +1954,7 @@ fn test_paseo_relay_example_config_enables_websocket_dispatch() { } assert runtime.listeners.len == 1 assert runtime.listeners[0].runtime_cfg.executor_plan.bootstrap.websocket_dispatch_mode - assert runtime.listeners[0].site_cfg.websocket_affinity.enabled + assert !runtime.listeners[0].site_cfg.websocket_affinity.enabled assert runtime.listeners[0].site_cfg.websocket_affinity.key == 'serverId' assert runtime.listeners[0].site_cfg.websocket_actor.enabled assert 
runtime.listeners[0].site_cfg.websocket_actor.sources.len == 3 From 406ff943021d893c26dae24439399f0c8301e8da Mon Sep 17 00:00:00 2001 From: weigang Date: Wed, 6 May 2026 09:58:57 +0800 Subject: [PATCH 02/10] openai: add aggregation gateway runtime --- src/app_runtime_builder.v | 12 +- src/command_executor.v | 20 +- src/inproc_vjsx_executor.v | 374 ++++- src/main.v | 153 +- src/openai_runtime.v | 2709 ++++++++++++++++++++++++++++++++ src/openai_runtime_test.v | 388 +++++ src/plugin_runtime.v | 118 ++ src/server_shutdown_hooks.v | 1 + src/worker_backend_transport.v | 30 +- v.mod | 1 + 10 files changed, 3664 insertions(+), 142 deletions(-) create mode 100644 src/openai_runtime.v create mode 100644 src/openai_runtime_test.v create mode 100644 src/plugin_runtime.v diff --git a/src/app_runtime_builder.v b/src/app_runtime_builder.v index 70ff548..d96a6d8 100644 --- a/src/app_runtime_builder.v +++ b/src/app_runtime_builder.v @@ -64,6 +64,8 @@ fn build_app_runtime(provider_settings ProviderRuntimeSettings, executor_plan Lo admin_on_data_plane: !build_cfg.admin_enabled admin_token: build_cfg.admin_token runtime_config_json: json.encode(cfg) + plugin_configs: cfg.plugins.clone() + plugin_vjsx: build_vjsx_plugin_runtimes(cfg.plugins) assets_enabled: build_cfg.assets_enabled assets_prefix: build_cfg.assets_prefix assets_root: build_cfg.assets_root @@ -79,6 +81,14 @@ fn build_app_runtime(provider_settings ProviderRuntimeSettings, executor_plan Lo feishu_reconnect_delay_ms: provider_settings.feishu.reconnect_delay_ms feishu_token_refresh_skew_seconds: provider_settings.feishu.token_refresh_skew_seconds feishu_recent_event_limit: provider_settings.feishu.recent_event_limit + openai_enabled: cfg.openai.enabled + openai_base_path: cfg.openai.base_path + openai_default_backend: cfg.openai.default_backend + openai_plugin: cfg.openai.plugin + openai_endpoints: cfg.openai.endpoints + openai_backends: cfg.openai.backends.clone() + openai_routes: cfg.openai.routes.clone() + 
openai_responses: new_memory_state_store[OpenAIResponseRecord]() websocket_upstream_recent_dispatch_limit: 50 auto_start_dynamic_upstreams: true feishu_static_apps: provider_settings.feishu.apps.clone() @@ -97,7 +107,7 @@ fn build_app_runtime(provider_settings ProviderRuntimeSettings, executor_plan Lo specs: map[string]ProviderSpec{} } ollama_enabled: provider_settings.ollama_enabled - db_runtime: build_db_runtime(provider_settings.db) + db_runtime: build_db_runtime(provider_settings.db) fixture_websocket_runtime: map[string]FixtureWebSocketUpstreamRuntime{} websocket_upstream_recent_activities: []WebSocketUpstreamActivitySnapshot{} provider_instance_specs: map[string]ProviderInstanceSpec{} diff --git a/src/command_executor.v b/src/command_executor.v index 2cba5d2..5cfe5d2 100644 --- a/src/command_executor.v +++ b/src/command_executor.v @@ -100,10 +100,12 @@ pub fn (mut exec CommandExecutor) execute(source_activity_id string, ctx Dispatc for command in commands { mut next := command mut metadata := command.metadata.clone() - if (metadata['trace_id'] or { '' }).trim_space() == '' && ctx.session.trace_id.trim_space() != '' { + if (metadata['trace_id'] or { '' }).trim_space() == '' + && ctx.session.trace_id.trim_space() != '' { metadata['trace_id'] = ctx.session.trace_id } - if (metadata['request_id'] or { '' }).trim_space() == '' && ctx.session.request_id.trim_space() != '' { + if (metadata['request_id'] or { '' }).trim_space() == '' + && ctx.session.request_id.trim_space() != '' { metadata['request_id'] = ctx.session.request_id } next.metadata = metadata.clone() @@ -143,10 +145,13 @@ pub fn (mut exec CommandExecutor) execute_websocket_upstream_commands(source_act log.info('[ws-cmd] executing ${commands.len} commands from ${source_activity_id}') for index, command in commands { normalized := NormalizedCommand.from_worker_command(command) - log.info('[ws-cmd] #${index}: type=${normalized.routing_type()} kind=${normalized.kind} event=${normalized.normalized_event('')} 
provider=${normalized.normalized_provider('')} stream_id=${normalized.correlation.stream_id} trace_id=${normalized.metadata['trace_id'] or { '' }} request_id=${normalized.correlation.request_id}') + log.info('[ws-cmd] #${index}: type=${normalized.routing_type()} kind=${normalized.kind} event=${normalized.normalized_event('')} provider=${normalized.normalized_provider('')} stream_id=${normalized.correlation.stream_id} trace_id=${normalized.metadata['trace_id'] or { + '' + }} request_id=${normalized.correlation.request_id}') mut snapshot := exec.new_snapshot(source_activity_id, index, command) if normalized.is_provider_instance_command() { - handled, exec_err := exec.execute_provider_instance_command(normalized, mut snapshot) + handled, exec_err := exec.execute_provider_instance_command(normalized, mut + snapshot) if handled { if exec_err != '' { last_error = exec_err @@ -156,7 +161,8 @@ pub fn (mut exec CommandExecutor) execute_websocket_upstream_commands(source_act } } route := exec.route_from_normalized(normalized) - handled, exec_err := exec.execute_routed_command(route, command, normalized, mut snapshot) + handled, exec_err := exec.execute_routed_command(route, command, normalized, mut + snapshot) if handled { if exec_err != '' { last_error = exec_err @@ -231,11 +237,15 @@ fn (mut exec CommandExecutor) execute_routed_command(route ProviderRouteKind, co false, '' } } + .openai { + false, '' + } .generic { exec.generic.execute(command, normalized, mut snapshot) } } } + // Unified App-level entrypoint, now backed by CommandExecutor object. 
fn (mut app App) execute_command_envelopes(source_activity_id string, ctx DispatchContext, commands []WorkerWebSocketUpstreamCommand) ([]WebSocketUpstreamCommandActivity, string) { mut executor := CommandExecutor.new(mut app) diff --git a/src/inproc_vjsx_executor.v b/src/inproc_vjsx_executor.v index 5a6be39..2bd27b1 100644 --- a/src/inproc_vjsx_executor.v +++ b/src/inproc_vjsx_executor.v @@ -1306,6 +1306,7 @@ fn websocket_actor_value_from_source(frame WorkerWebSocketFrame, source WebSocke 'metadata' { (frame.metadata[key_name] or { '' }).trim_space() } else { (frame.query[key_name] or { '' }).trim_space() } } + if value == '' { return WebSocketActorDecision{} } @@ -1492,8 +1493,7 @@ fn (e InProcVjsxExecutor) resolve_websocket_dispatch_affinity(frame WorkerWebSoc } return '', affinity.priority, false } - return affinity_key, affinity.priority, websocket_should_pin_affinity_lane(frame, - affinity_key) + return affinity_key, affinity.priority, websocket_should_pin_affinity_lane(frame, affinity_key) } fn websocket_should_pin_affinity_lane(frame WorkerWebSocketFrame, affinity_key string) bool { @@ -1607,37 +1607,35 @@ fn (e InProcVjsxExecutor) lane_worker_by_id(lane_id string) ?VjsxLaneWorker { return none } -fn (state &VjsxExecutorState) schedule_lane_wakeup(lane_id string, wake_at_ms i64, generation u64) { +fn (mut state VjsxExecutorState) schedule_lane_wakeup(lane_id string, wake_at_ms i64, generation u64) { if isnil(state) || lane_id.trim_space() == '' { return } - mut state_ref := state - state_ref.mu.@lock() - state_ref.lane_wakeup_by_id[lane_id] = VjsxLaneWakeup{ + state.mu.@lock() + state.lane_wakeup_by_id[lane_id] = VjsxLaneWakeup{ wake_at_ms: wake_at_ms generation: generation } - state_ref.mu.unlock() + state.mu.unlock() go state.deliver_lane_wakeup(lane_id, wake_at_ms, generation) } -fn (state &VjsxExecutorState) cancel_lane_wakeup(lane_id string, generation u64) { +fn (mut state VjsxExecutorState) cancel_lane_wakeup(lane_id string, generation u64) { if 
isnil(state) || lane_id.trim_space() == '' { return } - mut state_ref := state - state_ref.mu.@lock() - current_wakeup := state_ref.lane_wakeup_by_id[lane_id] or { - state_ref.mu.unlock() + state.mu.@lock() + current_wakeup := state.lane_wakeup_by_id[lane_id] or { + state.mu.unlock() return } if current_wakeup.generation == generation { - state_ref.lane_wakeup_by_id.delete(lane_id) + state.lane_wakeup_by_id.delete(lane_id) } - state_ref.mu.unlock() + state.mu.unlock() } -fn (state &VjsxExecutorState) deliver_lane_wakeup(lane_id string, wake_at_ms i64, generation u64) { +fn (mut state VjsxExecutorState) deliver_lane_wakeup(lane_id string, wake_at_ms i64, generation u64) { if isnil(state) || lane_id.trim_space() == '' { return } @@ -1645,18 +1643,17 @@ fn (state &VjsxExecutorState) deliver_lane_wakeup(lane_id string, wake_at_ms i64 if delay_ms > 0 { time.sleep(time.millisecond * int(delay_ms)) } - mut state_ref := state - state_ref.mu.@lock() - current_wakeup := state_ref.lane_wakeup_by_id[lane_id] or { - state_ref.mu.unlock() + state.mu.@lock() + current_wakeup := state.lane_wakeup_by_id[lane_id] or { + state.mu.unlock() return } if current_wakeup.wake_at_ms != wake_at_ms || current_wakeup.generation != generation { - state_ref.mu.unlock() + state.mu.unlock() return } - state_ref.lane_wakeup_by_id.delete(lane_id) - state_ref.mu.unlock() + state.lane_wakeup_by_id.delete(lane_id) + state.mu.unlock() executor := InProcVjsxExecutor{ state: state } @@ -2006,7 +2003,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c } log.debug('[vhttpd] lane worker recv lane=${lane_id} event=${task.frame.event} request_id=${task.frame.request_id} trace_id=${task.frame.trace_id}') lane := worker_executor.lane_snapshot_by_id(lane_id) or { - err_msg = inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_lane_not_found') + err_msg = inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_lane_not_found') log.debug('[vhttpd] 
lane worker reply_error lane=${lane_id} event=${task.frame.event} request_id=${task.frame.request_id} error=${err_msg}') task.slot.mu.@lock() task.slot.result = InProcVjsxWebSocketTaskResult{ @@ -2020,7 +2018,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c } response_json = worker_executor.dispatch_websocket_callback_on_lane(mut task_app, task.frame, lane) or { - err_msg = inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_dispatch_failed') + err_msg = inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_websocket_dispatch_failed') eprintln('[vhttpd] websocket lane worker error lane=${lane_id} event=${task.frame.event} path=${task.frame.path} request_id=${task.frame.request_id} trace_id=${task.frame.trace_id} query=${task.frame.query} error=${err_msg}') log.debug('[vhttpd] lane worker reply_error lane=${lane_id} event=${task.frame.event} request_id=${task.frame.request_id} error=${err_msg}') task.slot.mu.@lock() @@ -2049,7 +2048,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c task.slot.mu.@lock() task.slot.result = InProcVjsxLaneSnapshotTaskResult{ ok: false - error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_lane_not_found') + error: inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_lane_not_found') } task.slot.ready = true task.slot.mu.unlock() @@ -2072,7 +2072,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c task.slot.mu.@lock() task.slot.result = InProcVjsxLaneSnapshotTaskResult{ ok: false - error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_snapshot_failed') + error: inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_snapshot_failed') } task.slot.ready = true task.slot.mu.unlock() @@ -2094,7 +2095,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c task.slot.mu.@lock() 
task.slot.result = InProcVjsxLaneWarmupTaskResult{ ok: false - error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_lane_not_found') + error: inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_lane_not_found') } task.slot.ready = true task.slot.mu.unlock() @@ -2118,7 +2120,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c task.slot.mu.@lock() task.slot.result = InProcVjsxLaneWarmupTaskResult{ ok: false - error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_warmup_host_failed') + error: inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_warmup_host_failed') } task.slot.ready = true task.slot.mu.unlock() @@ -2129,7 +2132,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c task.slot.mu.@lock() task.slot.result = InProcVjsxLaneWarmupTaskResult{ ok: false - error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_warmup_startup_failed') + error: inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_warmup_startup_failed') } task.slot.ready = true task.slot.mu.unlock() @@ -2180,7 +2184,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c task.slot.mu.@lock() task.slot.result = InProcVjsxLaneAffinityTaskResult{ ok: false - error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_lane_not_found') + error: inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_lane_not_found') } task.slot.ready = true task.slot.mu.unlock() @@ -2195,7 +2200,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c task.slot.mu.@lock() task.slot.result = InProcVjsxLaneAffinityTaskResult{ ok: false - error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_actor_failed') + error: inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_websocket_actor_failed') } 
task.slot.ready = true task.slot.mu.unlock() @@ -2208,7 +2214,8 @@ fn inproc_vjsx_lane_worker_loop(state &VjsxExecutorState, lane_id string, task_c task.slot.mu.@lock() task.slot.result = InProcVjsxLaneAffinityTaskResult{ ok: false - error: inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_affinity_failed') + error: inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_websocket_affinity_failed') } task.slot.ready = true task.slot.mu.unlock() @@ -2465,8 +2472,9 @@ fn inproc_vjsx_host_snapshot_builder(state_ptr &VjsxExecutorState, idx int) vjsx return ctx.js_undefined() } lane := executor.lane_snapshot_by_id(lane_id) or { return ctx.js_undefined() } - raw := executor.execute_snapshot_hook(mut app, executor.lane_index_by_id(lane.id), - lane) or { return ctx.js_undefined() } + raw := executor.execute_snapshot_hook(mut app, executor.lane_index_by_id(lane.id), lane) or { + return ctx.js_undefined() + } if raw.trim_space() == '' || raw.trim_space() == 'undefined' || raw.trim_space() == 'null' { return ctx.js_undefined() @@ -2557,7 +2565,8 @@ fn inproc_vjsx_host_session_store_builder(mut state VjsxExecutorState, idx int) } 'set' { if req.ttl_ms > 0 { - state.session_store.set_with_ttl(full_key, req.value, req.ttl_ms * time.millisecond) or { + state.session_store.set_with_ttl(full_key, req.value, + req.ttl_ms * time.millisecond) or { return ctx.js_string(json.encode(InProcVjsxHostSessionStoreResponse{ error: err.msg() })) @@ -2584,7 +2593,8 @@ fn inproc_vjsx_host_session_store_builder(mut state VjsxExecutorState, idx int) } } else { swapped = state.session_store.compare_and_swap_set_with_ttl(full_key, - req.expected_found, req.expected_value, req.value, req.ttl_ms * time.millisecond) or { + req.expected_found, req.expected_value, req.value, + req.ttl_ms * time.millisecond) or { return ctx.js_string(json.encode(InProcVjsxHostSessionStoreResponse{ error: err.msg() })) @@ -2618,7 +2628,8 @@ fn 
inproc_vjsx_host_session_store_builder(mut state VjsxExecutorState, idx int) } 'keys' { prefix := '${namespace}:' - keys := state.session_store.keys().filter(it.starts_with(prefix)).map(it[prefix.len..]) + keys := + state.session_store.keys().filter(it.starts_with(prefix)).map(it[prefix.len..]) ctx.js_string(json.encode(InProcVjsxHostSessionStoreResponse{ ok: true found: keys.len > 0 @@ -2751,6 +2762,7 @@ fn inproc_vjsx_host_http_fetch_builder(mut state VjsxExecutorState, idx int) vjs 'OPTIONS' { http.Method.options } else { http.Method.get } } + body := parsed.body mut header := http.new_header() for name, value in parsed.headers { @@ -2984,6 +2996,12 @@ fn inproc_vjsx_module_aliases(kind string) []string { ['websocket_upstream', 'websocketUpstream', 'handleWebSocketUpstream', 'handle_websocket_upstream'] } + 'plugin' { + ['plugin', 'handlePlugin', 'handle_plugin'] + } + 'openai' { + ['openai', 'openaiPlugin', 'handleOpenAI', 'handleOpenai', 'handle_openai'] + } 'startup' { ['startup', 'lane_startup', 'laneStartup'] } @@ -3010,6 +3028,8 @@ fn inproc_vjsx_global_handler_name(kind string) string { 'websocket_affinity' { '__vhttpd_websocket_affinity_handle' } 'websocket_actor' { '__vhttpd_websocket_actor_handle' } 'websocket_upstream' { '__vhttpd_websocket_upstream_handle' } + 'plugin' { '__vhttpd_plugin_handle' } + 'openai' { '__vhttpd_openai_handle' } 'startup' { '__vhttpd_startup_handle' } 'app_startup' { '__vhttpd_app_startup_handle' } 'snapshot' { '__vhttpd_snapshot_handle' } @@ -3299,6 +3319,7 @@ fn inproc_vjsx_new_runtime_session_ptr(config VjsxRuntimeFacadeConfig) !&vjsx.Ru return error('inproc_vjsx_executor_unsupported_runtime_profile:${config.runtime_profile}') } } + // Keep the RuntimeSession on the heap; lane hosts outlive ensure_lane_host(). 
mut session := session_value return &session @@ -3322,6 +3343,7 @@ fn inproc_vjsx_log_runtime_profile(lane_id string, idx int, runtime_profile stri 'node' { vjsx.RuntimeProfileKind.node } else { vjsx.RuntimeProfileKind.unknown } } + missing := if expected_kind == .unknown { []string{} } else { @@ -3368,10 +3390,10 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! { session.set_diagnostic_handler(inproc_vjsx_log_runtime_diagnostic) session.configure_event_loop(vjsx.RuntimeSessionEventLoopConfig{ session_id: lane_id - wake_fn: fn [state, lane_id] (req vjsx.RuntimeSessionWakeRequest) { + wake_fn: fn [mut state, lane_id] (req vjsx.RuntimeSessionWakeRequest) { state.schedule_lane_wakeup(lane_id, req.wake_at_ms, req.generation) } - cancel_wake_fn: fn [state, lane_id] (req vjsx.RuntimeSessionWakeCancelRequest) { + cancel_wake_fn: fn [mut state, lane_id] (req vjsx.RuntimeSessionWakeCancelRequest) { state.cancel_lane_wakeup(lane_id, req.generation) } }) @@ -3384,14 +3406,15 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! { mut has_http_handler := false mut has_websocket_handler := false mut has_upstream_handler := false + mut has_plugin_handler := false if as_module { if vjsx.is_typescript_file(config.app_entry) || vjsx.is_runtime_module_file(config.app_entry) { runtimejs.install_typescript_runtime(ctx)! } log.debug('[vhttpd] ensure_lane_host importing module lane=${lane_id} idx=${idx}') - module_entry_path := runtimejs.build_runtime_module_entry(ctx, config.app_entry, - true, temp_root) or { + module_entry_path := runtimejs.build_runtime_module_entry(ctx, config.app_entry, true, + temp_root) or { session.close() os.rmdir_all(temp_root) or {} return error('inproc_vjsx_executor_bootstrap_failed:${err.msg()}') @@ -3408,7 +3431,12 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! 
{ has_upstream_handler = inproc_vjsx_module_has_callable(&module_binding_value, 'websocket_upstream') || inproc_vjsx_global_has_callable(ctx, 'websocket_upstream') - if !has_http_handler && !has_websocket_handler && !has_upstream_handler { + has_plugin_handler = inproc_vjsx_module_has_callable(&module_binding_value, 'plugin') + || inproc_vjsx_module_has_callable(&module_binding_value, 'openai') + || inproc_vjsx_global_has_callable(ctx, 'plugin') + || inproc_vjsx_global_has_callable(ctx, 'openai') + if !has_http_handler && !has_websocket_handler && !has_upstream_handler + && !has_plugin_handler { mut cleanup_binding := module_binding_value cleanup_binding.close() session.close() @@ -3445,8 +3473,7 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! { module_binding_ptr = &module_binding } else { log.debug('[vhttpd] ensure_lane_host loading script entry lane=${lane_id} idx=${idx}') - mut entry_exports := load_inproc_vjsx_entry(mut ctx, config, idx, source_signature, - false) or { + mut entry_exports := load_inproc_vjsx_entry(mut ctx, config, idx, source_signature, false) or { session.close() os.rmdir_all(temp_root) or {} return error('inproc_vjsx_executor_bootstrap_failed:${err.msg()}') @@ -3475,13 +3502,20 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! 
{ http_handler := ctx.js_global('__vhttpd_handle') websocket_handler := ctx.js_global('__vhttpd_websocket_handle') upstream_handler := ctx.js_global('__vhttpd_websocket_upstream_handle') + plugin_handler := ctx.js_global('__vhttpd_plugin_handle') + openai_handler := ctx.js_global('__vhttpd_openai_handle') has_http_handler = !http_handler.is_undefined() && http_handler.is_function() has_websocket_handler = !websocket_handler.is_undefined() && websocket_handler.is_function() has_upstream_handler = !upstream_handler.is_undefined() && upstream_handler.is_function() + has_plugin_handler = (!plugin_handler.is_undefined() && plugin_handler.is_function()) + || (!openai_handler.is_undefined() && openai_handler.is_function()) http_handler.free() websocket_handler.free() upstream_handler.free() - if !has_http_handler && !has_websocket_handler && !has_upstream_handler { + plugin_handler.free() + openai_handler.free() + if !has_http_handler && !has_websocket_handler && !has_upstream_handler + && !has_plugin_handler { session.close() os.rmdir_all(temp_root) or {} return error('inproc_vjsx_executor_missing_handler') @@ -3504,7 +3538,7 @@ fn (e InProcVjsxExecutor) ensure_lane_host(idx int) ! 
{ } state.lanes[idx].healthy = true state.lanes[idx].dirty = false - log.debug('[vhttpd] ensure_lane_host ready lane=${lane_id} idx=${idx} http=${has_http_handler} websocket=${has_websocket_handler} upstream=${has_upstream_handler}') + log.debug('[vhttpd] ensure_lane_host ready lane=${lane_id} idx=${idx} http=${has_http_handler} websocket=${has_websocket_handler} upstream=${has_upstream_handler} plugin=${has_plugin_handler}') } fn (e InProcVjsxExecutor) activate_lane_request_context(idx int, mut app App, lane_id string, req HttpLogicDispatchRequest) { @@ -3727,11 +3761,10 @@ fn (e InProcVjsxExecutor) aggregate_runtime_lane_snapshots(mut app App, current_ state.mu.unlock() mut items := []string{} for lane in lanes { - items << inproc_vjsx_aggregated_snapshot_item_json(lane.id, true, json.encode(app.admin_runtime_snapshot()), - '') + items << inproc_vjsx_aggregated_snapshot_item_json(lane.id, true, + json.encode(app.admin_runtime_snapshot()), '') } - return inproc_vjsx_aggregated_snapshot_json('all_lanes', 'runtime', current_lane_id, - items) + return inproc_vjsx_aggregated_snapshot_json('all_lanes', 'runtime', current_lane_id, items) } fn (e InProcVjsxExecutor) aggregate_app_lane_snapshots(mut app App, current_lane_id string, include_current bool) string { @@ -3761,8 +3794,7 @@ fn (e InProcVjsxExecutor) aggregate_app_lane_snapshots(mut app App, current_lane items << inproc_vjsx_aggregated_snapshot_item_json(lane.id, false, '', '') continue } - items << inproc_vjsx_aggregated_snapshot_item_json(lane.id, true, lane_snapshot, - '') + items << inproc_vjsx_aggregated_snapshot_item_json(lane.id, true, lane_snapshot, '') } return inproc_vjsx_aggregated_snapshot_json(scope, 'app', current_lane_id, items) } @@ -3961,7 +3993,8 @@ fn build_websocket_js_runtime(ctx &vjsx.Context, runtime_meta InProcVjsxRuntimeM request.set('protocolVersion', runtime_meta.request_protocol_version) request.set('remoteAddr', runtime_meta.request_remote_addr) request.set('ip', 
runtime_meta.request_remote_addr) - request.set('server', websocket_js_value_from_json(ctx, json.encode(runtime_meta.request_server))) + request.set('server', websocket_js_value_from_json(ctx, + json.encode(runtime_meta.request_server))) runtime.set('request', request) runtime.set('method', runtime_meta.method) runtime.set('path', runtime_meta.path) @@ -4446,8 +4479,8 @@ fn (e InProcVjsxExecutor) websocket_callback_input(lane VjsxExecutionLane, frame } fn build_websocket_callback_payload(ctx &vjsx.Context, input InProcVjsxWebSocketCallbackInput, runtime_config_json string, mut app App) (vjsx.Value, vjsx.Value) { - mut js_runtime := build_websocket_js_runtime(ctx, input.runtime_meta, runtime_config_json, mut - app) + mut js_runtime := + build_websocket_js_runtime(ctx, input.runtime_meta, runtime_config_json, mut app) create_frame_fn := ctx.js_global('__vhttpd_create_websocket_frame') defer { create_frame_fn.free() @@ -4525,12 +4558,14 @@ fn inproc_vjsx_invoke_websocket_callback(host VjsxLaneHost, ctx &vjsx.Context, j invoke_arg.free() } mut result := host.call_handler(invoke_handler, invoke_arg) or { - err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), 'inproc_vjsx_executor_websocket_handler_failed') + err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), + 'inproc_vjsx_executor_websocket_handler_failed') return error(err_msg) } if result.is_exception() { result.free() - err_msg := inproc_vjsx_context_error_message(ctx, 'exception', 'inproc_vjsx_executor_websocket_handler_failed') + err_msg := inproc_vjsx_context_error_message(ctx, 'exception', + 'inproc_vjsx_executor_websocket_handler_failed') return error(err_msg) } return result @@ -4543,14 +4578,16 @@ fn inproc_vjsx_normalize_websocket_callback_result(host VjsxLaneHost, ctx &vjsx. 
} log.debug('[vhttpd] websocket_on_lane handler_ok lane=${lane.id} idx=${idx} event=${frame.event} promise=${result.instanceof('Promise')}') resolved := host.resolve_value(result) or { - err_msg := inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_handler_failed') + err_msg := inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_websocket_handler_failed') return error(err_msg) } defer { resolved.free() } mut normalized := host.call_handler(normalize_fn, js_frame, resolved) or { - err_msg := inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_normalize_failed') + err_msg := inproc_vjsx_normalize_error_message(err.msg(), + 'inproc_vjsx_executor_websocket_normalize_failed') return error('inproc_vjsx_executor_websocket_normalize_failed:${err_msg}') } defer { @@ -4630,8 +4667,8 @@ fn (e InProcVjsxExecutor) resolve_websocket_affinity_on_lane(mut app App, frame state.mu.unlock() ctx := host.context() runtime_meta := e.websocket_runtime_meta(lane, frame) - mut js_runtime := build_websocket_js_runtime(ctx, runtime_meta, app.runtime_config_json, mut - app) + mut js_runtime := + build_websocket_js_runtime(ctx, runtime_meta, app.runtime_config_json, mut app) defer { js_runtime.free() } @@ -4644,7 +4681,8 @@ fn (e InProcVjsxExecutor) resolve_websocket_affinity_on_lane(mut app App, frame e.record_lane_success(lane.id) return WebSocketAffinityDecision{} } - err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), 'inproc_vjsx_executor_websocket_affinity_failed') + err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), + 'inproc_vjsx_executor_websocket_affinity_failed') e.record_lane_error(lane.id, err_msg) return error(err_msg) } @@ -4652,12 +4690,14 @@ fn (e InProcVjsxExecutor) resolve_websocket_affinity_on_lane(mut app App, frame result.free() } if result.is_exception() { - err_msg := inproc_vjsx_context_error_message(ctx, 'exception', 'inproc_vjsx_executor_websocket_affinity_failed') + err_msg 
:= inproc_vjsx_context_error_message(ctx, 'exception', + 'inproc_vjsx_executor_websocket_affinity_failed') e.record_lane_error(lane.id, err_msg) return error(err_msg) } resolved := host.resolve_value(result) or { - err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), 'inproc_vjsx_executor_websocket_affinity_failed') + err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), + 'inproc_vjsx_executor_websocket_affinity_failed') e.record_lane_error(lane.id, err_msg) return error(err_msg) } @@ -4699,8 +4739,8 @@ fn (e InProcVjsxExecutor) resolve_websocket_actor_on_lane(mut app App, frame Wor state.mu.unlock() ctx := host.context() runtime_meta := e.websocket_runtime_meta(lane, frame) - mut js_runtime := build_websocket_js_runtime(ctx, runtime_meta, app.runtime_config_json, mut - app) + mut js_runtime := + build_websocket_js_runtime(ctx, runtime_meta, app.runtime_config_json, mut app) defer { js_runtime.free() } @@ -4713,7 +4753,8 @@ fn (e InProcVjsxExecutor) resolve_websocket_actor_on_lane(mut app App, frame Wor e.record_lane_success(lane.id) return WebSocketActorDecision{} } - err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), 'inproc_vjsx_executor_websocket_actor_failed') + err_msg := inproc_vjsx_context_error_message(ctx, err.msg(), + 'inproc_vjsx_executor_websocket_actor_failed') e.record_lane_error(lane.id, err_msg) return error(err_msg) } @@ -4721,7 +4762,8 @@ fn (e InProcVjsxExecutor) resolve_websocket_actor_on_lane(mut app App, frame Wor result.free() } if result.is_exception() { - err_msg := inproc_vjsx_context_error_message(ctx, 'exception', 'inproc_vjsx_executor_websocket_actor_failed') + err_msg := inproc_vjsx_context_error_message(ctx, 'exception', + 'inproc_vjsx_executor_websocket_actor_failed') e.record_lane_error(lane.id, err_msg) return error(err_msg) } @@ -5088,6 +5130,190 @@ pub fn (e InProcVjsxExecutor) dispatch_http(mut app App, req HttpLogicDispatchRe return error(last_err) } +fn (e InProcVjsxExecutor) call_plugin_once(mut app 
App, req PluginCallRequest) !PluginCallResponse { + e.bootstrap_placeholder()! + lane := e.acquire_next_lane(inproc_vjsx_lane_wait_timeout_ms)! + defer { + e.release_lane(lane.id) + } + idx := e.lane_index_by_id(lane.id) + if idx < 0 { + e.record_lane_error(lane.id, 'inproc_vjsx_executor_lane_not_found') + return error('inproc_vjsx_executor_lane_not_found') + } + e.ensure_lane_host(idx) or { + e.record_lane_error(lane.id, err.msg()) + return error(err.msg()) + } + e.run_startup_hooks(mut app, idx, lane) or { + e.record_lane_error(lane.id, err.msg()) + return error(err.msg()) + } + e.activate_lane_request_context(idx, mut app, lane.id, HttpLogicDispatchRequest{ + method: req.op + path: '/_plugin/${req.capability}' + trace_id: req.trace_id + request_id: req.request_id + }) + defer { + e.clear_lane_request_context(idx) + } + mut state := e.state + state.mu.@lock() + mut host := state.hosts[idx] + state.mu.unlock() + ctx := host.context() + req_obj := ctx.json_parse(json.encode(req)) + defer { + req_obj.free() + } + entry_kind := if req.capability.trim_space() == '' { + 'plugin' + } else { + req.capability.trim_space() + } + mut result := host.call_entry(entry_kind, req_obj) or { + if err.msg() == 'inproc_vjsx_executor_missing_${entry_kind}_handler' { + host.call_entry('plugin', req_obj) or { + e.record_lane_error(lane.id, err.msg()) + return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}') + } + } else { + e.record_lane_error(lane.id, err.msg()) + return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}') + } + } + defer { + result.free() + } + resolved := host.resolve_value(result) or { + e.record_lane_error(lane.id, err.msg()) + return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}') + } + defer { + resolved.free() + } + raw := resolved.json_stringify() + e.record_lane_success(lane.id) + return PluginCallResponse{ + ok: true + result: raw + } +} + +pub fn (e InProcVjsxExecutor) call_plugin(mut app App, req 
PluginCallRequest) !PluginCallResponse { + e.remember_app(mut app) + mut last_err := 'inproc_vjsx_executor_plugin_call_failed' + for attempt in 0 .. inproc_vjsx_dispatch_retry_attempts { + resp := e.call_plugin_once(mut app, req) or { + last_err = err.msg() + if attempt + 1 < inproc_vjsx_dispatch_retry_attempts + && inproc_vjsx_should_retry_dispatch(last_err) { + continue + } + return error(last_err) + } + return resp + } + return error(last_err) +} + +fn (e InProcVjsxExecutor) call_plugin_stream_once(mut app App, req PluginCallRequest, on_frame PluginStreamFrameFn) !PluginStreamCallResponse { + e.bootstrap_placeholder()! + lane := e.acquire_next_lane(inproc_vjsx_lane_wait_timeout_ms)! + defer { + e.release_lane(lane.id) + } + idx := e.lane_index_by_id(lane.id) + if idx < 0 { + e.record_lane_error(lane.id, 'inproc_vjsx_executor_lane_not_found') + return error('inproc_vjsx_executor_lane_not_found') + } + e.ensure_lane_host(idx) or { + e.record_lane_error(lane.id, err.msg()) + return error(err.msg()) + } + e.run_startup_hooks(mut app, idx, lane) or { + e.record_lane_error(lane.id, err.msg()) + return error(err.msg()) + } + e.activate_lane_request_context(idx, mut app, lane.id, HttpLogicDispatchRequest{ + method: req.op + path: '/_plugin/${req.capability}' + trace_id: req.trace_id + request_id: req.request_id + }) + defer { + e.clear_lane_request_context(idx) + } + mut state := e.state + state.mu.@lock() + mut host := state.hosts[idx] + state.mu.unlock() + ctx := host.context() + req_obj := ctx.json_parse(json.encode(req)) + defer { + req_obj.free() + } + entry_kind := if req.capability.trim_space() == '' { + 'plugin' + } else { + req.capability.trim_space() + } + mut result := host.call_entry(entry_kind, req_obj) or { + if err.msg() == 'inproc_vjsx_executor_missing_${entry_kind}_handler' { + host.call_entry('plugin', req_obj) or { + e.record_lane_error(lane.id, err.msg()) + return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}') + } + } else { + 
e.record_lane_error(lane.id, err.msg()) + return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}') + } + } + defer { + result.free() + } + if host.session.is_streamable_value(result) { + completed := host.session.stream_value(result, fn [on_frame] (frame vjsx.Value) !bool { + raw := frame.json_stringify() + return on_frame(raw)! + }) or { + e.record_lane_error(lane.id, err.msg()) + return error('inproc_vjsx_executor_plugin_stream_failed:${err.msg()}') + } + e.record_lane_success(lane.id) + return PluginStreamCallResponse{ + streamed: true + response: PluginCallResponse{ + ok: true + result: '{"streamed":true,"completed":${completed}}' + } + } + } + resolved := host.resolve_value(result) or { + e.record_lane_error(lane.id, err.msg()) + return error('inproc_vjsx_executor_plugin_handler_failed:${err.msg()}') + } + defer { + resolved.free() + } + raw := resolved.json_stringify() + e.record_lane_success(lane.id) + return PluginStreamCallResponse{ + streamed: false + response: PluginCallResponse{ + ok: true + result: raw + } + } +} + +pub fn (e InProcVjsxExecutor) call_plugin_stream(mut app App, req PluginCallRequest, on_frame PluginStreamFrameFn) !PluginStreamCallResponse { + e.remember_app(mut app) + return e.call_plugin_stream_once(mut app, req, on_frame) +} + pub fn (e InProcVjsxExecutor) open_websocket_session(mut app App, req WebSocketSessionOpenRequest) !WebSocketSessionOpenOutcome { e.remember_app(mut app) _ = app @@ -5349,8 +5575,8 @@ pub fn (e InProcVjsxExecutor) dispatch_websocket_event(mut app App, frame Worker result := inproc_vjsx_await_websocket_task_result(done_ch, mut slot)! 
if frame.event == 'open' { } - return e.finalize_websocket_dispatch_response(frame, '', '', actor.key, actor.class_name, - actor.persist, result) + return e.finalize_websocket_dispatch_response(frame, '', '', actor.key, + actor.class_name, actor.persist, result) } } affinity_key, affinity_priority, should_queue := e.resolve_websocket_dispatch_affinity(frame) or { @@ -5394,8 +5620,8 @@ pub fn (e InProcVjsxExecutor) dispatch_websocket_event(mut app App, frame Worker state.websocket_affinity_lane_by_key[affinity_key] or { '' } } state.mu.unlock() - return e.finalize_websocket_dispatch_response(frame, affinity_key, lane_id, '', - '', false, result) + return e.finalize_websocket_dispatch_response(frame, affinity_key, lane_id, '', '', false, + result) } lane, direct_affinity_key := e.acquire_websocket_lane(frame) or { if err.msg() == 'inproc_vjsx_executor_websocket_affinity_key_missing' { @@ -5428,6 +5654,6 @@ pub fn (e InProcVjsxExecutor) dispatch_websocket_event(mut app App, frame Worker result := inproc_vjsx_await_websocket_task_result(done_ch, mut slot)! 
if frame.event == 'open' { } - return e.finalize_websocket_dispatch_response(frame, direct_affinity_key, lane.id, - '', '', false, result) + return e.finalize_websocket_dispatch_response(frame, direct_affinity_key, lane.id, '', '', + false, result) } diff --git a/src/main.v b/src/main.v index c43b349..2d709c9 100644 --- a/src/main.v +++ b/src/main.v @@ -37,6 +37,8 @@ pub mut: admin_on_data_plane bool admin_token string runtime_config_json string + plugin_configs map[string]PluginConfig + plugin_vjsx map[string]InProcVjsxExecutor assets_enabled bool assets_prefix string assets_root string @@ -52,6 +54,14 @@ pub mut: feishu_reconnect_delay_ms int feishu_token_refresh_skew_seconds int feishu_recent_event_limit int + openai_enabled bool + openai_base_path string + openai_default_backend string + openai_plugin string + openai_endpoints OpenAIEndpointsConfig + openai_backends map[string]OpenAIBackendConfig + openai_routes map[string]OpenAIRouteConfig + openai_responses MemoryStateStore[OpenAIResponseRecord] websocket_upstream_recent_dispatch_limit int auto_start_dynamic_upstreams bool feishu_static_apps map[string]FeishuAppConfig @@ -94,30 +104,30 @@ pub mut: websocket_upstream_recent_activities []WebSocketUpstreamActivitySnapshot provider_instance_specs map[string]ProviderInstanceSpec = map[string]ProviderInstanceSpec{} // codex upstream - codex_mu sync.Mutex - codex_runtime CodexProviderRuntime - codex_instances map[string]CodexProviderRuntime = map[string]CodexProviderRuntime{} - ollama_enabled bool - db_runtime DbProviderRuntime - feishu_buffers map[string]FeishuStreamBuffer - feishu_http_lane shared FeishuHttpLane - feishu_control_http_lane shared FeishuControlHttpLane - feishu_http_test_stub bool - feishu_http_test_delay_ms int - feishu_http_test_inflight int - feishu_http_test_calls int - feishu_http_test_message_seq int - feishu_card_bridge_mu sync.Mutex - feishu_card_bridge_send_mu sync.Mutex - feishu_card_bridge_clients map[string]&websocket.Client = 
map[string]&websocket.Client{} - feishu_card_bridge_pending map[string]chan FeishuCardBridgeResult = map[string]chan FeishuCardBridgeResult{} + codex_mu sync.Mutex + codex_runtime CodexProviderRuntime + codex_instances map[string]CodexProviderRuntime = map[string]CodexProviderRuntime{} + ollama_enabled bool + db_runtime DbProviderRuntime + feishu_buffers map[string]FeishuStreamBuffer + feishu_http_lane shared FeishuHttpLane + feishu_control_http_lane shared FeishuControlHttpLane + feishu_http_test_stub bool + feishu_http_test_delay_ms int + feishu_http_test_inflight int + feishu_http_test_calls int + feishu_http_test_message_seq int + feishu_card_bridge_mu sync.Mutex + feishu_card_bridge_send_mu sync.Mutex + feishu_card_bridge_clients map[string]&websocket.Client = map[string]&websocket.Client{} + feishu_card_bridge_pending map[string]chan FeishuCardBridgeResult = map[string]chan FeishuCardBridgeResult{} feishu_card_bridge_proxy_pending map[string]chan FeishuBridgeProxyResult = map[string]chan FeishuBridgeProxyResult{} - feishu_card_bridge_client_conn &websocket.Client = unsafe { nil } + feishu_card_bridge_client_conn &websocket.Client = unsafe { nil } feishu_card_bridge_enabled_flag bool - feishu_card_bridge_ws_url string - feishu_card_bridge_client_id string - feishu_card_bridge_token string - feishu_card_bridge_target_id string + feishu_card_bridge_ws_url string + feishu_card_bridge_client_id string + feishu_card_bridge_token string + feishu_card_bridge_target_id string } struct CodexTarget { @@ -283,15 +293,15 @@ struct WorkerWebSocketFrame { } struct WorkerWebSocketDispatchResponse { - mode string - event string - id string - accepted bool - closed bool - commands []WorkerWebSocketFrame + mode string + event string + id string + accepted bool + closed bool + commands []WorkerWebSocketFrame affinity_key string @[json: 'affinity_key'] - error string - error_class string @[json: 'error_class'] + error string + error_class string @[json: 'error_class'] } struct 
WorkerWebSocketDispatchCommandFailure { @@ -847,8 +857,7 @@ fn proxy_worker_websocket_dispatch(mut app App, mut ctx Context, method string, app.ws_hub_meta_snapshot(req_id), map[string][]string{}, map[string]map[string]string{}, map[string]int{}, map[string][]string{}) resp := app.kernel_dispatch_websocket_event(open_frame) or { - err_msg := inproc_vjsx_normalize_error_message(err.msg(), - 'inproc_vjsx_executor_websocket_open_failed') + err_msg := inproc_vjsx_normalize_error_message(err.msg(), 'inproc_vjsx_executor_websocket_open_failed') log.error('[vhttpd] kernel_dispatch_websocket_event failed trace_id=${trace_id} path=${normalized_path} error=${err_msg}') ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {} ctx.set_custom_header('x-vhttpd-error-class', 'transport_error') or {} @@ -882,8 +891,7 @@ fn proxy_worker_websocket_dispatch(mut app App, mut ctx Context, method string, ctx.conn.set_read_timeout(time.infinite) mut conn := ctx.conn spawn handle_worker_websocket_dispatch_session(mut app, mut conn, key, method.to_upper(), - normalized_path, query, headers, remote_addr, req_id, trace_id, start_ms, - resp.commands.clone()) + normalized_path, query, headers, remote_addr, req_id, trace_id, start_ms, resp.commands.clone()) return veb.no_result() } @@ -961,18 +969,18 @@ fn handle_worker_websocket_dispatch_session(mut app App, mut client_conn net.Tcp mut ws_server := websocket.new_server(.ip, 0, '') mut lifecycle := &WebSocketDispatchConnState{} mut state := &WebSocketDispatchBridgeState{ - app: &app - lifecycle: lifecycle + app: &app + lifecycle: lifecycle open_commands: open_commands.clone() - conn_id: req_id - method: method - path: path - query: query.clone() - headers: headers.clone() - remote_addr: remote_addr - request_id: req_id - trace_id: trace_id - start_ms: start_ms + conn_id: req_id + method: method + path: path + query: query.clone() + headers: headers.clone() + remote_addr: remote_addr + request_id: req_id + trace_id: trace_id + start_ms: 
start_ms } ws_server.on_message_ref(worker_websocket_dispatch_message_cb, state) ws_server.on_close_ref(worker_websocket_dispatch_close_cb, state) @@ -980,9 +988,7 @@ fn handle_worker_websocket_dispatch_session(mut app App, mut client_conn net.Tcp defer { worker_websocket_dispatch_finalize(state) } - ws_server.handle_handshake(mut client_conn, key) or { - return - } + ws_server.handle_handshake(mut client_conn, key) or { return } } fn worker_websocket_dispatch_attached_cb(mut sc websocket.ServerClient, ref voidptr) ! { @@ -992,8 +998,8 @@ fn worker_websocket_dispatch_attached_cb(mut sc websocket.ServerClient, ref void unsafe { mut state := &WebSocketDispatchBridgeState(ref) state.app.ws_hub_register_conn(state.conn_id, '', state.method, state.request_id, - state.trace_id, state.path, state.query, state.headers, state.remote_addr, sc.client, - state.lifecycle) + state.trace_id, state.path, state.query, state.headers, state.remote_addr, + sc.client, state.lifecycle) worker_websocket_dispatch_process_open(state) worker_websocket_dispatch_activate(state) } @@ -1109,7 +1115,10 @@ fn worker_websocket_dispatch_message_cb(mut ws websocket.Client, msg &websocket. return } if result.failures.len > 0 { - if close_frame := state.app.websocket_dispatch_followup_failures(state.conn_id, state.method, state.path, state.query, state.headers, state.remote_addr, state.request_id, state.trace_id, result.failures) { + if close_frame := state.app.websocket_dispatch_followup_failures(state.conn_id, + state.method, state.path, state.query, state.headers, state.remote_addr, state.request_id, + state.trace_id, result.failures) + { code := if close_frame.code > 0 { close_frame.code } else { 1000 } worker_websocket_dispatch_begin_local_close(mut state) ws.close(code, close_frame.reason)! 
@@ -1934,8 +1943,14 @@ pub fn (mut app App) events_stream(mut ctx Context) veb.Result { @['/:path...'; get] pub fn (mut app App) proxy_get(mut ctx Context, path string) veb.Result { + start_ms := time.now().unix_milli() log.info('[http] route proxy_get path=${path} url=${ctx.req.url}') target := if ctx.req.url == '' { path } else { ctx.req.url } + req_id := resolve_request_id(ctx, target) + trace_id := resolve_trace_id(ctx, target) + if result := app.openai_try_handle(mut ctx, 'GET', target, req_id, trace_id, start_ms) { + return result + } request_path, _ := normalize_request_target(target) normalized_target := normalize_path(request_path) if normalized_target == '/mcp' { @@ -1950,8 +1965,14 @@ pub fn (mut app App) proxy_get(mut ctx Context, path string) veb.Result { @['/:path...'; post] pub fn (mut app App) proxy_post(mut ctx Context, path string) veb.Result { + start_ms := time.now().unix_milli() log.info('[http] route proxy_post path=${path} url=${ctx.req.url} body_len=${ctx.req.data.len}') target := if ctx.req.url == '' { path } else { ctx.req.url } + req_id := resolve_request_id(ctx, target) + trace_id := resolve_trace_id(ctx, target) + if result := app.openai_try_handle(mut ctx, 'POST', target, req_id, trace_id, start_ms) { + return result + } request_path, _ := normalize_request_target(target) normalized_target := normalize_path(request_path) if normalized_target == '/mcp' { @@ -1966,27 +1987,45 @@ pub fn (mut app App) proxy_post(mut ctx Context, path string) veb.Result { @['/:path...'; put] pub fn (mut app App) proxy_put(mut ctx Context, path string) veb.Result { + start_ms := time.now().unix_milli() + target := if ctx.req.url == '' { path } else { ctx.req.url } + req_id := resolve_request_id(ctx, target) + trace_id := resolve_trace_id(ctx, target) + if result := app.openai_try_handle(mut ctx, 'PUT', target, req_id, trace_id, start_ms) { + return result + } if !app.has_http_logic_executor() { ctx.res.set_status(.not_found) return ctx.text('Not Found') } - 
target := if ctx.req.url == '' { path } else { ctx.req.url } return proxy_worker_response(mut app, mut ctx, 'PUT', target, '') } @['/:path...'; patch] pub fn (mut app App) proxy_patch(mut ctx Context, path string) veb.Result { + start_ms := time.now().unix_milli() + target := if ctx.req.url == '' { path } else { ctx.req.url } + req_id := resolve_request_id(ctx, target) + trace_id := resolve_trace_id(ctx, target) + if result := app.openai_try_handle(mut ctx, 'PATCH', target, req_id, trace_id, start_ms) { + return result + } if !app.has_http_logic_executor() { ctx.res.set_status(.not_found) return ctx.text('Not Found') } - target := if ctx.req.url == '' { path } else { ctx.req.url } return proxy_worker_response(mut app, mut ctx, 'PATCH', target, '') } @['/:path...'; delete] pub fn (mut app App) proxy_delete(mut ctx Context, path string) veb.Result { + start_ms := time.now().unix_milli() target := if ctx.req.url == '' { path } else { ctx.req.url } + req_id := resolve_request_id(ctx, target) + trace_id := resolve_trace_id(ctx, target) + if result := app.openai_try_handle(mut ctx, 'DELETE', target, req_id, trace_id, start_ms) { + return result + } if normalize_path(target) == '/mcp' { return app.mcp_delete(mut ctx) } @@ -1999,10 +2038,16 @@ pub fn (mut app App) proxy_delete(mut ctx Context, path string) veb.Result { @['/:path...'; head] pub fn (mut app App) proxy_head(mut ctx Context, path string) veb.Result { + start_ms := time.now().unix_milli() + target := if ctx.req.url == '' { path } else { ctx.req.url } + req_id := resolve_request_id(ctx, target) + trace_id := resolve_trace_id(ctx, target) + if result := app.openai_try_handle(mut ctx, 'HEAD', target, req_id, trace_id, start_ms) { + return result + } if !app.has_http_logic_executor() { ctx.res.set_status(.not_found) return ctx.text('') } - target := if ctx.req.url == '' { path } else { ctx.req.url } return proxy_worker_response(mut app, mut ctx, 'HEAD', target, '') } diff --git a/src/openai_runtime.v 
b/src/openai_runtime.v new file mode 100644 index 0000000..e1a4332 --- /dev/null +++ b/src/openai_runtime.v @@ -0,0 +1,2709 @@ +module main + +import json +import net +import net.http +import os +import time +import veb +import x.json2 + +const openai_response_registry_ttl = 24 * time.hour +const openai_stream_done_fetch_error = 'openai_stream_done' + +struct OpenAIModelObject { + id string + object string = 'model' + created int + owned_by string = 'vhttpd' +} + +struct OpenAIModelsResponse { + object string = 'list' + data []OpenAIModelObject +} + +struct OpenAIErrorBody { + message string + typ string @[json: 'type'] + code string +} + +struct OpenAIErrorResponse { + error OpenAIErrorBody +} + +struct OpenAIResolvedRoute { + route_name string + model string + backend_name string + upstream_model string + backend OpenAIBackendConfig +} + +struct OpenAIUpstreamPlan { + backend string + method string + path string + body string + upstream_model string @[json: 'upstream_model'] + stream_mode string @[json: 'stream_mode'] + response_codec string @[json: 'response_codec'] + output_protocol string @[json: 'output_protocol'] + mapper string + headers map[string]string +} + +struct OpenAIResolvedPlan { + backend_name string + backend OpenAIBackendConfig + method string + path string + body string + model string + stream_mode string + response_codec string + output_protocol string + mapper string + headers map[string]string +} + +struct OpenAIPluginPlanResult { + handled bool + plan OpenAIResolvedPlan +} + +struct OpenAIPluginModelsResult { + handled bool + models []string +} + +struct OpenAIResponseRecord { + id string + backend_name string + backend_kind string + executor string + model string + status string + created_at_unix i64 + updated_at_unix i64 + request_id string + trace_id string + body string +} + +@[heap] +struct OpenAIResponsesStreamRegistryState { +mut: + completed_body string +} + +struct OpenAIPluginChatPayload { + method string + path string + model 
string + stream bool + body string + base_path string @[json: 'base_path'] + request_id string @[json: 'request_id'] + trace_id string @[json: 'trace_id'] +} + +struct OpenAIPluginResponsesPayload { + method string + path string + model string + stream bool + body string + base_path string @[json: 'base_path'] + request_id string @[json: 'request_id'] + trace_id string @[json: 'trace_id'] +} + +struct OpenAIPluginModelsPayload { + method string + path string + base_path string @[json: 'base_path'] + request_id string @[json: 'request_id'] + trace_id string @[json: 'trace_id'] +} + +struct OpenAIPluginFallbackPayload { + method string + path string + model string + stream bool + body string + base_path string @[json: 'base_path'] + failed_backend string @[json: 'failed_backend'] + status_code int @[json: 'status_code'] + error_code string @[json: 'error_code'] + error_message string @[json: 'error_message'] + request_id string @[json: 'request_id'] + trace_id string @[json: 'trace_id'] +} + +struct OpenAIExecutorPayload { + method string + path string + model string + stream bool + body string + backend string + request_id string @[json: 'request_id'] + trace_id string @[json: 'trace_id'] + response_codec string @[json: 'response_codec'] + output_protocol string @[json: 'output_protocol'] +} + +struct OpenAIPluginMapFramePayload { + model string + frame string + response_codec string @[json: 'response_codec'] + output_protocol string @[json: 'output_protocol'] + request_id string @[json: 'request_id'] + trace_id string @[json: 'trace_id'] +} + +struct OpenAIChatStreamDelta { + content string +} + +struct OpenAIChatStreamChoice { + index int + delta OpenAIChatStreamDelta +} + +struct OpenAIChatStreamChunk { + id string + object string = 'chat.completion.chunk' + created int + model string + choices []OpenAIChatStreamChoice +} + +struct OpenAIChatMessage { + role string + content string +} + +struct OpenAIChatCompletionChoice { + index int + message OpenAIChatMessage 
+ finish_reason string @[json: 'finish_reason'] +} + +struct OpenAIChatCompletionResponse { + id string + object string = 'chat.completion' + created int + model string + choices []OpenAIChatCompletionChoice +} + +struct OpenAIFrameMapping { + content string + tool_calls []json2.Any + usage map[string]int + done bool + handled bool + error string + finish_reason string +} + +struct OpenAIChunkDecodeState { +mut: + mode string = 'unknown' + buffer string + remaining int + need_chunk_crlf bool + done bool +} + +fn openai_hex_chunk_size(raw string) ?int { + hex_part := raw.all_before(';').trim_space() + if hex_part == '' { + return none + } + mut size := 0 + for ch in hex_part { + mut value := -1 + if ch >= `0` && ch <= `9` { + value = int(ch - `0`) + } else if ch >= `a` && ch <= `f` { + value = 10 + int(ch - `a`) + } else if ch >= `A` && ch <= `F` { + value = 10 + int(ch - `A`) + } else { + return none + } + size = (size * 16) + value + } + return size +} + +fn openai_decode_progress_chunk(mut decoder OpenAIChunkDecodeState, chunk []u8) string { + if chunk.len == 0 || decoder.done { + return '' + } + incoming := chunk.bytestr() + if decoder.mode == 'plain' { + return incoming + } + decoder.buffer += incoming + if decoder.mode == 'unknown' { + if decoder.buffer.contains('\r\n') { + first_line := decoder.buffer.all_before('\r\n') + _ := openai_hex_chunk_size(first_line) or { + decoder.mode = 'plain' + out := decoder.buffer + decoder.buffer = '' + return out + } + decoder.mode = 'chunked' + } else if decoder.buffer.contains('\n') || decoder.buffer.len > 64 { + decoder.mode = 'plain' + out := decoder.buffer + decoder.buffer = '' + return out + } else { + return '' + } + } + mut out := '' + for decoder.mode == 'chunked' && decoder.buffer.len > 0 && !decoder.done { + if decoder.need_chunk_crlf { + if decoder.buffer.len < 2 { + break + } + if decoder.buffer.starts_with('\r\n') { + decoder.buffer = decoder.buffer[2..] 
+ } else if decoder.buffer.starts_with('\n') { + decoder.buffer = decoder.buffer[1..] + } + decoder.need_chunk_crlf = false + } + if decoder.remaining == 0 { + if !decoder.buffer.contains('\r\n') { + break + } + line := decoder.buffer.all_before('\r\n') + decoder.buffer = decoder.buffer.all_after('\r\n') + size := openai_hex_chunk_size(line) or { + decoder.mode = 'plain' + out += decoder.buffer + decoder.buffer = '' + break + } + if size == 0 { + decoder.done = true + decoder.buffer = '' + break + } + decoder.remaining = size + } + if decoder.remaining > 0 { + take := if decoder.buffer.len < decoder.remaining { + decoder.buffer.len + } else { + decoder.remaining + } + out += decoder.buffer[..take] + decoder.buffer = decoder.buffer[take..] + decoder.remaining -= take + if decoder.remaining == 0 { + decoder.need_chunk_crlf = true + } + } + } + return out +} + +@[heap] +struct OpenAIStreamProxyState { +mut: + conn net.TcpConn + method string + status_code int + content_type string + response_headers map[string]string + headers_written bool + error_body string + chunk_decoder OpenAIChunkDecodeState + done bool + done_probe string + final_written bool +} + +@[heap] +struct OpenAIMappedStreamProxyState { +mut: + app &App = unsafe { nil } + conn net.TcpConn + method string + status_code int + response_headers map[string]string + headers_written bool + line_buffer string + model string + request_id string + trace_id string + mapper string + response_codec string + output_protocol string + created int + done bool + mapper_error string + error_body string + usage map[string]int + chunk_decoder OpenAIChunkDecodeState + final_written bool +} + +fn normalize_openai_base_path(raw string) string { + mut base := normalize_path(raw.trim_space()) + for base.len > 1 && base.ends_with('/') { + base = base[..base.len - 1] + } + return base +} + +fn openai_relative_path(target string, base_path string) ?string { + request_path, _ := normalize_request_target(target) + path := 
normalize_path(request_path) + base := normalize_openai_base_path(base_path) + if path == base { + return '' + } + prefix := '${base}/' + if !path.starts_with(prefix) { + return none + } + return '/' + path[prefix.len..] +} + +fn openai_relative_target(target string, base_path string) ?string { + request_path, query := normalize_request_target(target) + path := normalize_path(request_path) + base := normalize_openai_base_path(base_path) + mut relative := '' + if path == base { + relative = '' + } else { + prefix := '${base}/' + if !path.starts_with(prefix) { + return none + } + relative = '/' + path[prefix.len..] + } + if query == '' { + return relative + } + return '${relative}?${query}' +} + +fn openai_response_content_type(header http.Header, fallback string) string { + return header.get(.content_type) or { fallback } +} + +fn openai_is_stream_request(body string) bool { + parsed := json2.decode[json2.Any](body) or { return false } + root := parsed.as_map() + stream_any := root['stream'] or { return false } + return stream_any.bool() +} + +fn openai_is_stream_target(target string) bool { + _, query := normalize_request_target(target) + if query == '' { + return false + } + params := parse_query_map(query) + stream := params['stream'] or { return false } + return stream.to_lower() in ['1', 'true', 'yes'] +} + +fn openai_request_model(body string) string { + parsed := json2.decode[json2.Any](body) or { return '' } + root := parsed.as_map() + return (root['model'] or { json2.Any('') }).str() +} + +fn openai_response_id_from_body(body string) string { + parsed := json2.decode[json2.Any](body) or { return '' } + root := parsed.as_map() + if (root['object'] or { json2.Any('') }).str() != 'response' { + return '' + } + return (root['id'] or { json2.Any('') }).str().trim_space() +} + +fn openai_response_status_from_body(body string) string { + parsed := json2.decode[json2.Any](body) or { return '' } + root := parsed.as_map() + return (root['status'] or { json2.Any('') 
}).str() +} + +fn openai_response_id_from_relative(relative string) string { + path := normalize_path(relative.all_before('?')) + prefix := '/responses/' + if !path.starts_with(prefix) { + return '' + } + rest := path[prefix.len..] + if rest.trim_space() == '' { + return '' + } + return rest.split('/')[0].trim_space() +} + +fn openai_response_registry_record(plan OpenAIResolvedPlan, response_id string, body string, req_id string, trace_id string) OpenAIResponseRecord { + now := time.now().unix() + status := openai_response_status_from_body(body) + return OpenAIResponseRecord{ + id: response_id + backend_name: plan.backend_name + backend_kind: plan.backend.kind + executor: plan.backend.executor + model: plan.model + status: if status == '' { 'completed' } else { status } + created_at_unix: now + updated_at_unix: now + request_id: req_id + trace_id: trace_id + body: body + } +} + +fn (mut app App) openai_store_response_record(plan OpenAIResolvedPlan, body string, req_id string, trace_id string) string { + response_id := openai_response_id_from_body(body) + if response_id == '' { + return '' + } + record := openai_response_registry_record(plan, response_id, body, req_id, trace_id) + app.openai_responses.set_with_ttl(response_id, record, openai_response_registry_ttl) or {} + return response_id +} + +fn openai_replace_model_in_body(body string, upstream_model string) string { + if upstream_model.trim_space() == '' { + return body + } + parsed := json2.decode[json2.Any](body) or { return body } + mut root := parsed.as_map() + root['model'] = json2.Any(upstream_model) + return json2.Any(root).json_str() +} + +fn openai_route_models(route OpenAIRouteConfig, route_name string) []string { + mut models := []string{} + for raw in route.models { + model := raw.trim_space() + if model != '' && model !in models { + models << model + } + } + if route.model.trim_space() != '' && route.model !in models { + models << route.model.trim_space() + } + if models.len == 0 && 
route_name.trim_space() != '' { + models << route_name.trim_space() + } + return models +} + +fn (app &App) openai_models() []string { + mut models := []string{} + for name, route in app.openai_routes { + for model in openai_route_models(route, name) { + if model !in models { + models << model + } + } + } + models.sort() + return models +} + +fn (app &App) openai_resolve_route(model string) !OpenAIResolvedRoute { + requested := model.trim_space() + if requested != '' { + for name, route in app.openai_routes { + if requested in openai_route_models(route, name) { + backend_name := if route.backend.trim_space() != '' { + route.backend.trim_space() + } else { + app.openai_default_backend.trim_space() + } + if backend_name == '' { + return error('missing backend for model ${requested}') + } + backend := app.openai_backends[backend_name] or { + return error('unknown backend ${backend_name}') + } + upstream_model := if route.upstream_model.trim_space() != '' { + route.upstream_model.trim_space() + } else { + requested + } + return OpenAIResolvedRoute{ + route_name: name + model: requested + backend_name: backend_name + upstream_model: upstream_model + backend: backend + } + } + } + } + backend_name := app.openai_default_backend.trim_space() + if backend_name == '' { + return error('no matching route for model ${requested}') + } + backend := app.openai_backends[backend_name] or { + return error('unknown backend ${backend_name}') + } + return OpenAIResolvedRoute{ + model: requested + backend_name: backend_name + upstream_model: requested + backend: backend + } +} + +fn openai_builtin_plan_from_route_for_endpoint_method(route OpenAIResolvedRoute, body string, upstream_path string, output_protocol string, method string) OpenAIResolvedPlan { + return OpenAIResolvedPlan{ + backend_name: route.backend_name + backend: route.backend + method: method.to_upper() + path: upstream_path + body: openai_replace_model_in_body(body, route.upstream_model) + model: route.model + stream_mode: 
'passthrough' + response_codec: 'sse' + output_protocol: output_protocol + mapper: 'builtin' + headers: map[string]string{} + } +} + +fn openai_builtin_plan_from_route_for_endpoint(route OpenAIResolvedRoute, body string, upstream_path string, output_protocol string) OpenAIResolvedPlan { + return openai_builtin_plan_from_route_for_endpoint_method(route, body, upstream_path, + output_protocol, 'POST') +} + +fn openai_builtin_plan_from_route(route OpenAIResolvedRoute, body string) OpenAIResolvedPlan { + return openai_builtin_plan_from_route_for_endpoint(route, body, '/chat/completions', + 'openai.chat.completion') +} + +fn openai_json_string_field(obj map[string]json2.Any, key string, default_val string) string { + value := obj[key] or { return default_val } + text := value.str() + if text == '' { + return default_val + } + return text +} + +fn openai_json_string_map_field(obj map[string]json2.Any, key string) map[string]string { + mut out := map[string]string{} + value := obj[key] or { return out } + for name, item in value.as_map() { + out[name] = item.str() + } + return out +} + +fn openai_plan_error(code string, message string) IError { + return error('${code}:${message}') +} + +fn openai_plan_error_code(err_msg string) string { + if err_msg.starts_with('openai_plugin_plan_') && err_msg.contains(':') { + return err_msg.all_before(':') + } + if err_msg.starts_with('openai_plugin_') && err_msg.contains(':') { + return err_msg.all_before(':') + } + if err_msg.starts_with('unknown backend ') { + return 'openai_plugin_plan_unknown_backend' + } + return 'model_not_found' +} + +fn openai_plan_error_message(err_msg string) string { + if (err_msg.starts_with('openai_plugin_plan_') || err_msg.starts_with('openai_plugin_')) + && err_msg.contains(':') { + return err_msg.all_after(':') + } + return err_msg +} + +fn openai_validate_plan_method(raw string) !string { + method := raw.trim_space().to_upper() + if method == '' { + return 'POST' + } + if method in ['GET', 'POST', 
'PUT', 'PATCH', 'DELETE', 'HEAD'] {
+		return method
+	}
+	return openai_plan_error('openai_plugin_plan_invalid_method', 'unsupported upstream method ${method}')
+}
+
+// openai_validate_plan_path normalises the upstream path of a plugin plan.
+// Empty input defaults to '/chat/completions'; the path must start with '/' and
+// must not contain CR or LF.
+fn openai_validate_plan_path(raw string) !string {
+	candidate := raw.trim_space()
+	if candidate == '' {
+		return '/chat/completions'
+	}
+	if !candidate.starts_with('/') {
+		return openai_plan_error('openai_plugin_plan_invalid_path', 'upstream path must start with /')
+	}
+	has_newline := candidate.contains('\r') || candidate.contains('\n')
+	if has_newline {
+		return openai_plan_error('openai_plugin_plan_invalid_path', 'upstream path must not contain newlines')
+	}
+	return candidate
+}
+
+// openai_validate_stream_mode accepts 'passthrough', 'mapped' or 'executor';
+// empty input defaults to 'passthrough'.
+fn openai_validate_stream_mode(raw string) !string {
+	mode := raw.trim_space()
+	if mode in ['passthrough', 'mapped', 'executor'] {
+		return mode
+	}
+	if mode == '' {
+		return 'passthrough'
+	}
+	return openai_plan_error('openai_plugin_plan_unsupported_stream_mode', 'unsupported stream_mode ${mode}')
+}
+
+// openai_validate_response_codec accepts 'sse', 'json', 'ndjson' or 'text'.
+// Empty input defaults to 'ndjson' for the 'mapped' stream mode and 'sse' otherwise.
+fn openai_validate_response_codec(raw string, stream_mode string) !string {
+	codec := raw.trim_space()
+	if codec in ['sse', 'json', 'ndjson', 'text'] {
+		return codec
+	}
+	if codec != '' {
+		return openai_plan_error('openai_plugin_plan_unsupported_response_codec', 'unsupported response_codec ${codec}')
+	}
+	if stream_mode == 'mapped' {
+		return 'ndjson'
+	}
+	return 'sse'
+}
+
+// openai_validate_output_protocol defaults to 'openai.chat.completion'.
+// In 'mapped' stream mode that is the only accepted protocol; other modes pass
+// any non-empty value through unchanged.
+fn openai_validate_output_protocol(raw string, stream_mode string) !string {
+	protocol := raw.trim_space()
+	if protocol == '' || protocol == 'openai.chat.completion' {
+		return 'openai.chat.completion'
+	}
+	if stream_mode == 'mapped' {
+		return openai_plan_error('openai_plugin_plan_unsupported_output_protocol', 'unsupported output_protocol ${protocol}')
+	}
+	return protocol
+}
+
+// openai_validate_mapper accepts 'builtin' or 'plugin'; empty input defaults to 'builtin'.
+fn openai_validate_mapper(raw string) !string {
+	mapper := raw.trim_space()
+	if mapper in ['builtin', 'plugin'] {
+		return mapper
+	}
+	if mapper == '' {
+		return 'builtin'
+	}
+	return openai_plan_error('openai_plugin_plan_unsupported_mapper', 'unsupported mapper ${mapper}')
+}
+
+fn
openai_sanitize_plan_headers(headers map[string]string) map[string]string {
+	// Filters plugin-supplied upstream headers: drops empty names, hop-by-hop and
+	// framing headers, and any name or value containing CR/LF. Accepted names are
+	// stored trimmed so the emitted header line never carries padding.
+	mut out := map[string]string{}
+	for name, value in headers {
+		trimmed := name.trim_space()
+		lower := trimmed.to_lower()
+		if lower == ''
+			|| lower in ['connection', 'content-length', 'transfer-encoding', 'host', 'server', 'upgrade', 'proxy-connection', 'keep-alive', 'te', 'trailer'] {
+			continue
+		}
+		if lower.contains('\r') || lower.contains('\n') || value.contains('\r')
+			|| value.contains('\n') {
+			continue
+		}
+		// Key on the trimmed name: validation above was done on the trimmed form.
+		out[trimmed] = value
+	}
+	return out
+}
+
+// openai_plugin_not_handled reports whether a plugin reply declines the request,
+// i.e. it is JSON with a truthy 'not_handled' or 'notHandled' member.
+// Replies that are not valid JSON count as handled.
+fn openai_plugin_not_handled(raw string) bool {
+	parsed := json2.decode[json2.Any](raw) or { return false }
+	root := parsed.as_map()
+	for key in ['not_handled', 'notHandled'] {
+		value := root[key] or { continue }
+		if value.bool() {
+			return true
+		}
+	}
+	return false
+}
+
+// openai_upstream_plan_from_plugin_json_with_defaults decodes a plugin routing reply
+// into an unvalidated OpenAIUpstreamPlan. The plan may sit at the top level or under
+// a 'plan' member. default_path and default_output_protocol fill missing fields.
+// Returns an error when raw is not valid JSON.
+fn openai_upstream_plan_from_plugin_json_with_defaults(raw string, default_path string, default_output_protocol string) !OpenAIUpstreamPlan {
+	parsed := json2.decode[json2.Any](raw)!
+	mut root := parsed.as_map()
+	if plan_any := root['plan'] {
+		root = plan_any.as_map()
+	}
+	mut body := ''
+	if body_any := root['body'] {
+		// "body": null means "no override"; str() would turn it into the text 'null'.
+		if body_any !is json2.Null {
+			body = body_any.str()
+		}
+	}
+	return OpenAIUpstreamPlan{
+		backend: openai_json_string_field(root, 'backend', '')
+		method: openai_json_string_field(root, 'method', 'POST')
+		path: openai_json_string_field(root, 'path', default_path)
+		body: body
+		upstream_model: openai_json_string_field(root, 'upstream_model', '')
+		stream_mode: openai_json_string_field(root, 'stream_mode', 'passthrough')
+		response_codec: openai_json_string_field(root, 'response_codec', '')
+		output_protocol: openai_json_string_field(root, 'output_protocol', default_output_protocol)
+		mapper: openai_json_string_field(root, 'mapper', '')
+		headers: openai_json_string_map_field(root, 'headers')
+	}
+}
+
+// openai_upstream_plan_from_plugin_json decodes a plugin reply using the
+// chat-completions defaults.
+fn openai_upstream_plan_from_plugin_json(raw string) !OpenAIUpstreamPlan {
+	return openai_upstream_plan_from_plugin_json_with_defaults(raw, '/chat/completions',
+		'openai.chat.completion')
+}
+
+fn
openai_models_from_plugin_json(raw string) ![]string {
+	// Collects model names from a plugin reply: plain entries under 'models' and the
+	// 'id' of each row under 'data'. Names are trimmed, de-duplicated and sorted.
+	// Returns an error when raw is not valid JSON.
+	parsed := json2.decode[json2.Any](raw)!
+	root := parsed.as_map()
+	mut models := []string{}
+	if models_any := root['models'] {
+		for item in models_any.as_array() {
+			model := item.str().trim_space()
+			if model != '' && model !in models {
+				models << model
+			}
+		}
+	}
+	if data_any := root['data'] {
+		for item in data_any.as_array() {
+			row := item.as_map()
+			model := (row['id'] or { json2.Any('') }).str().trim_space()
+			if model != '' && model !in models {
+				models << model
+			}
+		}
+	}
+	models.sort()
+	return models
+}
+
+// openai_call_plugin invokes operation `op` of the 'openai' capability on the
+// configured OpenAI plugin, forwarding payload, ids and metadata.
+// Returns the error 'openai_plugin_not_configured' when no plugin name is set.
+fn (mut app App) openai_call_plugin(op string, payload string, req_id string, trace_id string, metadata map[string]string) !PluginCallResponse {
+	plugin_name := app.openai_plugin.trim_space()
+	if plugin_name == '' {
+		return error('openai_plugin_not_configured')
+	}
+	return app.call_plugin(PluginCallRequest{
+		plugin: plugin_name
+		capability: 'openai'
+		op: op
+		request_id: req_id
+		trace_id: trace_id
+		payload: payload
+		metadata: metadata
+	})
+}
+
+// openai_plugin_models asks the plugin ('models' op) for the model list.
+// The result has handled == false when the plugin declines the request.
+fn (mut app App) openai_plugin_models(method string, path string, req_id string, trace_id string) !OpenAIPluginModelsResult {
+	resp := app.openai_call_plugin('models', json.encode(OpenAIPluginModelsPayload{
+		method: method.to_upper()
+		path: path
+		base_path: app.openai_base_path
+		request_id: req_id
+		trace_id: trace_id
+	}), req_id, trace_id, map[string]string{})!
+	if openai_plugin_not_handled(resp.result) {
+		return OpenAIPluginModelsResult{}
+	}
+	return OpenAIPluginModelsResult{
+		handled: true
+		models: openai_models_from_plugin_json(resp.result)!
+	}
+}
+
+// openai_resolved_plan_from_plugin_result_with_defaults turns a raw plugin routing
+// reply into a validated OpenAIResolvedPlan bound to a configured backend.
+fn (mut app App) openai_resolved_plan_from_plugin_result_with_defaults(model string, body string, raw string, default_path string, default_output_protocol string) !OpenAIResolvedPlan {
+	plan := openai_upstream_plan_from_plugin_json_with_defaults(raw, default_path, default_output_protocol)!
+	backend_name := plan.backend.trim_space()
+	// A plugin plan must name a backend that exists in the gateway configuration.
+	if backend_name == '' {
+		return openai_plan_error('openai_plugin_plan_missing_backend', 'plugin plan must include backend')
+	}
+	backend := app.openai_backends[backend_name] or {
+		return openai_plan_error('openai_plugin_plan_unknown_backend', 'unknown backend ${backend_name}')
+	}
+	// Each validator either normalises its field or returns a coded plan error.
+	plan_method := openai_validate_plan_method(plan.method)!
+	plan_path := openai_validate_plan_path(plan.path)!
+	stream_mode := openai_validate_stream_mode(plan.stream_mode)!
+	response_codec := openai_validate_response_codec(plan.response_codec, stream_mode)!
+	output_protocol := openai_validate_output_protocol(plan.output_protocol, stream_mode)!
+	mapper := openai_validate_mapper(plan.mapper)!
+	plan_headers := openai_sanitize_plan_headers(plan.headers)
+	// A non-blank plugin body replaces the client body; otherwise the client body is
+	// passed through openai_replace_model_in_body with the plan's upstream model.
+	plan_body := if plan.body.trim_space() != '' {
+		plan.body
+	} else {
+		openai_replace_model_in_body(body, plan.upstream_model)
+	}
+	return OpenAIResolvedPlan{
+		backend_name: backend_name
+		backend: backend
+		method: plan_method
+		path: plan_path
+		body: plan_body
+		model: model
+		stream_mode: stream_mode
+		response_codec: response_codec
+		output_protocol: output_protocol
+		mapper: mapper
+		headers: plan_headers
+	}
+}
+
+// openai_resolved_plan_from_plugin_result resolves a plugin reply using the
+// chat-completions defaults ('/chat/completions', 'openai.chat.completion').
+fn (mut app App) openai_resolved_plan_from_plugin_result(model string, body string, raw string) !OpenAIResolvedPlan {
+	return app.openai_resolved_plan_from_plugin_result_with_defaults(model, body, raw,
+		'/chat/completions', 'openai.chat.completion')
+}
+
+// openai_plugin_plan asks the plugin ('chat.route' op) to route a chat request.
+// The result has handled == false when the plugin declines.
+fn (mut app App) openai_plugin_plan(model string, body string, method string, path string, req_id string, trace_id string) !OpenAIPluginPlanResult {
+	resp := app.openai_call_plugin('chat.route', json.encode(OpenAIPluginChatPayload{
+		method: method.to_upper()
+		path: path
+		model: model
+		stream: openai_is_stream_request(body)
+		body: body
+		base_path: app.openai_base_path
+		request_id: req_id
+		trace_id: trace_id
+	}), req_id, trace_id, {
+		'model': model
+	})!
+	if openai_plugin_not_handled(resp.result) {
+		return OpenAIPluginPlanResult{}
+	}
+	return OpenAIPluginPlanResult{
+		handled: true
+		plan: app.openai_resolved_plan_from_plugin_result(model, body, resp.result)!
+	}
+}
+
+// openai_plugin_responses_plan asks the plugin ('responses.route' op) to route a
+// Responses request. Missing plan fields default to path '/responses' and protocol
+// 'openai.response'. The result has handled == false when the plugin declines.
+fn (mut app App) openai_plugin_responses_plan(model string, body string, method string, path string, req_id string, trace_id string) !OpenAIPluginPlanResult {
+	resp := app.openai_call_plugin('responses.route', json.encode(OpenAIPluginResponsesPayload{
+		method: method.to_upper()
+		path: path
+		model: model
+		stream: openai_is_stream_request(body)
+		body: body
+		base_path: app.openai_base_path
+		request_id: req_id
+		trace_id: trace_id
+	}), req_id, trace_id, {
+		'model': model
+	})!
+	if openai_plugin_not_handled(resp.result) {
+		return OpenAIPluginPlanResult{}
+	}
+	return OpenAIPluginPlanResult{
+		handled: true
+		plan: app.openai_resolved_plan_from_plugin_result_with_defaults(model, body,
+			resp.result, '/responses', 'openai.response')!
+	}
+}
+
+// openai_plugin_fallback_plan asks the plugin ('chat.fallback' op) for an alternative
+// plan after failed_plan produced the given status and error. Returns an unhandled
+// result without calling the plugin when no plugin is configured.
+fn (mut app App) openai_plugin_fallback_plan(model string, body string, method string, path string, failed_plan OpenAIResolvedPlan, status_code int, error_code string, error_message string, req_id string, trace_id string) !OpenAIPluginPlanResult {
+	if app.openai_plugin.trim_space() == '' {
+		return OpenAIPluginPlanResult{}
+	}
+	resp := app.openai_call_plugin('chat.fallback', json.encode(OpenAIPluginFallbackPayload{
+		method: method.to_upper()
+		path: path
+		model: model
+		stream: openai_is_stream_request(body)
+		body: body
+		base_path: app.openai_base_path
+		failed_backend: failed_plan.backend_name
+		status_code: status_code
+		error_code: error_code
+		error_message: error_message
+		request_id: req_id
+		trace_id: trace_id
+	}), req_id, trace_id, {
+		'model': model
+		'failed_backend': failed_plan.backend_name
+	})!
+	if openai_plugin_not_handled(resp.result) {
+		return OpenAIPluginPlanResult{}
+	}
+	return OpenAIPluginPlanResult{
+		handled: true
+		plan: app.openai_resolved_plan_from_plugin_result(model, body, resp.result)!
+	}
+}
+
+// openai_call_executor_op runs operation `op` of the 'openai' capability on the
+// executor plugin named by the plan's backend, passing the plan's body, model and
+// codec settings. Errors with 'openai_executor_missing_name:<backend>' when the
+// backend has no executor configured.
+fn (mut app App) openai_call_executor_op(plan OpenAIResolvedPlan, op string, method string, path string, req_id string, trace_id string) !PluginCallResponse {
+	executor_name := plan.backend.executor.trim_space()
+	if executor_name == '' {
+		return error('openai_executor_missing_name:${plan.backend_name}')
+	}
+	return app.call_plugin(PluginCallRequest{
+		plugin: executor_name
+		capability: 'openai'
+		op: op
+		request_id: req_id
+		trace_id: trace_id
+		payload: json.encode(OpenAIExecutorPayload{
+			method: method.to_upper()
+			path: path
+			model: plan.model
+			stream: openai_is_stream_request(plan.body)
+			body: plan.body
+			backend: plan.backend_name
+			request_id: req_id
+			trace_id: trace_id
+			response_codec: plan.response_codec
+			output_protocol: plan.output_protocol
+		})
+		metadata: {
+			'model': plan.model
+			'backend': plan.backend_name
+		}
+	})
+}
+
+// openai_call_executor runs the 'chat.execute' operation on the plan's executor.
+fn (mut app App) openai_call_executor(plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string) !PluginCallResponse {
+	return app.openai_call_executor_op(plan, 'chat.execute', method, path, req_id, trace_id)
+}
+
+// openai_call_executor_stream_op is the streaming counterpart of
+// openai_call_executor_op: it builds the same request and passes on_frame to
+// call_plugin_stream.
+fn (mut app App) openai_call_executor_stream_op(plan OpenAIResolvedPlan, op string, method string, path string, req_id string, trace_id string, on_frame PluginStreamFrameFn) !PluginStreamCallResponse {
+	executor_name := plan.backend.executor.trim_space()
+	if executor_name == '' {
+		return error('openai_executor_missing_name:${plan.backend_name}')
+	}
+	return app.call_plugin_stream(PluginCallRequest{
+		plugin: executor_name
+		capability: 'openai'
+		op: op
+		request_id: req_id
+		trace_id: trace_id
+		payload: json.encode(OpenAIExecutorPayload{
+			method: method.to_upper()
+			path: path
+			model: plan.model
+			stream: openai_is_stream_request(plan.body)
+			body: plan.body
+			backend: plan.backend_name
+			request_id: req_id
+			trace_id: trace_id
+			response_codec: plan.response_codec
+			output_protocol: plan.output_protocol
+		})
+		metadata: {
+			'model': plan.model
+			'backend': plan.backend_name
+		}
+	}, on_frame)
+}
+
+// openai_call_executor_stream runs the streaming 'chat.execute' operation on the
+// plan's executor.
+fn (mut app App) openai_call_executor_stream(plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, on_frame PluginStreamFrameFn) !PluginStreamCallResponse {
+	return app.openai_call_executor_stream_op(plan, 'chat.execute', method, path, req_id,
+		trace_id, on_frame)
+}
+
+// openai_resolve_plan resolves a chat-completions request to an upstream plan.
+// A configured plugin is consulted first; if it declines (or none is configured)
+// the static route table is used. A plugin error is propagated, not ignored.
+fn (mut app App) openai_resolve_plan(model string, body string, method string, path string, req_id string, trace_id string) !OpenAIResolvedPlan {
+	if app.openai_plugin.trim_space() != '' {
+		result := app.openai_plugin_plan(model, body, method, path, req_id, trace_id)!
+		if result.handled {
+			return result.plan
+		}
+	}
+	route := app.openai_resolve_route(model)!
+	return openai_builtin_plan_from_route(route, body)
+}
+
+// openai_resolve_responses_plan is the Responses-endpoint counterpart of
+// openai_resolve_plan; the built-in fallback targets '/responses' with the
+// 'openai.response' protocol.
+fn (mut app App) openai_resolve_responses_plan(model string, body string, method string, path string, req_id string, trace_id string) !OpenAIResolvedPlan {
+	if app.openai_plugin.trim_space() != '' {
+		result := app.openai_plugin_responses_plan(model, body, method, path, req_id,
+			trace_id)!
+		if result.handled {
+			return result.plan
+		}
+	}
+	route := app.openai_resolve_route(model)!
+	return openai_builtin_plan_from_route_for_endpoint(route, body, '/responses', 'openai.response')
+}
+
+// openai_resolve_responses_passthrough_plan builds a built-in passthrough plan for
+// relative_target without consulting the plugin. A model named in the body selects
+// the route; otherwise the default backend is used.
+fn (mut app App) openai_resolve_responses_passthrough_plan(relative_target string, body string, method string) !OpenAIResolvedPlan {
+	model := openai_request_model(body)
+	if model.trim_space() != '' {
+		route := app.openai_resolve_route(model)!
+		return openai_builtin_plan_from_route_for_endpoint_method(route, body, relative_target,
+			'openai.response', method)
+	}
+	// No model in the body: fall back to the default backend and forward the body unchanged.
+	backend_name := app.openai_default_backend.trim_space()
+	if backend_name == '' {
+		return error('openai_responses_passthrough_missing_default_backend')
+	}
+	backend := app.openai_backends[backend_name] or {
+		return error('unknown backend ${backend_name}')
+	}
+	return OpenAIResolvedPlan{
+		backend_name: backend_name
+		backend: backend
+		method: method.to_upper()
+		path: relative_target
+		body: body
+		model: model
+		stream_mode: 'passthrough'
+		response_codec: 'sse'
+		output_protocol: 'openai.response'
+		mapper: 'builtin'
+		headers: map[string]string{}
+	}
+}
+
+// openai_error sends an OpenAI-style JSON error of type 'invalid_request_error'.
+fn openai_error(mut app App, mut ctx Context, status int, path string, method string, req_id string, trace_id string, start_ms i64, code string, message string) veb.Result {
+	return openai_error_typed(mut app, mut ctx, status, path, method, req_id, trace_id,
+		start_ms, code, message, 'invalid_request_error')
+}
+
+// openai_error_typed sends an OpenAI-style JSON error response with the given HTTP
+// status and error type, sets the request/trace id headers, and emits an
+// 'http.request' event carrying the status and the time elapsed since start_ms.
+fn openai_error_typed(mut app App, mut ctx Context, status int, path string, method string, req_id string, trace_id string, start_ms i64, code string, message string, typ string) veb.Result {
+	body := openai_error_body_json(code, message, typ)
+	ctx.res.set_status(http.status_from_int(status))
+	ctx.set_content_type('application/json; charset=utf-8')
+	// Header failures are deliberately ignored: the error body is still sent.
+	ctx.set_custom_header('x-request-id', req_id) or {}
+	ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {}
+	app.emit('http.request', {
+		'method': method.to_upper()
+		'path': normalize_path(path)
+		'status': '${status}'
+		'request_id': req_id
+		'trace_id': trace_id
+		'duration_ms': '${time.now().unix_milli() - start_ms}'
+		'provider': 'openai'
+	})
+	return ctx.text(body)
+}
+
+// openai_error_body_json encodes code, message and type as an OpenAIErrorResponse
+// JSON document.
+fn openai_error_body_json(code string, message string, typ string) string {
+	return json.encode(OpenAIErrorResponse{
+		error: OpenAIErrorBody{
+			message: message
+			typ: typ
+			code: code
+		}
+	})
+}
+
+fn openai_upstream_error_from_body(body
string, fallback_code string, fallback_message string) (string, string, string) {
+	// Extracts (code, message, type) from an upstream error body.
+	// Understood shapes:
+	//   {"error": {"message": ..., "code": ..., "type": ...}}
+	//   {"error": "text"}                      (string-valued error)
+	//   {"message": ..., "code": ..., "type": ...}
+	// A body that is not JSON is used verbatim as the message. Missing, null or empty
+	// members fall back to fallback_code / fallback_message / 'server_error'.
+	parsed := json2.decode[json2.Any](body) or {
+		trimmed := body.trim_space()
+		return fallback_code, if trimmed == '' {
+			fallback_message
+		} else {
+			trimmed
+		}, 'server_error'
+	}
+	root := parsed.as_map()
+	mut source := root.clone()
+	mut message_fallback := fallback_message
+	if error_any := root['error'] {
+		if error_any is string {
+			// as_map() on a string yields {'0': text}, which would lose the message;
+			// use the text itself and read code/type from the top level.
+			text := error_any.str().trim_space()
+			if text != '' {
+				message_fallback = text
+			}
+		} else if error_any !is json2.Null {
+			source = error_any.as_map()
+		}
+	}
+	code := openai_error_text_field(source, 'code', fallback_code)
+	message := openai_error_text_field(source, 'message', message_fallback)
+	typ := openai_error_text_field(source, 'type', 'server_error')
+	return code, message, typ
+}
+
+// openai_error_text_field returns obj[key] as text, or fallback when the member is
+// missing, JSON null (which would otherwise render as 'null'), or empty.
+fn openai_error_text_field(obj map[string]json2.Any, key string, fallback string) string {
+	value := obj[key] or { return fallback }
+	if value is json2.Null {
+		return fallback
+	}
+	text := value.str()
+	return if text == '' { fallback } else { text }
+}
+
+// openai_write_error_response_conn writes a complete JSON error response (headers and
+// body) straight to the connection. Write failures are ignored (best effort).
+fn openai_write_error_response_conn(mut conn net.TcpConn, status int, headers map[string]string, code string, message string, typ string) {
+	write_http_stream_headers_conn(mut conn, status, 'application/json; charset=utf-8',
+		headers, false) or {}
+	conn.write_string(openai_error_body_json(code, message, typ)) or {}
+}
+
+// openai_write_sse_error emits the error as an SSE data event followed by the
+// '[DONE]' sentinel. Write failures are ignored (best effort).
+fn openai_write_sse_error(mut conn net.TcpConn, code string, message string, typ string) {
+	write_chunk(mut conn, 'data: ${openai_error_body_json(code, message, typ)}\n\n') or {}
+	write_chunk(mut conn, 'data: [DONE]\n\n') or {}
+}
+
+// openai_finish_passthrough_stream writes the final chunk once, and only if response
+// headers were already sent.
+fn openai_finish_passthrough_stream(mut state OpenAIStreamProxyState) ! {
+	if state.headers_written && !state.final_written {
+		write_final_chunk(mut state.conn)!
+		state.final_written = true
+	}
+}
+
+// openai_finish_mapped_stream writes the final chunk once, and only if response
+// headers were already sent.
+fn openai_finish_mapped_stream(mut state OpenAIMappedStreamProxyState) !
{
+	if state.headers_written && !state.final_written {
+		write_final_chunk(mut state.conn)!
+		state.final_written = true
+	}
+}
+
+// openai_passthrough_chunk_has_done reports whether the 'data: [DONE]' sentinel has
+// appeared in the stream, including when it is split across two chunks. The last
+// 64 bytes seen are kept in state.done_probe for that purpose; state.done is set
+// when the sentinel is found.
+fn openai_passthrough_chunk_has_done(mut state OpenAIStreamProxyState, decoded string) bool {
+	if decoded == '' {
+		return false
+	}
+	window := state.done_probe + decoded
+	if !window.contains('data: [DONE]') {
+		state.done_probe = if window.len > 64 { window[window.len - 64..] } else { window }
+		return false
+	}
+	state.done = true
+	return true
+}
+
+// openai_build_upstream_url joins the backend base URL (trimmed, without trailing
+// slashes) and the relative path.
+fn openai_build_upstream_url(base_url string, relative string) string {
+	base := base_url.trim_space().trim_right('/')
+	return '${base}${relative}'
+}
+
+// openai_backend_auth_key returns the backend's inline api_key if set, otherwise the
+// value of the environment variable named by api_key_env, otherwise ''.
+fn openai_backend_auth_key(backend OpenAIBackendConfig) string {
+	inline_key := backend.api_key.trim_space()
+	if inline_key != '' {
+		return inline_key
+	}
+	env_name := backend.api_key_env.trim_space()
+	return if env_name == '' { '' } else { os.getenv(env_name) }
+}
+
+// openai_http_method maps a method name to http.Method. GET, PUT, PATCH, DELETE and
+// HEAD map directly; any other value (including POST) yields .head when fallback is
+// HEAD and .post otherwise.
+fn openai_http_method(raw string, fallback string) http.Method {
+	requested := raw.trim_space().to_upper()
+	match requested {
+		'GET' { return .get }
+		'PUT' { return .put }
+		'PATCH' { return .patch }
+		'DELETE' { return .delete }
+		'HEAD' { return .head }
+		else {}
+	}
+	if fallback.trim_space().to_upper() == 'HEAD' {
+		return .head
+	}
+	return .post
+}
+
+fn openai_build_headers(mut ctx Context, backend OpenAIBackendConfig, req_id string, stream bool, extra map[string]string) http.Header {
+	mut header := http.new_header()
+	content_type := ctx.req.header.get(.content_type) or { 'application/json' }
+	accept := if stream { 'text/event-stream' } else { ctx.req.header.get(.accept) or {
+			'application/json'} }
+	header.add(.content_type, content_type)
+	header.add(.accept, accept)
+	header.add_custom('x-request-id', req_id) or {}
+	api_key := openai_backend_auth_key(backend)
+	if api_key != '' {
+		header.add(.authorization, 'Bearer ${api_key}')
+	}
+	for name, value in extra {
+		if name.trim_space() != '' {
+			header.add_custom(name,
value) or {}
+		}
+	}
+	return header
+}
+
+// ensure_openai_stream_headers_written sends the response headers for a passthrough
+// stream exactly once, using the status and content type recorded in state and
+// adding 'x-accel-buffering: no'.
+fn ensure_openai_stream_headers_written(mut state OpenAIStreamProxyState) ! {
+	if state.headers_written {
+		return
+	}
+	mut headers := state.response_headers.clone()
+	headers['x-accel-buffering'] = 'no'
+	write_http_stream_headers_conn_with_close(mut state.conn, state.status_code, state.content_type,
+		headers, true, false)!
+	state.headers_written = true
+}
+
+// openai_progress_body_cb is the http.fetch body-progress callback for passthrough
+// streaming. Upstream error responses (status >= 400) are buffered in
+// state.error_body instead of being forwarded; everything else is relayed to the
+// client chunk by chunk. Once the '[DONE]' sentinel is seen the stream is finished
+// and the fetch is aborted by returning openai_stream_done_fetch_error.
+fn openai_progress_body_cb(request &http.Request, chunk []u8, _body_read_so_far u64, _body_expected_size u64, status_code int) ! {
+	// Recover the proxy state from the request's untyped user_ptr.
+	mut state := &OpenAIStreamProxyState(unsafe { nil })
+	pstate := unsafe { &voidptr(&state) }
+	unsafe {
+		*pstate = request.user_ptr
+	}
+	if status_code > 0 {
+		state.status_code = status_code
+	}
+	decoded := openai_decode_progress_chunk(mut state.chunk_decoder, chunk)
+	if state.status_code >= 400 {
+		if decoded.len > 0 {
+			state.error_body += decoded
+		}
+		return
+	}
+	ensure_openai_stream_headers_written(mut state)!
+	// HEAD responses get headers only.
+	if state.method.to_upper() != 'HEAD' && decoded.len > 0 {
+		write_chunk(mut state.conn, decoded)!
+	}
+	if openai_passthrough_chunk_has_done(mut state, decoded) {
+		openai_finish_passthrough_stream(mut state)!
+		return error(openai_stream_done_fetch_error)
+	}
+}
+
+// ensure_openai_mapped_stream_headers_written sends the response headers for a
+// mapped stream exactly once, always as 'text/event-stream' and with
+// 'x-accel-buffering: no'.
+fn ensure_openai_mapped_stream_headers_written(mut state OpenAIMappedStreamProxyState) ! {
+	if state.headers_written {
+		return
+	}
+	mut headers := state.response_headers.clone()
+	headers['x-accel-buffering'] = 'no'
+	write_http_stream_headers_conn_with_close(mut state.conn, state.status_code, 'text/event-stream',
+		headers, true, false)!
+	state.headers_written = true
+}
+
+// openai_extract_mapped_row is the built-in mapper for one upstream NDJSON row.
+// It looks, in order, for 'message' (content and tool_calls), top-level
+// 'tool_calls', 'response', and 'content'; 'done' and usage counters are read from
+// the top level in every case. A row that is not valid JSON yields an unhandled
+// mapping; any JSON row is reported as handled even if it carries no content.
+fn openai_extract_mapped_row(line string) OpenAIFrameMapping {
+	parsed := json2.decode[json2.Any](line) or { return OpenAIFrameMapping{} }
+	root := parsed.as_map()
+	done := (root['done'] or { json2.Any(false) }).bool()
+	mut tool_calls := []json2.Any{}
+	usage := openai_usage_from_map(root)
+	if message_any := root['message'] {
+		message := message_any.as_map()
+		content := (message['content'] or { json2.Any('') }).str()
+		if tool_calls_any := message['tool_calls'] {
+			tool_calls = tool_calls_any.as_array()
+		}
+		return OpenAIFrameMapping{
+			content: content
+			tool_calls: tool_calls
+			usage: usage
+			done: done
+			handled: true
+			finish_reason: if tool_calls.len > 0 { 'tool_calls' } else { '' }
+		}
+	}
+	if tool_calls_any := root['tool_calls'] {
+		tool_calls = tool_calls_any.as_array()
+		return OpenAIFrameMapping{
+			tool_calls: tool_calls
+			usage: usage
+			done: done
+			handled: true
+			finish_reason: if tool_calls.len > 0 { 'tool_calls' } else { '' }
+		}
+	}
+	if response_any := root['response'] {
+		return OpenAIFrameMapping{
+			content: response_any.str()
+			usage: usage
+			done: done
+			handled: true
+		}
+	}
+	if content_any := root['content'] {
+		return OpenAIFrameMapping{
+			content: content_any.str()
+			usage: usage
+			done: done
+			handled: true
+		}
+	}
+	return OpenAIFrameMapping{
+		usage: usage
+		done: done
+		handled: true
+	}
+}
+
+// openai_int_field returns obj[key] converted to int, or 0 when the key is missing.
+fn openai_int_field(obj map[string]json2.Any, key string) int {
+	value := obj[key] or { return 0 }
+	return value.int()
+}
+
+// openai_usage_from_map extracts token usage counters. A nested 'usage' object is
+// preferred; otherwise top-level counters are used, where 'prompt_eval_count' and
+// 'eval_count' are added to 'prompt_tokens' and 'completion_tokens'. A missing or
+// zero total is computed as prompt + completion. Returns an empty map when every
+// counter is zero.
+fn openai_usage_from_map(root map[string]json2.Any) map[string]int {
+	if usage_any := root['usage'] {
+		usage := usage_any.as_map()
+		prompt := openai_int_field(usage, 'prompt_tokens')
+		completion := openai_int_field(usage, 'completion_tokens')
+		total_raw := openai_int_field(usage, 'total_tokens')
+		total := if total_raw > 0 { total_raw } else { prompt + completion }
+		if prompt > 0 || completion > 0 || total > 0 {
+			return {
+				'prompt_tokens': prompt
+				'completion_tokens':
completion
+				'total_tokens': total
+			}
+		}
+	}
+	prompt := openai_int_field(root, 'prompt_tokens') + openai_int_field(root, 'prompt_eval_count')
+	completion := openai_int_field(root, 'completion_tokens') + openai_int_field(root, 'eval_count')
+	total_raw := openai_int_field(root, 'total_tokens')
+	total := if total_raw > 0 { total_raw } else { prompt + completion }
+	if prompt > 0 || completion > 0 || total > 0 {
+		return {
+			'prompt_tokens': prompt
+			'completion_tokens': completion
+			'total_tokens': total
+		}
+	}
+	return map[string]int{}
+}
+
+// openai_merge_usage copies every positive counter from usage into acc; zero or
+// negative values never overwrite what acc already holds.
+fn openai_merge_usage(mut acc map[string]int, usage map[string]int) {
+	for counter, amount in usage {
+		if amount <= 0 {
+			continue
+		}
+		acc[counter] = amount
+	}
+}
+
+// openai_stream_chunk_json renders one 'chat.completion.chunk' object for a mapped
+// frame. The delta carries 'content' and 'tool_calls' only when present, and
+// 'finish_reason' is emitted only when the mapping sets one.
+fn openai_stream_chunk_json(state OpenAIMappedStreamProxyState, mapping OpenAIFrameMapping) string {
+	mut delta := map[string]json2.Any{}
+	if mapping.content != '' {
+		delta['content'] = json2.Any(mapping.content)
+	}
+	if mapping.tool_calls.len > 0 {
+		delta['tool_calls'] = json2.Any(mapping.tool_calls)
+	}
+	mut choice := {
+		'index': json2.Any(0)
+		'delta': json2.Any(delta)
+	}
+	if mapping.finish_reason != '' {
+		choice['finish_reason'] = json2.Any(mapping.finish_reason)
+	}
+	envelope := {
+		'id':      json2.Any('chatcmpl-${state.request_id}')
+		'object':  json2.Any('chat.completion.chunk')
+		'created': json2.Any(state.created)
+		'model':   json2.Any(state.model)
+		'choices': json2.Any([json2.Any(choice)])
+	}
+	return json2.Any(envelope).json_str()
+}
+
+// openai_usage_json_obj converts usage counters to a JSON object that always has
+// the three OpenAI usage members; absent counters read as 0.
+fn openai_usage_json_obj(usage map[string]int) map[string]json2.Any {
+	mut obj := map[string]json2.Any{}
+	for counter in ['prompt_tokens', 'completion_tokens', 'total_tokens'] {
+		obj[counter] = json2.Any(usage[counter])
+	}
+	return obj
+}
+
+fn openai_stream_usage_chunk_json(state OpenAIMappedStreamProxyState) string {
+	mut root := map[string]json2.Any{}
+	root['id'] = json2.Any('chatcmpl-${state.request_id}')
+	root['object'] =
json2.Any('chat.completion.chunk')
+	root['created'] = json2.Any(state.created)
+	root['model'] = json2.Any(state.model)
+	root['choices'] = json2.Any([]json2.Any{})
+	root['usage'] = json2.Any(openai_usage_json_obj(state.usage))
+	return json2.Any(root).json_str()
+}
+
+// openai_write_stream_usage_chunk emits the usage-only chunk as an SSE data event.
+// Nothing is written when no usage counters were collected.
+fn openai_write_stream_usage_chunk(mut state OpenAIMappedStreamProxyState) ! {
+	if state.usage.len == 0 {
+		return
+	}
+	ensure_openai_mapped_stream_headers_written(mut state)!
+	write_chunk(mut state.conn, 'data: ${openai_stream_usage_chunk_json(state)}\n\n')!
+}
+
+// openai_tool_call_index returns the call's 'index' member as int, or fallback when
+// the member is missing.
+fn openai_tool_call_index(call map[string]json2.Any, fallback int) int {
+	index_any := call['index'] or { return fallback }
+	return index_any.int()
+}
+
+// openai_merge_tool_call folds an incremental tool-call fragment into an existing
+// call and returns the merged copy. 'id' and 'type' are replaced when the incoming
+// value is non-empty, 'index' is always replaced, a non-empty function name
+// replaces the old one, and function arguments are appended to what was already
+// accumulated.
+fn openai_merge_tool_call(existing map[string]json2.Any, incoming map[string]json2.Any) map[string]json2.Any {
+	mut merged := existing.clone()
+	for key in ['id', 'type', 'index'] {
+		if value := incoming[key] {
+			if key == 'index' || value.str() != '' {
+				merged[key] = value
+			}
+		}
+	}
+	if incoming_fn_any := incoming['function'] {
+		incoming_fn := incoming_fn_any.as_map()
+		mut fn_obj := if existing_fn_any := merged['function'] {
+			existing_fn_any.as_map()
+		} else {
+			map[string]json2.Any{}
+		}
+		if name_any := incoming_fn['name'] {
+			name := name_any.str()
+			if name != '' {
+				fn_obj['name'] = json2.Any(name)
+			}
+		}
+		if args_any := incoming_fn['arguments'] {
+			args := args_any.str()
+			if args != '' {
+				// Argument text arrives in pieces; concatenate rather than replace.
+				prev := (fn_obj['arguments'] or { json2.Any('') }).str()
+				fn_obj['arguments'] = json2.Any(prev + args)
+			}
+		}
+		merged['function'] = json2.Any(fn_obj)
+	}
+	return merged
+}
+
+// openai_merge_tool_calls merges each incoming call into acc, matching by 'index'
+// (an entry without one is matched by its position); unmatched calls are appended.
+fn openai_merge_tool_calls(mut acc []json2.Any, calls []json2.Any) {
+	for call_any in calls {
+		call := call_any.as_map()
+		index := openai_tool_call_index(call, acc.len)
+		mut found := -1
+		for i, existing_any in acc {
+			existing := existing_any.as_map()
+			if openai_tool_call_index(existing, i) == index {
+				found = i
+				break
+			}
+		}
+		if found < 0 {
+			acc << json2.Any(call)
+			continue
+		}
+
acc[found] = json2.Any(openai_merge_tool_call(acc[found].as_map(), call))
+	}
+}
+
+// openai_plugin_map_frame_result decodes a plugin 'chat.map_frame' reply.
+//   - a declined reply yields an unhandled mapping (the caller falls back to the
+//     built-in mapper);
+//   - a reply that is not JSON yields a handled mapping with an error;
+//   - a non-null 'error' member (object with 'message', or any scalar) yields a
+//     terminal error mapping, using 'mapper error' when no message text is available;
+//   - otherwise 'content', 'tool_calls', usage counters, 'done' and 'finish_reason'
+//     are read from the top level. JSON null members are treated as absent.
+fn openai_plugin_map_frame_result(raw string) OpenAIFrameMapping {
+	if openai_plugin_not_handled(raw) {
+		return OpenAIFrameMapping{}
+	}
+	parsed := json2.decode[json2.Any](raw) or {
+		return OpenAIFrameMapping{
+			handled: true
+			error: 'invalid mapper response'
+		}
+	}
+	root := parsed.as_map()
+	if error_any := root['error'] {
+		// "error": null means "no error".
+		if error_any !is json2.Null {
+			mut message := ''
+			if error_any is map[string]json2.Any {
+				error_obj := error_any.as_map()
+				if msg_any := error_obj['message'] {
+					if msg_any !is json2.Null {
+						message = msg_any.str()
+					}
+				}
+			} else {
+				// as_map() would wrap a scalar as {'0': value} and hide its text,
+				// so scalars are read directly.
+				message = error_any.str()
+			}
+			return OpenAIFrameMapping{
+				done: true
+				handled: true
+				error: if message.trim_space() == '' { 'mapper error' } else { message }
+			}
+		}
+	}
+	mut content := ''
+	if content_any := root['content'] {
+		if content_any !is json2.Null {
+			content = content_any.str()
+		}
+	}
+	mut tool_calls := []json2.Any{}
+	if tool_calls_any := root['tool_calls'] {
+		// as_array() would turn null into a one-element array.
+		if tool_calls_any !is json2.Null {
+			tool_calls = tool_calls_any.as_array()
+		}
+	}
+	usage := openai_usage_from_map(root)
+	done := (root['done'] or { json2.Any(false) }).bool()
+	mut finish_reason := if tool_calls.len > 0 { 'tool_calls' } else { '' }
+	if finish_any := root['finish_reason'] {
+		// Intermediate OpenAI-style frames commonly carry "finish_reason": null.
+		if finish_any !is json2.Null {
+			text := finish_any.str()
+			if text != '' {
+				finish_reason = text
+			}
+		}
+	}
+	return OpenAIFrameMapping{
+		content: content
+		tool_calls: tool_calls
+		usage: usage
+		done: done
+		handled: true
+		finish_reason: finish_reason
+	}
+}
+
+// openai_plugin_map_frame asks the plugin ('chat.map_frame' op) to translate one
+// upstream frame for the given plan.
+fn (mut app App) openai_plugin_map_frame(plan OpenAIResolvedPlan, frame string, req_id string, trace_id string) !OpenAIFrameMapping {
+	resp := app.openai_call_plugin('chat.map_frame', json.encode(OpenAIPluginMapFramePayload{
+		model: plan.model
+		frame: frame
+		response_codec: plan.response_codec
+		output_protocol: plan.output_protocol
+		request_id: req_id
+		trace_id: trace_id
+	}), req_id, trace_id, {
+		'model': plan.model
+		'mapper': 'plugin'
+	})!
+	return openai_plugin_map_frame_result(resp.result)
+}
+
+// openai_map_line_with_plugin maps one upstream line through the plugin mapper.
+// A failed plugin call is converted into a terminal error mapping instead of being
+// propagated.
+fn openai_map_line_with_plugin(mut state OpenAIMappedStreamProxyState, line string) OpenAIFrameMapping {
+	mut app := unsafe { &App(state.app) }
+	return app.openai_plugin_map_frame(OpenAIResolvedPlan{
+		model: state.model
+		response_codec: state.response_codec
+		output_protocol: state.output_protocol
+	}, line, state.request_id, state.trace_id) or {
+		return OpenAIFrameMapping{
+			done: true
+			handled: true
+			error: err.msg()
+		}
+	}
+}
+
+// openai_write_mapped_stream_line translates one upstream line into SSE output.
+// Blank lines are skipped. With the 'plugin' mapper the plugin is tried first and
+// the built-in mapper is used when the plugin declines. A mapper error is reported
+// to the client as an SSE error and ends the stream. Content and tool calls are
+// forwarded as chat-completion chunks, usage is accumulated, and a row marked done
+// emits the usage chunk, the '[DONE]' sentinel and the final chunk.
+// Once the stream has been terminated, further lines are ignored.
+fn openai_write_mapped_stream_line(mut state OpenAIMappedStreamProxyState, line string) ! {
+	// The caller may still hold buffered rows after the terminal one; writing them
+	// after '[DONE]' and the final chunk would corrupt the response framing.
+	if state.done {
+		return
+	}
+	trimmed := line.trim_space()
+	if trimmed == '' {
+		return
+	}
+	mapping := if state.mapper == 'plugin' {
+		plugin_mapping := openai_map_line_with_plugin(mut state, trimmed)
+		if plugin_mapping.error != '' {
+			state.mapper_error = plugin_mapping.error
+		}
+		if plugin_mapping.handled {
+			plugin_mapping
+		} else {
+			openai_extract_mapped_row(trimmed)
+		}
+	} else {
+		openai_extract_mapped_row(trimmed)
+	}
+	if mapping.error != '' {
+		state.mapper_error = mapping.error
+		ensure_openai_mapped_stream_headers_written(mut state)!
+		openai_write_sse_error(mut state.conn, 'mapper_error', mapping.error, 'server_error')
+		state.done = true
+		openai_finish_mapped_stream(mut state)!
+		return
+	}
+	if mapping.content != '' || mapping.tool_calls.len > 0 {
+		ensure_openai_mapped_stream_headers_written(mut state)!
+		write_chunk(mut state.conn, 'data: ${openai_stream_chunk_json(state, mapping)}\n\n')!
+	}
+	openai_merge_usage(mut state.usage, mapping.usage)
+	if mapping.done && !state.done {
+		ensure_openai_mapped_stream_headers_written(mut state)!
+		openai_write_stream_usage_chunk(mut state)!
+		write_chunk(mut state.conn, 'data: [DONE]\n\n')!
+		state.done = true
+		openai_finish_mapped_stream(mut state)!
+	}
+}
+
+// openai_mapped_progress_body_cb is the http.fetch body-progress callback for mapped
+// streaming.
+fn openai_mapped_progress_body_cb(request &http.Request, chunk []u8, _body_read_so_far u64, _body_expected_size u64, status_code int) !
{
+	// Recover the proxy state from the request's untyped user_ptr.
+	mut state := &OpenAIMappedStreamProxyState(unsafe { nil })
+	pstate := unsafe { &voidptr(&state) }
+	unsafe {
+		*pstate = request.user_ptr
+	}
+	if status_code > 0 {
+		state.status_code = status_code
+	}
+	decoded := openai_decode_progress_chunk(mut state.chunk_decoder, chunk)
+	// Upstream error bodies are buffered for the caller instead of being mapped.
+	if state.status_code >= 400 {
+		if decoded.len > 0 {
+			state.error_body += decoded
+		}
+		return
+	}
+	if state.method.to_upper() == 'HEAD' || decoded.len == 0 {
+		return
+	}
+	// Re-assemble complete lines; a trailing partial line stays in line_buffer
+	// until more data arrives.
+	state.line_buffer += decoded
+	for state.line_buffer.contains('\n') {
+		line := state.line_buffer.all_before('\n')
+		state.line_buffer = state.line_buffer.all_after('\n')
+		openai_write_mapped_stream_line(mut state, line)!
+	}
+	// Abort the fetch once the stream was terminated; the caller recognises this
+	// sentinel error as a normal end of stream.
+	if state.done {
+		return error(openai_stream_done_fetch_error)
+	}
+}
+
+// openai_reset_mapped_stream_state_for_plan prepares the stream state for a retry
+// with another plan: status, line buffer, done/error flags and usage are cleared
+// and the plan's model, mapper, codec, protocol and backend header are applied.
+// NOTE(review): chunk_decoder and headers_written are not reset here — confirm the
+// decoder carries no state over from the failed attempt.
+fn openai_reset_mapped_stream_state_for_plan(mut state OpenAIMappedStreamProxyState, plan OpenAIResolvedPlan) {
+	state.status_code = 200
+	state.response_headers['x-vhttpd-openai-backend'] = plan.backend_name
+	state.line_buffer = ''
+	state.model = plan.model
+	state.mapper = plan.mapper
+	state.response_codec = plan.response_codec
+	state.output_protocol = plan.output_protocol
+	state.done = false
+	state.mapper_error = ''
+	state.error_body = ''
+	state.usage = map[string]int{}
+	state.final_written = false
+}
+
+// openai_fetch_mapped_stream performs the upstream request for a mapped stream,
+// feeding the body to openai_mapped_progress_body_cb. Returns '' on success or when
+// the callback ended the fetch via openai_stream_done_fetch_error, otherwise the
+// fetch error message.
+fn openai_fetch_mapped_stream(mut ctx Context, mut state OpenAIMappedStreamProxyState, plan OpenAIResolvedPlan, method string, req_id string) string {
+	_ := http.fetch(
+		url: openai_build_upstream_url(plan.backend.base_url, plan.path)
+		method: openai_http_method(plan.method, method)
+		header: openai_build_headers(mut ctx, plan.backend, req_id, true,
+			plan.headers)
+		data: plan.body
+		on_progress_body: openai_mapped_progress_body_cb
+		user_ptr: state
+		stop_copying_limit: 65536
+	) or {
+		if err.msg() == openai_stream_done_fetch_error {
+			return ''
+		}
+		return err.msg()
+	}
+	return ''
+}
+
+fn openai_proxy_mapped_stream(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path
string, req_id string, trace_id string, start_ms i64) veb.Result { + if plan.response_codec != 'ndjson' || plan.output_protocol != 'openai.chat.completion' { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms, + 'openai_plugin_plan_unsupported_mapper', 'unsupported mapper ${plan.response_codec} -> ${plan.output_protocol}') + } + if plan.backend.kind.trim_space() !in ['', 'openai_http', 'http'] { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms, + 'unsupported_backend', 'unsupported OpenAI backend kind ${plan.backend.kind}') + } + if plan.backend.base_url.trim_space() == '' { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms, + 'missing_backend_base_url', 'OpenAI backend ${plan.backend_name} has no base_url') + } + ctx.takeover_conn_reusable() + ctx.conn.set_write_timeout(time.infinite) + ctx.conn.set_read_timeout(time.infinite) + mut client_conn := ctx.conn + mut headers := map[string]string{} + headers['x-request-id'] = req_id + headers['x-vhttpd-trace-id'] = trace_id + headers['x-vhttpd-openai-backend'] = plan.backend_name + mut state := &OpenAIMappedStreamProxyState{ + app: unsafe { &app } + conn: client_conn + method: method + status_code: 200 + response_headers: headers + model: plan.model + request_id: req_id + trace_id: trace_id + mapper: plan.mapper + response_codec: plan.response_codec + output_protocol: plan.output_protocol + created: int(time.now().unix()) + } + mut fetch_err_msg := openai_fetch_mapped_stream(mut ctx, mut state, plan, method, + req_id) + if fetch_err_msg != '' && !state.headers_written { + fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method, path, + plan, 502, 'upstream_fetch_failed', fetch_err_msg, req_id, trace_id) or { + OpenAIPluginPlanResult{} + } + if fallback.handled && fallback.plan.stream_mode == 'mapped' { + openai_reset_mapped_stream_state_for_plan(mut state, fallback.plan) + fallback_err_msg := 
openai_fetch_mapped_stream(mut ctx, mut state, fallback.plan, + method, req_id) + if fallback_err_msg == '' { + fetch_err_msg = '' + } else { + fetch_err_msg = fallback_err_msg + } + } + } + if fetch_err_msg != '' && !state.headers_written { + err_headers := { + 'x-request-id': req_id + 'x-vhttpd-trace-id': trace_id + 'x-vhttpd-error-class': 'openai_upstream_fetch_failed' + } + openai_write_error_response_conn(mut client_conn, 502, err_headers, 'upstream_fetch_failed', + fetch_err_msg, 'server_error') + client_conn.close() or {} + return veb.no_result() + } + if state.status_code >= 400 && !state.headers_written { + code, message, _ := openai_upstream_error_from_body(state.error_body, 'upstream_error', + 'upstream returned HTTP ${state.status_code}') + fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method, path, + plan, state.status_code, code, message, req_id, trace_id) or { + OpenAIPluginPlanResult{} + } + if fallback.handled && fallback.plan.stream_mode == 'mapped' { + openai_reset_mapped_stream_state_for_plan(mut state, fallback.plan) + fallback_err_msg := openai_fetch_mapped_stream(mut ctx, mut state, fallback.plan, + method, req_id) + if fallback_err_msg != '' && !state.headers_written { + err_headers := { + 'x-request-id': req_id + 'x-vhttpd-trace-id': trace_id + 'x-vhttpd-error-class': 'openai_upstream_fetch_failed' + } + openai_write_error_response_conn(mut client_conn, 502, err_headers, 'upstream_fetch_failed', + fallback_err_msg, 'server_error') + client_conn.close() or {} + return veb.no_result() + } + } + } + if state.status_code >= 400 && !state.headers_written { + code, message, typ := openai_upstream_error_from_body(state.error_body, 'upstream_error', + 'upstream returned HTTP ${state.status_code}') + err_headers := { + 'x-request-id': req_id + 'x-vhttpd-trace-id': trace_id + 'x-vhttpd-error-class': 'openai_upstream_error' + } + openai_write_error_response_conn(mut client_conn, state.status_code, err_headers, + code, message, 
typ) + client_conn.close() or {} + return veb.no_result() + } + if state.line_buffer.trim_space() != '' { + openai_write_mapped_stream_line(mut state, state.line_buffer) or {} + state.line_buffer = '' + } + if state.mapper_error != '' && !state.final_written { + ensure_openai_mapped_stream_headers_written(mut state) or {} + openai_write_sse_error(mut client_conn, 'mapper_error', state.mapper_error, 'server_error') + state.done = true + openai_finish_mapped_stream(mut state) or {} + } + if !state.done { + ensure_openai_mapped_stream_headers_written(mut state) or {} + openai_write_stream_usage_chunk(mut state) or {} + write_chunk(mut client_conn, 'data: [DONE]\n\n') or {} + state.done = true + } + if state.headers_written { + openai_finish_mapped_stream(mut state) or {} + } + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '${state.status_code}' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': '${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + 'backend': plan.backend_name + 'mapper': '${plan.response_codec}->${plan.output_protocol}' + }) + return veb.no_result() +} + +fn openai_fetch_passthrough_stream(mut ctx Context, mut state OpenAIStreamProxyState, plan OpenAIResolvedPlan, method string, req_id string) string { + _ := http.fetch( + url: openai_build_upstream_url(plan.backend.base_url, plan.path) + method: openai_http_method(plan.method, method) + header: openai_build_headers(mut ctx, plan.backend, req_id, true, + plan.headers) + data: plan.body + on_progress_body: openai_progress_body_cb + user_ptr: state + stop_copying_limit: 65536 + ) or { return err.msg() } + return '' +} + +fn openai_proxy_stream(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result { + if plan.backend.kind.trim_space() !in ['', 'openai_http'] { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, 
start_ms, + 'unsupported_backend', 'unsupported OpenAI backend kind ${plan.backend.kind}') + } + if plan.backend.base_url.trim_space() == '' { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms, + 'missing_backend_base_url', 'OpenAI backend ${plan.backend_name} has no base_url') + } + ctx.takeover_conn_reusable() + ctx.conn.set_write_timeout(time.infinite) + ctx.conn.set_read_timeout(time.infinite) + mut client_conn := ctx.conn + mut headers := map[string]string{} + headers['x-request-id'] = req_id + headers['x-vhttpd-trace-id'] = trace_id + headers['x-vhttpd-openai-backend'] = plan.backend_name + mut state := &OpenAIStreamProxyState{ + conn: client_conn + method: method + status_code: 200 + content_type: 'text/event-stream' + response_headers: headers + } + fetch_method := openai_http_method(plan.method, method) + mut fetch_err_msg := '' + _ := http.fetch( + url: openai_build_upstream_url(plan.backend.base_url, plan.path) + method: fetch_method + header: openai_build_headers(mut ctx, plan.backend, req_id, true, + plan.headers) + data: plan.body + on_progress_body: openai_progress_body_cb + user_ptr: state + stop_copying_limit: 65536 + ) or { + if err.msg() == openai_stream_done_fetch_error { + fetch_err_msg = '' + } else { + fetch_err_msg = err.msg() + } + http.Response{} + } + if fetch_err_msg != '' && !state.headers_written { + fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method, path, + plan, 502, 'upstream_fetch_failed', fetch_err_msg, req_id, trace_id) or { + OpenAIPluginPlanResult{} + } + if fallback.handled && fallback.plan.stream_mode == 'passthrough' { + state.status_code = 200 + state.error_body = '' + state.chunk_decoder = OpenAIChunkDecodeState{} + state.done = false + state.done_probe = '' + state.final_written = false + state.response_headers['x-vhttpd-openai-backend'] = fallback.plan.backend_name + fallback_err_msg := openai_fetch_passthrough_stream(mut ctx, mut state, fallback.plan, + method, 
req_id) + if fallback_err_msg == '' { + fetch_err_msg = '' + } else { + fetch_err_msg = fallback_err_msg + } + } + } + if fetch_err_msg != '' && !state.headers_written { + err_headers := { + 'x-request-id': req_id + 'x-vhttpd-trace-id': trace_id + 'x-vhttpd-error-class': 'openai_upstream_fetch_failed' + } + openai_write_error_response_conn(mut client_conn, 502, err_headers, 'upstream_fetch_failed', + fetch_err_msg, 'server_error') + client_conn.close() or {} + return veb.no_result() + } + if state.status_code >= 400 && !state.headers_written { + code, message, typ := openai_upstream_error_from_body(state.error_body, 'upstream_error', + 'upstream returned HTTP ${state.status_code}') + fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method, path, + plan, state.status_code, code, message, req_id, trace_id) or { + OpenAIPluginPlanResult{} + } + if fallback.handled && fallback.plan.stream_mode == 'passthrough' { + state.status_code = 200 + state.error_body = '' + state.chunk_decoder = OpenAIChunkDecodeState{} + state.done = false + state.done_probe = '' + state.final_written = false + state.response_headers['x-vhttpd-openai-backend'] = fallback.plan.backend_name + fallback_err_msg := openai_fetch_passthrough_stream(mut ctx, mut state, fallback.plan, + method, req_id) + if fallback_err_msg == '' && state.status_code < 400 { + if !state.headers_written { + ensure_openai_stream_headers_written(mut state) or {} + } + if state.headers_written { + openai_finish_passthrough_stream(mut state) or {} + } + return veb.no_result() + } + } + if state.status_code >= 400 { + code2, message2, typ2 := openai_upstream_error_from_body(state.error_body, + 'upstream_error', 'upstream returned HTTP ${state.status_code}') + err_headers := { + 'x-request-id': req_id + 'x-vhttpd-trace-id': trace_id + 'x-vhttpd-error-class': 'openai_upstream_error' + } + openai_write_error_response_conn(mut client_conn, state.status_code, err_headers, + code2, message2, typ2) + 
client_conn.close() or {} + return veb.no_result() + } + err_headers := { + 'x-request-id': req_id + 'x-vhttpd-trace-id': trace_id + 'x-vhttpd-error-class': 'openai_upstream_error' + } + openai_write_error_response_conn(mut client_conn, state.status_code, err_headers, + code, message, typ) + client_conn.close() or {} + return veb.no_result() + } + if !state.headers_written { + ensure_openai_stream_headers_written(mut state) or {} + } + if state.headers_written { + openai_finish_passthrough_stream(mut state) or {} + } + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '${state.status_code}' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': '${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + 'backend': plan.backend_name + }) + return veb.no_result() +} + +fn openai_proxy_once(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result { + return openai_proxy_once_attempt(mut app, mut ctx, plan, method, path, req_id, trace_id, + start_ms, true) +} + +fn openai_proxy_once_attempt(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64, allow_fallback bool) veb.Result { + if plan.backend.kind.trim_space() == 'executor' { + return openai_proxy_executor_once(mut app, mut ctx, plan, method, path, req_id, + trace_id, start_ms) + } + if plan.backend.kind.trim_space() !in ['', 'openai_http', 'http'] { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms, + 'unsupported_backend', 'unsupported OpenAI backend kind ${plan.backend.kind}') + } + if plan.backend.base_url.trim_space() == '' { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms, + 'missing_backend_base_url', 'OpenAI backend ${plan.backend_name} has no base_url') + } + resp := http.fetch( + url: 
openai_build_upstream_url(plan.backend.base_url, plan.path) + method: openai_http_method(plan.method, method) + header: openai_build_headers(mut ctx, plan.backend, req_id, false, plan.headers) + data: plan.body + ) or { + if allow_fallback { + fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method, + path, plan, 502, 'upstream_fetch_failed', err.msg(), req_id, trace_id) or { + OpenAIPluginPlanResult{} + } + if fallback.handled { + return openai_proxy_once_attempt(mut app, mut ctx, fallback.plan, method, + path, req_id, trace_id, start_ms, false) + } + } + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms, + 'upstream_fetch_failed', err.msg()) + } + if resp.status_code >= 400 { + code, message, typ := openai_upstream_error_from_body(resp.body, 'upstream_error', + 'upstream returned HTTP ${resp.status_code}') + if allow_fallback { + fallback := app.openai_plugin_fallback_plan(plan.model, plan.body, method, + path, plan, resp.status_code, code, message, req_id, trace_id) or { + OpenAIPluginPlanResult{} + } + if fallback.handled { + return openai_proxy_once_attempt(mut app, mut ctx, fallback.plan, method, + path, req_id, trace_id, start_ms, false) + } + } + return openai_error_typed(mut app, mut ctx, resp.status_code, path, method, req_id, + trace_id, start_ms, code, message, typ) + } + ctx.res.set_status(http.status_from_int(resp.status_code)) + ctx.set_content_type(if plan.stream_mode == 'mapped' { + 'application/json; charset=utf-8' + } else { + openai_response_content_type(resp.header, 'application/json; charset=utf-8') + }) + ctx.set_custom_header('x-request-id', req_id) or {} + ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {} + ctx.set_custom_header('x-vhttpd-openai-backend', plan.backend_name) or {} + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '${resp.status_code}' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': 
'${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + 'backend': plan.backend_name + }) + if plan.stream_mode == 'mapped' { + mapped_body := openai_map_once_response(plan, resp.body, req_id, int(time.now().unix())) or { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, + start_ms, openai_plan_error_code(err.msg()), openai_plan_error_message(err.msg())) + } + return ctx.text(if method.to_upper() == 'HEAD' { '' } else { mapped_body }) + } + return ctx.text(if method.to_upper() == 'HEAD' { '' } else { resp.body }) +} + +fn openai_map_once_response(plan OpenAIResolvedPlan, body string, req_id string, created int) !string { + if plan.response_codec !in ['ndjson', 'json'] + || plan.output_protocol != 'openai.chat.completion' { + return openai_plan_error('openai_plugin_plan_unsupported_mapper', 'unsupported mapper ${plan.response_codec} -> ${plan.output_protocol}') + } + mut content := '' + mut tool_calls := []json2.Any{} + mut usage := map[string]int{} + if plan.response_codec == 'ndjson' { + for line in body.split_into_lines() { + mapping := openai_extract_mapped_row(line) + content += mapping.content + openai_merge_tool_calls(mut tool_calls, mapping.tool_calls) + openai_merge_usage(mut usage, mapping.usage) + } + } else { + mapping := openai_extract_mapped_row(body) + content = mapping.content + openai_merge_tool_calls(mut tool_calls, mapping.tool_calls) + openai_merge_usage(mut usage, mapping.usage) + } + mut message := map[string]json2.Any{} + message['role'] = json2.Any('assistant') + message['content'] = json2.Any(content) + if tool_calls.len > 0 { + message['tool_calls'] = json2.Any(tool_calls) + } + mut choice := map[string]json2.Any{} + choice['index'] = json2.Any(0) + choice['message'] = json2.Any(message) + choice['finish_reason'] = json2.Any(if tool_calls.len > 0 { 'tool_calls' } else { 'stop' }) + mut root := map[string]json2.Any{} + root['id'] = json2.Any('chatcmpl-${req_id}') + root['object'] = 
json2.Any('chat.completion') + root['created'] = json2.Any(created) + root['model'] = json2.Any(plan.model) + root['choices'] = json2.Any([json2.Any(choice)]) + if usage.len > 0 { + root['usage'] = json2.Any(openai_usage_json_obj(usage)) + } + return json2.Any(root).json_str() +} + +fn openai_executor_mapping_from_result(raw string) OpenAIFrameMapping { + parsed := json2.decode[json2.Any](raw) or { + return OpenAIFrameMapping{ + content: raw + handled: true + } + } + root := parsed.as_map() + if root.len == 0 { + return OpenAIFrameMapping{ + content: raw + handled: true + } + } + return openai_plugin_map_frame_result(raw) +} + +fn openai_completion_json_from_mapping(plan OpenAIResolvedPlan, mapping OpenAIFrameMapping, req_id string, created int) string { + mut message := map[string]json2.Any{} + message['role'] = json2.Any('assistant') + message['content'] = json2.Any(mapping.content) + if mapping.tool_calls.len > 0 { + message['tool_calls'] = json2.Any(mapping.tool_calls) + } + mut choice := map[string]json2.Any{} + choice['index'] = json2.Any(0) + choice['message'] = json2.Any(message) + choice['finish_reason'] = json2.Any(if mapping.finish_reason != '' { + mapping.finish_reason + } else if mapping.tool_calls.len > 0 { + 'tool_calls' + } else { + 'stop' + }) + mut root := map[string]json2.Any{} + root['id'] = json2.Any('chatcmpl-${req_id}') + root['object'] = json2.Any('chat.completion') + root['created'] = json2.Any(created) + root['model'] = json2.Any(plan.model) + root['choices'] = json2.Any([json2.Any(choice)]) + if mapping.usage.len > 0 { + root['usage'] = json2.Any(openai_usage_json_obj(mapping.usage)) + } + return json2.Any(root).json_str() +} + +fn openai_executor_once_body(plan OpenAIResolvedPlan, raw string, req_id string, created int) string { + parsed := json2.decode[json2.Any](raw) or { + return openai_completion_json_from_mapping(plan, OpenAIFrameMapping{ + content: raw + handled: true + }, req_id, created) + } + root := parsed.as_map() + if 
body_any := root['body'] { + body := body_any.str() + if body != '' { + return body + } + } + if _ := root['choices'] { + return raw + } + if _ := root['error'] { + return raw + } + return openai_completion_json_from_mapping(plan, openai_executor_mapping_from_result(raw), + req_id, created) +} + +fn openai_responses_executor_once_body(plan OpenAIResolvedPlan, raw string, req_id string, created int) string { + parsed := json2.decode[json2.Any](raw) or { return raw } + root := parsed.as_map() + if body_any := root['body'] { + body := body_any.str() + if body != '' { + return body + } + } + if (root['object'] or { json2.Any('') }).str() == 'response' { + return raw + } + if _ := root['output'] { + return raw + } + content := (root['content'] or { json2.Any('') }).str() + text := if content != '' { content } else { raw } + response_id := if req_id.trim_space() != '' { 'resp_${req_id}' } else { 'resp_vhttpd' } + mut response := { + 'id': json2.Any(response_id) + 'object': json2.Any('response') + 'created_at': json2.Any(created) + 'status': json2.Any('completed') + 'model': json2.Any(plan.model) + 'output': json2.Any([ + json2.Any({ + 'id': json2.Any('msg_${req_id}') + 'type': json2.Any('message') + 'status': json2.Any('completed') + 'role': json2.Any('assistant') + 'content': json2.Any([ + json2.Any({ + 'type': json2.Any('output_text') + 'text': json2.Any(text) + 'annotations': json2.Any([]json2.Any{}) + }), + ]) + }), + ]) + } + if usage_any := root['usage'] { + response['usage'] = usage_any + } + return json2.Any(response).json_str() +} + +fn openai_proxy_executor_once(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result { + resp := app.openai_call_executor(plan, method, path, req_id, trace_id) or { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms, + 'openai_executor_failed', err.msg()) + } + body := openai_executor_once_body(plan, resp.result, 
req_id, int(time.now().unix())) + ctx.res.set_status(.ok) + ctx.set_content_type('application/json; charset=utf-8') + ctx.set_custom_header('x-request-id', req_id) or {} + ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {} + ctx.set_custom_header('x-vhttpd-openai-backend', plan.backend_name) or {} + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '200' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': '${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + 'backend': plan.backend_name + 'executor': plan.backend.executor + }) + return ctx.text(if method.to_upper() == 'HEAD' { '' } else { body }) +} + +fn openai_proxy_responses_executor_once(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result { + resp := app.openai_call_executor_op(plan, 'responses.execute', method, path, req_id, + trace_id) or { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms, + 'openai_executor_failed', err.msg()) + } + body := openai_responses_executor_once_body(plan, resp.result, req_id, int(time.now().unix())) + app.openai_store_response_record(plan, body, req_id, trace_id) + ctx.res.set_status(.ok) + ctx.set_content_type('application/json; charset=utf-8') + ctx.set_custom_header('x-request-id', req_id) or {} + ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {} + ctx.set_custom_header('x-vhttpd-openai-backend', plan.backend_name) or {} + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '200' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': '${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + 'backend': plan.backend_name + 'executor': plan.backend.executor + 'endpoint': 'responses' + }) + return ctx.text(if method.to_upper() == 'HEAD' { '' } else { body }) +} + +fn openai_executor_stream_mappings(raw string) 
[]OpenAIFrameMapping { + parsed := json2.decode[json2.Any](raw) or { + return [ + OpenAIFrameMapping{ + content: raw + done: true + handled: true + }, + ] + } + root := parsed.as_map() + if frames_any := root['frames'] { + mut mappings := []OpenAIFrameMapping{} + for frame in frames_any.as_array() { + mappings << openai_plugin_map_frame_result(frame.json_str()) + } + return mappings + } + return [openai_executor_mapping_from_result(raw)] +} + +fn openai_response_stream_event_from_raw(raw string) string { + parsed := json2.decode[json2.Any](raw) or { return 'data: ${raw}\n\n' } + root := parsed.as_map() + event_type := (root['event'] or { root['type'] or { json2.Any('') } }).str() + if data_any := root['data'] { + data := if data_any.str() != '' { data_any.str() } else { data_any.json_str() } + if event_type != '' { + return 'event: ${event_type}\ndata: ${data}\n\n' + } + return 'data: ${data}\n\n' + } + if event_type != '' { + return 'event: ${event_type}\ndata: ${raw}\n\n' + } + return 'data: ${raw}\n\n' +} + +fn openai_response_body_from_completed_event(raw string) string { + parsed := json2.decode[json2.Any](raw) or { return '' } + root := parsed.as_map() + event_type := (root['type'] or { root['event'] or { json2.Any('') } }).str() + if event_type != 'response.completed' { + return '' + } + response_any := root['response'] or { return '' } + mut response := response_any.as_map() + if (response['object'] or { json2.Any('') }).str() == '' { + response['object'] = json2.Any('response') + } + return json2.Any(response).json_str() +} + +fn openai_proxy_responses_executor_stream(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result { + ctx.takeover_conn_reusable() + ctx.conn.set_write_timeout(time.infinite) + ctx.conn.set_read_timeout(time.infinite) + mut client_conn := ctx.conn + mut headers := { + 'x-request-id': req_id + 'x-vhttpd-trace-id': trace_id + 'x-vhttpd-openai-backend': 
plan.backend_name + 'x-vhttpd-openai-executor': plan.backend.executor + 'x-accel-buffering': 'no' + } + write_http_stream_headers_conn(mut client_conn, 200, 'text/event-stream', headers, + true) or {} + mut registry_state := &OpenAIResponsesStreamRegistryState{} + stream_resp := app.openai_call_executor_stream_op(plan, 'responses.execute', method, + path, req_id, trace_id, fn [mut client_conn, mut registry_state] (raw string) !bool { + if registry_state.completed_body == '' { + registry_state.completed_body = openai_response_body_from_completed_event(raw) + } + write_chunk(mut client_conn, openai_response_stream_event_from_raw(raw))! + return true + }) or { + openai_write_sse_error(mut client_conn, 'openai_executor_failed', err.msg(), 'server_error') + write_final_chunk(mut client_conn) or {} + client_conn.close() or {} + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '502' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': '${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + 'backend': plan.backend_name + 'executor': plan.backend.executor + 'endpoint': 'responses' + }) + return veb.no_result() + } + if !stream_resp.streamed { + mut wrote_frame := false + for mapping in openai_executor_stream_mappings(stream_resp.response.result) { + if mapping.error != '' { + openai_write_sse_error(mut client_conn, 'openai_executor_error', mapping.error, + 'server_error') + wrote_frame = true + break + } + event := { + 'type': json2.Any('response.output_text.delta') + 'delta': json2.Any(mapping.content) + 'sequence_number': json2.Any(1) + } + if mapping.content != '' { + write_chunk(mut client_conn, openai_response_stream_event_from_raw(json2.Any(event).json_str())) or {} + wrote_frame = true + } + if mapping.done { + registry_state.completed_body = '{"id":"resp_${req_id}","object":"response","status":"completed","model":"${plan.model}"}' + write_chunk(mut client_conn, 
openai_response_stream_event_from_raw('{"type":"response.completed","sequence_number":2,"response":${registry_state.completed_body}}')) or {} + wrote_frame = true + } + } + if !wrote_frame { + registry_state.completed_body = '{"id":"resp_${req_id}","object":"response","status":"completed","model":"${plan.model}"}' + write_chunk(mut client_conn, openai_response_stream_event_from_raw('{"type":"response.completed","sequence_number":1,"response":${registry_state.completed_body}}')) or {} + } + } + if registry_state.completed_body != '' { + app.openai_store_response_record(plan, registry_state.completed_body, req_id, + trace_id) + } + write_final_chunk(mut client_conn) or {} + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '200' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': '${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + 'backend': plan.backend_name + 'executor': plan.backend.executor + 'endpoint': 'responses' + }) + return veb.no_result() +} + +fn openai_proxy_executor_stream(mut app App, mut ctx Context, plan OpenAIResolvedPlan, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result { + ctx.takeover_conn_reusable() + ctx.conn.set_write_timeout(time.infinite) + ctx.conn.set_read_timeout(time.infinite) + mut client_conn := ctx.conn + mut state := &OpenAIMappedStreamProxyState{ + conn: client_conn + method: method + status_code: 200 + response_headers: { + 'x-request-id': req_id + 'x-vhttpd-trace-id': trace_id + 'x-vhttpd-openai-backend': plan.backend_name + 'x-vhttpd-openai-executor': plan.backend.executor + } + model: plan.model + request_id: req_id + trace_id: trace_id + mapper: 'executor' + response_codec: plan.response_codec + output_protocol: plan.output_protocol + created: int(time.now().unix()) + } + stream_resp := app.openai_call_executor_stream(plan, method, path, req_id, trace_id, + fn [mut state, mut client_conn] (raw string) !bool { + mapping 
:= openai_plugin_map_frame_result(raw) + if mapping.error != '' { + ensure_openai_mapped_stream_headers_written(mut state)! + openai_write_sse_error(mut client_conn, 'openai_executor_error', mapping.error, + 'server_error') + state.done = true + return false + } + if mapping.content != '' || mapping.tool_calls.len > 0 { + ensure_openai_mapped_stream_headers_written(mut state)! + write_chunk(mut client_conn, 'data: ${openai_stream_chunk_json(state, mapping)}\n\n')! + } + openai_merge_usage(mut state.usage, mapping.usage) + if mapping.done && !state.done { + ensure_openai_mapped_stream_headers_written(mut state)! + openai_write_stream_usage_chunk(mut state)! + write_chunk(mut client_conn, 'data: [DONE]\n\n')! + state.done = true + openai_finish_mapped_stream(mut state)! + return false + } + return true + }) or { + if !state.headers_written { + state.status_code = 502 + ensure_openai_mapped_stream_headers_written(mut state) or {} + openai_write_sse_error(mut client_conn, 'openai_executor_failed', err.msg(), + 'server_error') + state.done = true + } + if state.headers_written { + openai_finish_mapped_stream(mut state) or {} + } + client_conn.close() or {} + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '502' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': '${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + 'backend': plan.backend_name + 'executor': plan.backend.executor + }) + return veb.no_result() + } + if !stream_resp.streamed { + for mapping in openai_executor_stream_mappings(stream_resp.response.result) { + if mapping.error != '' { + ensure_openai_mapped_stream_headers_written(mut state) or {} + openai_write_sse_error(mut client_conn, 'openai_executor_error', mapping.error, + 'server_error') + state.done = true + break + } + if mapping.content != '' || mapping.tool_calls.len > 0 { + ensure_openai_mapped_stream_headers_written(mut state) or {} + write_chunk(mut client_conn, 'data: 
${openai_stream_chunk_json(state, + mapping)}\n\n') or {} + } + openai_merge_usage(mut state.usage, mapping.usage) + if mapping.done && !state.done { + ensure_openai_mapped_stream_headers_written(mut state) or {} + openai_write_stream_usage_chunk(mut state) or {} + write_chunk(mut client_conn, 'data: [DONE]\n\n') or {} + state.done = true + openai_finish_mapped_stream(mut state) or {} + } + } + } + if !state.done { + ensure_openai_mapped_stream_headers_written(mut state) or {} + openai_write_stream_usage_chunk(mut state) or {} + write_chunk(mut client_conn, 'data: [DONE]\n\n') or {} + } + if state.headers_written { + openai_finish_mapped_stream(mut state) or {} + } + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '200' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': '${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + 'backend': plan.backend_name + 'executor': plan.backend.executor + }) + return veb.no_result() +} + +fn (mut app App) openai_handle_models(mut ctx Context, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result { + if method.to_upper() !in ['GET', 'HEAD'] { + return openai_error(mut app, mut ctx, 405, path, method, req_id, trace_id, start_ms, + 'method_not_allowed', 'method ${method} is not allowed for ${path}') + } + models := if app.openai_plugin.trim_space() != '' { + result := app.openai_plugin_models(method, path, req_id, trace_id) or { + return openai_error(mut app, mut ctx, 500, path, method, req_id, trace_id, + start_ms, 'plugin_error', err.msg()) + } + if result.handled { + result.models + } else { + app.openai_models() + } + } else { + app.openai_models() + } + mut data := []OpenAIModelObject{} + for model in models { + data << OpenAIModelObject{ + id: model + created: int(app.started_at_unix) + } + } + body := json.encode(OpenAIModelsResponse{ + data: data + }) + ctx.res.set_status(.ok) + ctx.set_content_type('application/json; 
charset=utf-8') + ctx.set_custom_header('x-request-id', req_id) or {} + ctx.set_custom_header('x-vhttpd-trace-id', trace_id) or {} + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '200' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': '${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + }) + return ctx.text(if method.to_upper() == 'HEAD' { '' } else { body }) +} + +fn (mut app App) openai_handle_chat(mut ctx Context, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result { + if method.to_upper() !in ['POST', 'HEAD'] { + return openai_error(mut app, mut ctx, 405, path, method, req_id, trace_id, start_ms, + 'method_not_allowed', 'method ${method} is not allowed for ${path}') + } + model := openai_request_model(ctx.req.data) + plan := app.openai_resolve_plan(model, ctx.req.data, method, path, req_id, trace_id) or { + err_msg := err.msg() + status := if err_msg.starts_with('openai_plugin_') { 502 } else { 400 } + return openai_error(mut app, mut ctx, status, path, method, req_id, trace_id, + start_ms, openai_plan_error_code(err_msg), openai_plan_error_message(err_msg)) + } + if openai_is_stream_request(ctx.req.data) { + if plan.backend.kind.trim_space() == 'executor' { + return openai_proxy_executor_stream(mut app, mut ctx, plan, method, path, + req_id, trace_id, start_ms) + } + if plan.stream_mode == 'mapped' { + return openai_proxy_mapped_stream(mut app, mut ctx, plan, method, path, req_id, + trace_id, start_ms) + } + return openai_proxy_stream(mut app, mut ctx, plan, method, path, req_id, trace_id, + start_ms) + } + return openai_proxy_once(mut app, mut ctx, plan, method, path, req_id, trace_id, start_ms) +} + +fn (mut app App) openai_handle_responses(mut ctx Context, method string, path string, req_id string, trace_id string, start_ms i64) veb.Result { + if method.to_upper() !in ['POST', 'HEAD'] { + return openai_error(mut app, mut ctx, 405, path, method, 
req_id, trace_id, start_ms, + 'method_not_allowed', 'method ${method} is not allowed for ${path}') + } + model := openai_request_model(ctx.req.data) + plan := app.openai_resolve_responses_plan(model, ctx.req.data, method, path, req_id, + trace_id) or { + err_msg := err.msg() + status := if err_msg.starts_with('openai_plugin_') { 502 } else { 400 } + return openai_error(mut app, mut ctx, status, path, method, req_id, trace_id, + start_ms, openai_plan_error_code(err_msg), openai_plan_error_message(err_msg)) + } + if openai_is_stream_request(ctx.req.data) { + if plan.backend.kind.trim_space() == 'executor' { + return openai_proxy_responses_executor_stream(mut app, mut ctx, plan, method, + path, req_id, trace_id, start_ms) + } + return openai_proxy_stream(mut app, mut ctx, plan, method, path, req_id, trace_id, + start_ms) + } + if plan.backend.kind.trim_space() == 'executor' { + return openai_proxy_responses_executor_once(mut app, mut ctx, plan, method, path, + req_id, trace_id, start_ms) + } + return openai_proxy_once(mut app, mut ctx, plan, method, path, req_id, trace_id, start_ms) +} + +fn (mut app App) openai_handle_responses_passthrough(mut ctx Context, method string, path string, relative_target string, req_id string, trace_id string, start_ms i64) veb.Result { + if method.to_upper() !in ['GET', 'POST', 'DELETE', 'HEAD'] { + return openai_error(mut app, mut ctx, 405, path, method, req_id, trace_id, start_ms, + 'method_not_allowed', 'method ${method} is not allowed for ${path}') + } + response_id := openai_response_id_from_relative(relative_target) + relative_path := normalize_path(relative_target.all_before('?')) + if method.to_upper() in ['GET', 'HEAD'] && response_id != '' + && !relative_path.contains('/input_items') { + if record := app.openai_responses.get(response_id) { + ctx.res.set_status(.ok) + ctx.set_content_type('application/json; charset=utf-8') + ctx.set_custom_header('x-request-id', req_id) or {} + ctx.set_custom_header('x-vhttpd-trace-id', 
trace_id) or {} + ctx.set_custom_header('x-vhttpd-openai-backend', record.backend_name) or {} + app.emit('http.request', { + 'method': method.to_upper() + 'path': normalize_path(path) + 'status': '200' + 'request_id': req_id + 'trace_id': trace_id + 'duration_ms': '${time.now().unix_milli() - start_ms}' + 'provider': 'openai' + 'backend': record.backend_name + 'executor': record.executor + 'endpoint': 'responses.registry' + }) + return ctx.text(if method.to_upper() == 'HEAD' { '' } else { record.body }) + } + } + plan := app.openai_resolve_responses_passthrough_plan(relative_target, ctx.req.data, + method) or { + err_msg := err.msg() + status := if err_msg.starts_with('openai_plugin_') { 502 } else { 400 } + return openai_error(mut app, mut ctx, status, path, method, req_id, trace_id, + start_ms, openai_plan_error_code(err_msg), openai_plan_error_message(err_msg)) + } + if plan.backend.kind.trim_space() == 'executor' { + return openai_error(mut app, mut ctx, 502, path, method, req_id, trace_id, start_ms, + 'unsupported_backend', 'Responses passthrough endpoint ${relative_target} requires an HTTP backend') + } + if openai_is_stream_request(ctx.req.data) || openai_is_stream_target(path) { + return openai_proxy_stream(mut app, mut ctx, plan, method, path, req_id, trace_id, + start_ms) + } + return openai_proxy_once_attempt(mut app, mut ctx, plan, method, path, req_id, trace_id, + start_ms, false) +} + +fn (mut app App) openai_try_handle(mut ctx Context, method string, target string, req_id string, trace_id string, start_ms i64) ?veb.Result { + if !app.openai_enabled { + return none + } + relative := openai_relative_path(target, app.openai_base_path) or { return none } + relative_target := openai_relative_target(target, app.openai_base_path) or { return none } + if relative == '/models' { + if !app.openai_endpoints.models { + return openai_error(mut app, mut ctx, 404, target, method, req_id, trace_id, + start_ms, 'endpoint_disabled', 'OpenAI models endpoint is 
disabled') + } + return app.openai_handle_models(mut ctx, method, target, req_id, trace_id, start_ms) + } + if relative == '/chat/completions' { + if !app.openai_endpoints.chat_completions { + return openai_error(mut app, mut ctx, 404, target, method, req_id, trace_id, + start_ms, 'endpoint_disabled', 'OpenAI chat completions endpoint is disabled') + } + return app.openai_handle_chat(mut ctx, method, target, req_id, trace_id, start_ms) + } + if relative == '/responses' { + if !app.openai_endpoints.responses { + return openai_error(mut app, mut ctx, 404, target, method, req_id, trace_id, + start_ms, 'endpoint_disabled', 'OpenAI responses endpoint is disabled') + } + return app.openai_handle_responses(mut ctx, method, target, req_id, trace_id, + start_ms) + } + if relative.starts_with('/responses/') { + if !app.openai_endpoints.responses { + return openai_error(mut app, mut ctx, 404, target, method, req_id, trace_id, + start_ms, 'endpoint_disabled', 'OpenAI responses endpoint is disabled') + } + return app.openai_handle_responses_passthrough(mut ctx, method, target, relative_target, + req_id, trace_id, start_ms) + } + if relative == '/embeddings' { + return openai_error(mut app, mut ctx, 501, target, method, req_id, trace_id, start_ms, + 'endpoint_not_implemented', 'OpenAI endpoint ${relative} is not implemented yet') + } + return openai_error(mut app, mut ctx, 404, target, method, req_id, trace_id, start_ms, + 'endpoint_not_found', 'OpenAI endpoint ${relative} was not found') +} diff --git a/src/openai_runtime_test.v b/src/openai_runtime_test.v new file mode 100644 index 0000000..0702907 --- /dev/null +++ b/src/openai_runtime_test.v @@ -0,0 +1,388 @@ +module main + +import os +import x.json2 + +fn test_openai_relative_path_matches_configured_base_path() { + assert openai_relative_path('/v1/models', '/v1') or { '' } == '/models' + assert openai_relative_path('api/openai/chat/completions?trace=1', '/api/openai') or { '' } == '/chat/completions' + if _ := 
openai_relative_path('/api/other/models', '/api/openai') { + assert false + } else { + assert true + } +} + +fn test_openai_route_resolution_maps_public_model_to_upstream_model() { + mut app := App{ + openai_enabled: true + openai_base_path: '/v1' + openai_default_backend: 'default' + openai_backends: { + 'default': OpenAIBackendConfig{ + base_url: 'https://upstream.test/v1' + } + } + openai_routes: { + 'gpt-4o-mini': OpenAIRouteConfig{ + models: ['gpt-4o-mini', 'mini'] + backend: 'default' + upstream_model: 'upstream-mini' + } + } + } + route := app.openai_resolve_route('mini') or { panic(err) } + assert route.backend_name == 'default' + assert route.upstream_model == 'upstream-mini' + assert app.openai_models() == ['gpt-4o-mini', 'mini'] +} + +fn test_openai_responses_builtin_plan_uses_responses_path() { + mut app := App{ + openai_enabled: true + openai_base_path: '/v1' + openai_default_backend: 'default' + openai_backends: { + 'default': OpenAIBackendConfig{ + base_url: 'https://upstream.test/v1' + } + } + openai_routes: { + 'public': OpenAIRouteConfig{ + models: ['public-model'] + backend: 'default' + upstream_model: 'upstream-model' + } + } + } + plan := app.openai_resolve_responses_plan('public-model', '{"model":"public-model","input":"hi"}', + 'POST', '/v1/responses', 'req_resp', 'trace_resp') or { panic(err) } + assert plan.path == '/responses' + assert plan.output_protocol == 'openai.response' + assert plan.body.contains('"model":"upstream-model"') +} + +fn test_openai_replace_model_in_body_keeps_other_fields() { + body := openai_replace_model_in_body('{"model":"public","messages":[{"role":"user","content":"hi"}],"stream":true}', + 'upstream') + root := json2.decode[json2.Any](body) or { panic(err) }.as_map() + assert (root['model'] or { json2.Any('') }).str() == 'upstream' + assert (root['stream'] or { json2.Any(false) }).bool() + assert (root['messages'] or { json2.Any([]json2.Any{}) }).as_array().len == 1 +} + +fn 
test_openai_config_parses_backends_and_routes() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_config_test') + os.mkdir_all(temp_dir) or { panic(err) } + config_file := os.join_path(temp_dir, 'vhttpd.toml') + os.write_file(config_file, ' +[openai] +enabled = true +base_path = "/openai/v1" +default_backend = "default" +plugin = "planner" + +[plugins.planner] +kind = "vjsx" +entry = "plugins/openai-planner.mts" +runtime_profile = "node" + +[openai.backends.default] +kind = "openai_http" +base_url = "https://api.openai.test/v1" +api_key_env = "TEST_OPENAI_KEY" + +[openai.backends.exec] +kind = "executor" +executor = "custom_executor" + +[openai.routes.gpt_demo] +models = ["gpt-demo", "demo"] +backend = "default" +upstream_model = "gpt-4o-mini" +') or { + panic(err) + } + defer { + os.rm(config_file) or {} + os.rmdir_all(temp_dir) or {} + } + cfg := load_vhttpd_config(['--config', config_file]) or { panic(err) } + assert cfg.openai.enabled + assert cfg.openai.base_path == '/openai/v1' + assert cfg.openai.plugin == 'planner' + assert cfg.openai.endpoints.responses + assert cfg.plugins['planner'].runtime_profile == 'node' + assert cfg.openai.backends['default'].base_url == 'https://api.openai.test/v1' + assert cfg.openai.backends['exec'].kind == 'executor' + assert cfg.openai.backends['exec'].executor == 'custom_executor' + assert cfg.openai.routes['gpt_demo'].models == ['gpt-demo', 'demo'] + assert cfg.openai.routes['gpt_demo'].upstream_model == 'gpt-4o-mini' +} + +fn test_openai_vjsx_plugin_can_return_upstream_plan() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_plugin_plan_test') + plugin_dir := os.join_path(temp_dir, 'plugins') + os.mkdir_all(plugin_dir) or { panic(err) } + plugin_file := os.join_path(plugin_dir, 'openai-planner.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op !== 'chat.route') { + return { not_handled: true }; + } + const payload = JSON.parse(req.payload); + const body = 
JSON.parse(payload.body); + body.model = 'plugin-upstream-model'; + return { + backend: 'mock', + method: 'POST', + path: '/chat/completions', + headers: { 'x-plugin-plan': 'yes' }, + body: JSON.stringify(body), + stream_mode: 'passthrough', + }; +} +") or { + panic(err) + } + defer { + os.rmdir_all(temp_dir) or {} + } + plugins := { + 'planner': PluginConfig{ + kind: 'vjsx' + app_entry: plugin_file + runtime_profile: 'node' + thread_count: 1 + } + } + mut app := App{ + started_at_unix: 123 + openai_enabled: true + openai_base_path: '/v1' + openai_plugin: 'planner' + openai_default_backend: 'mock' + openai_backends: { + 'mock': OpenAIBackendConfig{ + base_url: 'https://mock.openai.test/v1' + } + } + plugin_configs: plugins + plugin_vjsx: build_vjsx_plugin_runtimes(plugins) + } + defer { + app.close_all_plugins() + } + plan := app.openai_resolve_plan('public-model', '{"model":"public-model","messages":[]}', + 'POST', '/v1/chat/completions', 'req_plugin', 'trace_plugin') or { panic(err) } + assert plan.backend_name == 'mock' + assert plan.path == '/chat/completions' + assert plan.headers['x-plugin-plan'] == 'yes' + root := json2.decode[json2.Any](plan.body) or { panic(err) }.as_map() + assert (root['model'] or { json2.Any('') }).str() == 'plugin-upstream-model' +} + +fn test_openai_vjsx_plugin_models_uses_same_openai_entry() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_plugin_models_test') + os.mkdir_all(temp_dir) or { panic(err) } + plugin_file := os.join_path(temp_dir, 'openai-planner.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op === 'models') { + return { models: ['plugin-b', 'plugin-a', 'plugin-a'] }; + } + return { not_handled: true }; +} +") or { + panic(err) + } + defer { + os.rmdir_all(temp_dir) or {} + } + plugins := { + 'planner': PluginConfig{ + kind: 'vjsx' + app_entry: plugin_file + runtime_profile: 'node' + thread_count: 1 + } + } + mut app := App{ + openai_enabled: true + openai_base_path: '/v1' + 
openai_plugin: 'planner' + plugin_configs: plugins + plugin_vjsx: build_vjsx_plugin_runtimes(plugins) + } + defer { + app.close_all_plugins() + } + result := app.openai_plugin_models('GET', '/v1/models', 'req_models', 'trace_models') or { + panic(err) + } + assert result.handled + assert result.models == ['plugin-a', 'plugin-b'] +} + +fn test_openai_vjsx_plugin_not_handled_falls_back_to_builtin_route() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_plugin_not_handled_test') + os.mkdir_all(temp_dir) or { panic(err) } + plugin_file := os.join_path(temp_dir, 'openai-planner.mts') + os.write_file(plugin_file, ' +export function openai(_req) { + return { not_handled: true }; +} +') or { + panic(err) + } + defer { + os.rmdir_all(temp_dir) or {} + } + plugins := { + 'planner': PluginConfig{ + kind: 'vjsx' + app_entry: plugin_file + runtime_profile: 'node' + thread_count: 1 + } + } + mut app := App{ + openai_enabled: true + openai_base_path: '/v1' + openai_plugin: 'planner' + openai_default_backend: 'mock' + openai_backends: { + 'mock': OpenAIBackendConfig{ + base_url: 'https://mock.openai.test/v1' + } + } + openai_routes: { + 'public': OpenAIRouteConfig{ + models: ['public-model'] + backend: 'mock' + upstream_model: 'builtin-upstream-model' + } + } + plugin_configs: plugins + plugin_vjsx: build_vjsx_plugin_runtimes(plugins) + } + defer { + app.close_all_plugins() + } + plan := app.openai_resolve_plan('public-model', '{"model":"public-model","messages":[]}', + 'POST', '/v1/chat/completions', 'req_fallback', 'trace_fallback') or { panic(err) } + assert plan.backend_name == 'mock' + root := json2.decode[json2.Any](plan.body) or { panic(err) }.as_map() + assert (root['model'] or { json2.Any('') }).str() == 'builtin-upstream-model' +} + +fn test_openai_plugin_plan_validation_rejects_missing_backend() { + raw := '{"method":"POST","path":"/chat/completions","body":"{}"}' + plan := openai_upstream_plan_from_plugin_json(raw) or { panic(err) } + mut app := App{} + _ := 
app + if plan.backend.trim_space() == '' { + err := openai_plan_error('openai_plugin_plan_missing_backend', 'plugin plan must include backend') + assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_missing_backend' + assert openai_plan_error_message(err.msg()) == 'plugin plan must include backend' + return + } + assert false +} + +fn test_openai_plugin_plan_validation_rejects_invalid_method_and_path() { + openai_validate_plan_method('TRACE') or { + assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_invalid_method' + assert openai_plan_error_message(err.msg()).contains('TRACE') + } + openai_validate_plan_path('chat/completions') or { + assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_invalid_path' + assert openai_plan_error_message(err.msg()).contains('start with /') + } + assert openai_validate_stream_mode('mapped') or { panic(err) } == 'mapped' + openai_validate_response_codec('xml', 'mapped') or { + assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_unsupported_response_codec' + assert openai_plan_error_message(err.msg()).contains('xml') + } + openai_validate_output_protocol('custom.protocol', 'mapped') or { + assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_unsupported_output_protocol' + assert openai_plan_error_message(err.msg()).contains('custom.protocol') + } + assert openai_validate_mapper('plugin') or { panic(err) } == 'plugin' + openai_validate_mapper('remote') or { + assert openai_plan_error_code(err.msg()) == 'openai_plugin_plan_unsupported_mapper' + assert openai_plan_error_message(err.msg()).contains('remote') + } +} + +fn test_openai_plugin_plan_sanitizes_hop_by_hop_headers() { + headers := openai_sanitize_plan_headers({ + 'x-ok': 'yes' + 'connection': 'close' + 'transfer-encoding': 'chunked' + 'host': 'bad' + 'x-bad': 'line\r\nbreak' + }) + assert headers['x-ok'] == 'yes' + assert 'connection' !in headers + assert 'transfer-encoding' !in headers + assert 'host' !in headers + assert 
'x-bad' !in headers +} + +fn test_openai_mapped_once_ndjson_aggregates_chat_completion() { + body := '{"message":{"content":"你"},"done":false}\n' + + '{"message":{"content":"好"},"done":false}\n' + '{"done":true}\n' + mapped := openai_map_once_response(OpenAIResolvedPlan{ + model: 'public-model' + stream_mode: 'mapped' + response_codec: 'ndjson' + output_protocol: 'openai.chat.completion' + }, body, 'req_once', 123) or { panic(err) } + root := json2.decode[json2.Any](mapped) or { panic(err) }.as_map() + assert (root['object'] or { json2.Any('') }).str() == 'chat.completion' + choices := (root['choices'] or { json2.Any([]json2.Any{}) }).as_array() + assert choices.len == 1 + message := (choices[0].as_map()['message'] or { json2.Any(map[string]json2.Any{}) }).as_map() + assert (message['content'] or { json2.Any('') }).str() == '你好' +} + +fn test_openai_mapped_once_ndjson_aggregates_tool_calls() { + body := + '{"message":{"tool_calls":[{"index":0,"id":"call_search","type":"function","function":{"name":"search","arguments":"{\\"q\\":\\"vh"}}]},"done":false}\n' + + '{"message":{"tool_calls":[{"index":0,"function":{"arguments":"ttpd\\"}"}}]},"done":false}\n' + + '{"done":true}\n' + mapped := openai_map_once_response(OpenAIResolvedPlan{ + model: 'public-model' + stream_mode: 'mapped' + response_codec: 'ndjson' + output_protocol: 'openai.chat.completion' + }, body, 'req_tools', 123) or { panic(err) } + root := json2.decode[json2.Any](mapped) or { panic(err) }.as_map() + choices := (root['choices'] or { json2.Any([]json2.Any{}) }).as_array() + message := (choices[0].as_map()['message'] or { json2.Any(map[string]json2.Any{}) }).as_map() + tool_calls := (message['tool_calls'] or { json2.Any([]json2.Any{}) }).as_array() + assert tool_calls.len == 1 + call := tool_calls[0].as_map() + assert (call['id'] or { json2.Any('') }).str() == 'call_search' + fn_obj := (call['function'] or { json2.Any(map[string]json2.Any{}) }).as_map() + assert (fn_obj['name'] or { json2.Any('') }).str() 
== 'search' + assert (fn_obj['arguments'] or { json2.Any('') }).str() == '{"q":"vhttpd"}' + assert (choices[0].as_map()['finish_reason'] or { json2.Any('') }).str() == 'tool_calls' +} + +fn test_openai_mapped_once_ndjson_normalizes_usage() { + body := '{"message":{"content":"hi"},"done":false}\n' + + '{"done":true,"prompt_eval_count":7,"eval_count":11}\n' + mapped := openai_map_once_response(OpenAIResolvedPlan{ + model: 'public-model' + stream_mode: 'mapped' + response_codec: 'ndjson' + output_protocol: 'openai.chat.completion' + }, body, 'req_usage', 123) or { panic(err) } + root := json2.decode[json2.Any](mapped) or { panic(err) }.as_map() + usage := (root['usage'] or { json2.Any(map[string]json2.Any{}) }).as_map() + assert (usage['prompt_tokens'] or { json2.Any(0) }).int() == 7 + assert (usage['completion_tokens'] or { json2.Any(0) }).int() == 11 + assert (usage['total_tokens'] or { json2.Any(0) }).int() == 18 +} diff --git a/src/plugin_runtime.v b/src/plugin_runtime.v new file mode 100644 index 0000000..017d1ab --- /dev/null +++ b/src/plugin_runtime.v @@ -0,0 +1,118 @@ +module main + +import log + +pub struct PluginCallRequest { +pub: + plugin string + capability string + op string + request_id string @[json: 'request_id'] + trace_id string @[json: 'trace_id'] + payload string + metadata map[string]string +} + +pub struct PluginCallResponse { +pub: + ok bool + result string + error string +} + +pub type PluginStreamFrameFn = fn (string) !bool + +pub struct PluginStreamCallResponse { +pub: + streamed bool + response PluginCallResponse +} + +fn plugin_config_app_entry(cfg PluginConfig) string { + if cfg.app_entry.trim_space() != '' { + return cfg.app_entry.trim_space() + } + return cfg.entry.trim_space() +} + +fn vjsx_plugin_runtime_config(name string, cfg PluginConfig) !VjsxRuntimeFacadeConfig { + app_entry := plugin_config_app_entry(cfg) + embedded_cfg := resolve_embedded_host_runtime_config([]string{}, EmbeddedHostRuntimeConfig{ + app_entry: app_entry + 
module_root: cfg.module_root + build_root: cfg.build_root + signature_root: cfg.signature_root + signature_include: cfg.signature_include.clone() + signature_exclude: cfg.signature_exclude.clone() + runtime_profile: cfg.runtime_profile + lane_count: cfg.thread_count + max_requests: cfg.max_requests + enable_fs: cfg.enable_fs + enable_process: cfg.enable_process + enable_network: cfg.enable_network + }, EmbeddedHostCliOverrides{}) or { + return error('plugin_runtime_config_failed:${name}:${err.msg()}') + } + return VjsxRuntimeFacadeConfig{ + app_entry: embedded_cfg.app_entry + module_root: embedded_cfg.module_root + build_root: embedded_cfg.build_root + signature_root: embedded_cfg.signature_root + signature_include: embedded_cfg.signature_include.clone() + signature_exclude: embedded_cfg.signature_exclude.clone() + runtime_profile: embedded_cfg.runtime_profile + thread_count: embedded_cfg.lane_count + max_requests: embedded_cfg.max_requests + enable_fs: embedded_cfg.enable_fs + enable_process: embedded_cfg.enable_process + enable_network: embedded_cfg.enable_network + } +} + +fn build_vjsx_plugin_runtimes(configs map[string]PluginConfig) map[string]InProcVjsxExecutor { + mut runtimes := map[string]InProcVjsxExecutor{} + for name, cfg in configs { + if cfg.kind.trim_space().to_lower() !in ['', 'vjsx'] { + continue + } + runtime_cfg := vjsx_plugin_runtime_config(name, cfg) or { + log.warn('[vhttpd] plugin runtime unavailable name=${name} kind=${cfg.kind} entry=${plugin_config_app_entry(cfg)} error=${err.msg()}') + continue + } + runtimes[name] = new_inproc_vjsx_executor(runtime_cfg) + } + return runtimes +} + +fn (mut app App) close_all_plugins() { + for _, executor in app.plugin_vjsx { + executor.close() + } + app.plugin_vjsx = map[string]InProcVjsxExecutor{} +} + +fn (mut app App) call_plugin(req PluginCallRequest) !PluginCallResponse { + name := req.plugin.trim_space() + if name == '' { + return error('plugin_missing_name') + } + cfg := app.plugin_configs[name] or 
{ return error('plugin_not_configured:${name}') } + if cfg.kind.trim_space().to_lower() !in ['', 'vjsx'] { + return error('plugin_unsupported_kind:${name}:${cfg.kind}') + } + executor := app.plugin_vjsx[name] or { return error('plugin_runtime_unavailable:${name}') } + return executor.call_plugin(mut app, req) +} + +fn (mut app App) call_plugin_stream(req PluginCallRequest, on_frame PluginStreamFrameFn) !PluginStreamCallResponse { + name := req.plugin.trim_space() + if name == '' { + return error('plugin_missing_name') + } + cfg := app.plugin_configs[name] or { return error('plugin_not_configured:${name}') } + if cfg.kind.trim_space().to_lower() !in ['', 'vjsx'] { + return error('plugin_unsupported_kind:${name}:${cfg.kind}') + } + executor := app.plugin_vjsx[name] or { return error('plugin_runtime_unavailable:${name}') } + return executor.call_plugin_stream(mut app, req, on_frame) +} diff --git a/src/server_shutdown_hooks.v b/src/server_shutdown_hooks.v index 9483659..f58a472 100644 --- a/src/server_shutdown_hooks.v +++ b/src/server_shutdown_hooks.v @@ -8,6 +8,7 @@ fn shutdown_app_runtime(mut app App, runtime_cfg ServerRuntimeConfig) { }) runtime_cfg.executor_plan.lifecycle.stop(mut app) app.logic_executor.close() + app.close_all_plugins() // Graceful provider shutdown is now spec/runtime-driven. app.stop_all_providers() os.rm(runtime_cfg.internal_admin_socket) or {} diff --git a/src/worker_backend_transport.v b/src/worker_backend_transport.v index 1b82402..2d5c629 100644 --- a/src/worker_backend_transport.v +++ b/src/worker_backend_transport.v @@ -10,7 +10,8 @@ import time fn write_frame(mut conn unix.StreamConn, payload string) ! { size := payload.len - header := [u8((size >> 24) & 0xff), u8((size >> 16) & 0xff), u8((size >> 8) & 0xff), u8(size & 0xff)] + header := [u8((size >> 24) & 0xff), u8((size >> 16) & 0xff), u8((size >> 8) & 0xff), + u8(size & 0xff)] conn.write_ptr(&header[0], 4)! conn.write_string(payload)! 
} @@ -99,7 +100,8 @@ fn try_decode_stream_start(raw string) ?WorkerStreamFrame { fn try_decode_upstream_plan(raw string) ?WorkerUpstreamPlanFrame { frame := json.decode(WorkerUpstreamPlanFrame, raw) or { return none } - if ((frame.mode == 'stream' && frame.strategy == 'upstream_plan') || frame.mode == 'upstream_plan') && frame.event == 'start' { + if ((frame.mode == 'stream' && frame.strategy == 'upstream_plan') + || frame.mode == 'upstream_plan') && frame.event == 'start' { return frame } return none @@ -198,7 +200,7 @@ fn read_websocket_upstream_response(mut conn unix.StreamConn) !WorkerWebSocketUp fn (mut app App) worker_backend_dispatch_websocket_upstream(req WorkerWebSocketUpstreamDispatchRequest) !WorkerWebSocketUpstreamDispatchResponse { socket, mut conn := app.worker_backend_connect_selected()! - + app.on_worker_request_started(socket) defer { app.on_worker_request_finished(socket) @@ -238,7 +240,7 @@ fn status_reason_phrase(status int) string { } } -fn write_http_stream_headers_conn(mut conn net.TcpConn, status int, content_type string, extra_headers map[string]string, chunked bool) ! { +fn write_http_stream_headers_conn_with_close(mut conn net.TcpConn, status int, content_type string, extra_headers map[string]string, chunked bool, close_conn bool) ! { mut code := status if code <= 0 { code = 200 @@ -246,7 +248,9 @@ fn write_http_stream_headers_conn(mut conn net.TcpConn, status int, content_type mut sb := strings.new_builder(512) sb.write_string('HTTP/1.1 ${code} ${status_reason_phrase(code)}\r\n') sb.write_string('Server: vhttpd\r\n') - sb.write_string('Connection: close\r\n') + if close_conn { + sb.write_string('Connection: close\r\n') + } if chunked { sb.write_string('Transfer-Encoding: chunked\r\n') } @@ -265,8 +269,14 @@ fn write_http_stream_headers_conn(mut conn net.TcpConn, status int, content_type conn.write_string(sb.str())! 
} +fn write_http_stream_headers_conn(mut conn net.TcpConn, status int, content_type string, extra_headers map[string]string, chunked bool) ! { + write_http_stream_headers_conn_with_close(mut conn, status, content_type, extra_headers, + chunked, true)! +} + fn write_http_stream_headers(mut ctx Context, status int, content_type string, extra_headers map[string]string, chunked bool) ! { - write_http_stream_headers_conn(mut ctx.conn, status, content_type, extra_headers, chunked)! + write_http_stream_headers_conn(mut ctx.conn, status, content_type, extra_headers, + chunked)! } fn write_chunk(mut conn net.TcpConn, data string) ! { @@ -278,6 +288,10 @@ fn write_chunk(mut conn net.TcpConn, data string) ! { conn.write_string('\r\n')! } +fn write_final_chunk(mut conn net.TcpConn) ! { + conn.write_string('0\r\n\r\n')! +} + fn write_sse_message(mut conn net.TcpConn, frame WorkerStreamFrame) ! { mut sb := strings.new_builder(256) if frame.sse_id != '' { @@ -357,8 +371,8 @@ fn (mut app App) execute_websocket_dispatch_commands_result(commands []WorkerWeb } return WorkerWebSocketDispatchCommandsResult{ close_frame: close_frame - has_close: has_close - failures: failures + has_close: has_close + failures: failures } } diff --git a/v.mod b/v.mod index 2e7aa75..a113253 100644 --- a/v.mod +++ b/v.mod @@ -1,5 +1,6 @@ Module { name: 'vhttpd' + base_url: 'src' description: 'V HTTP daemon and provider runtime toolkit' version: '0.0.1' } From e369e78d2f82d108518c0c1d95b3b5bdb0cbd2e3 Mon Sep 17 00:00:00 2001 From: weigang Date: Wed, 6 May 2026 09:59:04 +0800 Subject: [PATCH 03/10] openai: add gateway integration coverage --- src/openai_gateway_integration_test.v | 1525 +++++++++++++++++++++++++ 1 file changed, 1525 insertions(+) create mode 100644 src/openai_gateway_integration_test.v diff --git a/src/openai_gateway_integration_test.v b/src/openai_gateway_integration_test.v new file mode 100644 index 0000000..ef740f2 --- /dev/null +++ b/src/openai_gateway_integration_test.v @@ -0,0 +1,1525 
@@ +module main + +import net +import net.http +import os +import time +import veb + +fn openai_integration_free_port_pair() (int, int) { + seed := int((time.now().unix_milli() + os.getpid()) % 10000) + for i in 0 .. 1000 { + port := 30000 + ((seed + i) % 20000) + mut first := net.listen_tcp(.ip, '127.0.0.1:${port}') or { continue } + mut second := net.listen_tcp(.ip, '127.0.0.1:${port + 1}') or { + first.close() or {} + continue + } + first.close() or {} + second.close() or {} + return port, port + 1 + } + panic('openai integration could not find free TCP port pair') +} + +fn openai_integration_wait_for_http(url string) { + for _ in 0 .. 80 { + http.fetch(url: url, method: .get) or { + time.sleep(25 * time.millisecond) + continue + } + return + } +} + +fn openai_integration_wait_for_file(path string) { + for _ in 0 .. 80 { + if os.exists(path) { + return + } + time.sleep(25 * time.millisecond) + } +} + +fn openai_integration_read_http_request(mut conn net.TcpConn) string { + mut raw := '' + mut buf := []u8{len: 4096} + for _ in 0 .. 80 { + n := conn.read(mut buf) or { 0 } + if n <= 0 { + break + } + raw += buf[..n].bytestr() + header := raw.all_before('\r\n\r\n') + if header.len == raw.len { + continue + } + mut content_length := 0 + for line in header.split('\r\n') { + if line.to_lower().starts_with('content-length:') { + content_length = line.all_after(':').trim_space().int() + } + } + body_len := raw.len - header.len - 4 + if body_len >= content_length { + break + } + } + return raw +} + +fn openai_integration_read_http_response_until(mut conn net.TcpConn, marker string) string { + mut raw := '' + mut buf := []u8{len: 4096} + conn.set_read_timeout(2 * time.second) + for _ in 0 .. 
80 { + n := conn.read(mut buf) or { 0 } + if n <= 0 { + break + } + raw += buf[..n].bytestr() + if raw.contains(marker) { + break + } + } + return raw +} + +fn openai_integration_mock_upstream(port int, mode string, request_log string, ready_file string) { + mut listener := net.listen_tcp(.ip, '127.0.0.1:${port}') or { panic(err) } + defer { + listener.close() or {} + } + os.write_file(ready_file, 'ready') or {} + mut conn := listener.accept() or { return } + defer { + conn.close() or {} + } + raw := openai_integration_read_http_request(mut conn) + os.write_file(request_log, raw) or {} + if mode == 'stream' { + conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\nConnection: close\r\n\r\n') or {} + conn.write_string('data: {"id":"chunk-1","choices":[{"delta":{"content":"hello"}}]}\n\n') or {} + conn.write_string('data: [DONE]\n\n') or {} + return + } + if mode == 'stream_keepalive_after_done' { + conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\nConnection: keep-alive\r\n\r\n') or {} + conn.write_string('data: {"id":"chunk-keepalive","choices":[{"delta":{"content":"hello"}}]}\n\n') or {} + conn.write_string('data: [DONE]\n\n') or {} + time.sleep(6 * time.second) + return + } + if mode == 'stream_chunked' { + conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\nTransfer-Encoding: chunked\r\nConnection: close\r\n\r\n') or {} + frame1 := 'data: {"id":"chunked-1","choices":[{"delta":{"content":"hello"}}]}\n\n' + frame2 := 'data: [DONE]\n\n' + conn.write_string('${frame1.len:x}\r\n${frame1}\r\n') or {} + conn.write_string('${frame2.len:x}\r\n${frame2}\r\n') or {} + conn.write_string('0\r\n\r\n') or {} + return + } + if mode == 'responses_stream' { + conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\nConnection: close\r\n\r\n') or {} + conn.write_string('event: response.created\ndata: 
{"type":"response.created","response":{"id":"resp_mock","object":"response","status":"in_progress"},"sequence_number":1}\n\n') or {} + conn.write_string('event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hello","sequence_number":2}\n\n') or {} + conn.write_string('event: response.completed\ndata: {"type":"response.completed","response":{"id":"resp_mock","object":"response","status":"completed"},"sequence_number":3}\n\n') or {} + return + } + if mode == 'responses_json' { + conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {} + conn.write_string('{"id":"resp_mock","object":"response","status":"completed","output":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"response ok"}]}]}') or {} + return + } + if mode == 'responses_stateful' { + conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {} + if raw.starts_with('POST /v1/responses/resp_123/cancel HTTP/') { + conn.write_string('{"id":"resp_123","object":"response","status":"cancelled"}') or {} + } else { + conn.write_string('{"id":"resp_123","object":"response","status":"completed"}') or {} + } + return + } + if mode == 'ollama_ndjson' { + conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n') or {} + conn.write_string('{"message":{"role":"assistant","content":"你"},"done":false}\n') or {} + conn.write_string('{"message":{"role":"assistant","content":"好"},"done":false}\n') or {} + conn.write_string('{"done":true}\n') or {} + return + } + if mode == 'custom_ndjson' { + conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n') or {} + conn.write_string('{"delta":"plugin-","finished":false}\n') or {} + conn.write_string('{"delta":"mapped","finished":false}\n') or {} + conn.write_string('{"finished":true}\n') or {} + return + } + if mode == 'tool_call_ndjson' { 
+ conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n') or {} + conn.write_string('{"message":{"role":"assistant","tool_calls":[{"index":0,"id":"call_search","type":"function","function":{"name":"search","arguments":"{\\"q\\":\\"vh"}}]},"done":false}\n') or {} + conn.write_string('{"message":{"role":"assistant","tool_calls":[{"index":0,"function":{"arguments":"ttpd\\"}"}}]},"done":false}\n') or {} + conn.write_string('{"done":true}\n') or {} + return + } + if mode == 'usage_ndjson' { + conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n') or {} + conn.write_string('{"message":{"role":"assistant","content":"usage ok"},"done":false}\n') or {} + conn.write_string('{"done":true,"prompt_eval_count":5,"eval_count":9}\n') or {} + return + } + if mode == 'json_error' { + conn.write_string('HTTP/1.1 429 Too Many Requests\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {} + conn.write_string('{"error":{"message":"provider quota exceeded","type":"rate_limit_error","code":"rate_limit_exceeded"}}') or {} + return + } + if mode == 'stream_error' { + conn.write_string('HTTP/1.1 503 Service Unavailable\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {} + conn.write_string('{"error":{"message":"provider overloaded","type":"server_error","code":"provider_overloaded"}}') or {} + return + } + conn.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {} + conn.write_string('{"id":"cmpl-mock","object":"chat.completion","choices":[{"message":{"role":"assistant","content":"ok"}}]}') or {} +} + +fn openai_integration_mock_fallback_upstream(port int, request_log string, ready_file string) { + mut listener := net.listen_tcp(.ip, '127.0.0.1:${port}') or { panic(err) } + defer { + listener.close() or {} + } + os.write_file(ready_file, 'ready') or {} + mut first := listener.accept() or { return } + raw_first := 
openai_integration_read_http_request(mut first) + first.write_string('HTTP/1.1 503 Service Unavailable\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {} + first.write_string('{"error":{"message":"primary overloaded","type":"server_error","code":"primary_overloaded"}}') or {} + first.close() or {} + mut second := listener.accept() or { return } + raw_second := openai_integration_read_http_request(mut second) + os.write_file(request_log, raw_first + '\n---SECOND---\n' + raw_second) or {} + if raw_second.starts_with('POST /v1/fallback/api/chat HTTP/') { + second.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n') or {} + second.write_string('{"message":{"role":"assistant","content":"mapped "},"done":false}\n') or {} + second.write_string('{"message":{"role":"assistant","content":"fallback"},"done":false}\n') or {} + second.write_string('{"done":true}\n') or {} + second.close() or {} + return + } + if raw_second.starts_with('POST /v1/fallback/chat/completions HTTP/') { + if raw_second.contains('"stream":true') { + second.write_string('HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\nConnection: close\r\n\r\n') or {} + second.write_string('data: {"id":"chunk-fallback","choices":[{"delta":{"content":"fallback stream"}}]}\n\n') or {} + second.write_string('data: [DONE]\n\n') or {} + second.close() or {} + return + } + second.write_string('HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {} + second.write_string('{"id":"cmpl-fallback","object":"chat.completion","choices":[{"message":{"role":"assistant","content":"fallback ok"}}]}') or {} + second.close() or {} + return + } + second.write_string('HTTP/1.1 503 Service Unavailable\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n') or {} + second.write_string('{"error":{"message":"fallback was not used","type":"server_error","code":"fallback_not_used"}}') or {} + second.close() or {} +} + +fn 
openai_integration_start_gateway(port int, upstream_port int, plugin_file string) { + plugins := if plugin_file.trim_space() == '' { + map[string]PluginConfig{} + } else { + { + 'planner': PluginConfig{ + kind: 'vjsx' + app_entry: plugin_file + runtime_profile: 'node' + thread_count: 1 + } + } + } + mut app := App{ + event_log: '' + started_at_unix: time.now().unix() + openai_enabled: true + openai_base_path: '/v1' + openai_plugin: if plugin_file.trim_space() == '' { '' } else { 'planner' } + openai_default_backend: 'mock' + openai_endpoints: OpenAIEndpointsConfig{} + openai_backends: { + 'mock': OpenAIBackendConfig{ + base_url: 'http://127.0.0.1:${upstream_port}/v1' + } + 'backup': OpenAIBackendConfig{ + base_url: 'http://127.0.0.1:${upstream_port}/v1' + } + 'exec': OpenAIBackendConfig{ + kind: 'executor' + executor: 'planner' + } + } + openai_routes: { + 'public': OpenAIRouteConfig{ + models: ['public-model'] + backend: 'mock' + upstream_model: 'builtin-upstream-model' + } + } + plugin_configs: plugins + plugin_vjsx: build_vjsx_plugin_runtimes(plugins) + openai_responses: new_memory_state_store[OpenAIResponseRecord]() + upstream_sessions: map[string]UpstreamRuntimeSession{} + mcp_sessions: map[string]McpSession{} + ws_hub_conns: map[string]HubConn{} + ws_hub_room_members: map[string]map[string]bool{} + ws_hub_conn_rooms: map[string]map[string]bool{} + ws_hub_conn_meta: map[string]map[string]string{} + ws_hub_pending: map[string][]HubPendingMessage{} + feishu_runtime: map[string]FeishuProviderRuntime{} + websocket_upstream_started: map[string]bool{} + providers: ProviderHost{ + registry: map[string]Provider{} + specs: map[string]ProviderSpec{} + } + fixture_websocket_runtime: map[string]FixtureWebSocketUpstreamRuntime{} + provider_instance_specs: map[string]ProviderInstanceSpec{} + codex_instances: map[string]CodexProviderRuntime{} + feishu_buffers: map[string]FeishuStreamBuffer{} + } + veb.run_at[App, Context](mut app, + host: '127.0.0.1' + port: port + family: 
.ip + show_startup_message: false + ) or {} +} + +fn openai_integration_write_plugin(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-planner.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op !== 'chat.route') return { not_handled: true }; + const payload = JSON.parse(req.payload); + const body = JSON.parse(payload.body); + body.model = 'plugin-upstream-model'; + return { + backend: 'mock', + method: 'POST', + path: '/chat/completions', + headers: { 'x-plugin-plan': 'yes' }, + body: JSON.stringify(body), + stream_mode: 'passthrough', + }; +} +") or { + panic(err) + } + return plugin_file +} + +fn openai_integration_write_bad_plugin(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-bad-planner.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op !== 'chat.route') return { not_handled: true }; + return { + backend: 'mock', + method: 'TRACE', + path: '/chat/completions', + body: '{}', + stream_mode: 'passthrough', + }; +} +") or { + panic(err) + } + return plugin_file +} + +fn openai_integration_write_ollama_plugin(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-ollama-planner.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op !== 'chat.route') return { not_handled: true }; + const payload = JSON.parse(req.payload); + const body = JSON.parse(payload.body); + return { + backend: 'mock', + method: 'POST', + path: '/api/chat', + headers: { 'x-plugin-plan': 'ollama' }, + body: JSON.stringify({ + model: 'qwen2.5', + messages: body.messages, + stream: body.stream === true, + }), + stream_mode: 'mapped', + response_codec: 'ndjson', + output_protocol: 'openai.chat.completion', + }; +} +") or { + panic(err) + } + return plugin_file +} + +fn openai_integration_write_tool_call_plugin(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-tool-call-planner.mts') + os.write_file(plugin_file, " +export 
function openai(req) { + if (req.op !== 'chat.route') return { not_handled: true }; + return { + backend: 'mock', + method: 'POST', + path: '/api/chat', + body: JSON.stringify({ stream: true }), + stream_mode: 'mapped', + response_codec: 'ndjson', + output_protocol: 'openai.chat.completion', + }; +} +") or { + panic(err) + } + return plugin_file +} + +fn openai_integration_write_usage_plugin(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-usage-planner.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op !== 'chat.route') return { not_handled: true }; + return { + backend: 'mock', + method: 'POST', + path: '/api/chat', + body: JSON.stringify({ stream: false }), + stream_mode: 'mapped', + response_codec: 'ndjson', + output_protocol: 'openai.chat.completion', + }; +} +") or { + panic(err) + } + return plugin_file +} + +fn openai_integration_write_executor_plugin(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-executor-planner.mts') + os.write_file(plugin_file, " +async function* executorFrames(body) { + yield { content: 'executor ', done: false }; + yield { content: body.messages?.[0]?.content ?? 'ok', done: false }; + yield { usage: { prompt_tokens: 3, completion_tokens: 4, total_tokens: 7 }, done: true }; +} + +async function* responseEvents(body) { + const input = Array.isArray(body.input) ? 
body.input.map((item) => item.content || '').join(' ') : String(body.input || 'ok'); + yield { type: 'response.created', response: { id: 'resp_exec', object: 'response', status: 'in_progress' }, sequence_number: 1 }; + yield { type: 'response.output_text.delta', delta: 'executor ' + input, sequence_number: 2 }; + yield { type: 'response.completed', response: { id: 'resp_exec', object: 'response', status: 'completed' }, sequence_number: 3 }; +} + +export function openai(req) { + if (req.op === 'chat.route') { + return { + backend: 'exec', + method: 'POST', + path: '/executor/chat', + body: JSON.parse(req.payload).body, + stream_mode: 'executor', + }; + } + if (req.op === 'responses.route') { + return { + backend: 'exec', + method: 'POST', + path: '/executor/responses', + body: JSON.parse(req.payload).body, + stream_mode: 'executor', + output_protocol: 'openai.response', + }; + } + if (req.op === 'chat.execute') { + const payload = JSON.parse(req.payload); + const body = JSON.parse(payload.body); + if (payload.stream) { + return executorFrames(body); + } + return { + content: 'executor ' + (body.messages?.[0]?.content ?? 'ok'), + usage: { prompt_tokens: 3, completion_tokens: 4, total_tokens: 7 }, + done: true, + }; + } + if (req.op === 'responses.execute') { + const payload = JSON.parse(req.payload); + const body = JSON.parse(payload.body); + const input = Array.isArray(body.input) ? 
body.input.map((item) => item.content || '').join(' ') : String(body.input || 'ok'); + if (payload.stream) { + return responseEvents(body); + } + return { + id: 'resp_exec', + object: 'response', + status: 'completed', + output: [{ type: 'message', role: 'assistant', content: [{ type: 'output_text', text: 'executor ' + input }] }], + }; + } + return { not_handled: true }; +} +") or { + panic(err) + } + return plugin_file +} + +fn openai_integration_write_frame_mapper_plugin(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-frame-mapper.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op === 'chat.route') { + const payload = JSON.parse(req.payload); + const body = JSON.parse(payload.body); + return { + backend: 'mock', + method: 'POST', + path: '/custom/stream', + body: JSON.stringify({ prompt: body.messages?.[0]?.content ?? '', stream: true }), + stream_mode: 'mapped', + response_codec: 'ndjson', + output_protocol: 'openai.chat.completion', + mapper: 'plugin', + }; + } + if (req.op === 'chat.map_frame') { + const payload = JSON.parse(req.payload); + const frame = JSON.parse(payload.frame); + return { + content: frame.delta ? 
frame.delta.toUpperCase() : '', + done: frame.finished === true, + }; + } + return { not_handled: true }; +} +") or { + panic(err) + } + return plugin_file +} + +fn openai_integration_write_plugin_tool_call_mapper(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-plugin-tool-call-mapper.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op === 'chat.route') { + return { + backend: 'mock', + method: 'POST', + path: '/custom/stream', + body: JSON.stringify({ stream: true }), + stream_mode: 'mapped', + response_codec: 'ndjson', + output_protocol: 'openai.chat.completion', + mapper: 'plugin', + }; + } + if (req.op === 'chat.map_frame') { + const payload = JSON.parse(req.payload); + const frame = JSON.parse(payload.frame); + if (frame.finished) return { done: true }; + return { + tool_calls: [{ + index: 0, + id: 'call_plugin', + type: 'function', + function: { name: 'lookup', arguments: frame.delta }, + }], + finish_reason: 'tool_calls', + }; + } + return { not_handled: true }; +} +") or { + panic(err) + } + return plugin_file +} + +fn openai_integration_write_mapper_error_plugin(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-mapper-error.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op === 'chat.route') { + return { + backend: 'mock', + method: 'POST', + path: '/custom/stream', + body: JSON.stringify({ stream: true }), + stream_mode: 'mapped', + response_codec: 'ndjson', + output_protocol: 'openai.chat.completion', + mapper: 'plugin', + }; + } + if (req.op === 'chat.map_frame') { + return { error: { message: 'mapper refused frame' } }; + } + return { not_handled: true }; +} +") or { + panic(err) + } + return plugin_file +} + +fn openai_integration_write_fallback_plugin(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-fallback-planner.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op === 'chat.route') { + 
const payload = JSON.parse(req.payload); + return { + backend: 'mock', + method: 'POST', + path: '/primary/chat/completions', + body: payload.body, + stream_mode: 'passthrough', + }; + } + if (req.op === 'chat.fallback') { + const payload = JSON.parse(req.payload); + if (payload.failed_backend !== 'mock' || payload.status_code !== 503) { + return { not_handled: true }; + } + return { + backend: 'backup', + method: 'POST', + path: '/fallback/chat/completions', + body: payload.body, + stream_mode: 'passthrough', + }; + } + return { not_handled: true }; +} +") or { + panic(err) + } + return plugin_file +} + +fn openai_integration_write_mapped_fallback_plugin(temp_dir string) string { + plugin_file := os.join_path(temp_dir, 'openai-mapped-fallback-planner.mts') + os.write_file(plugin_file, " +export function openai(req) { + if (req.op === 'chat.route') { + const payload = JSON.parse(req.payload); + const body = JSON.parse(payload.body); + return { + backend: 'mock', + method: 'POST', + path: '/primary/api/chat', + body: JSON.stringify({ model: 'primary-local', messages: body.messages, stream: true }), + stream_mode: 'mapped', + response_codec: 'ndjson', + output_protocol: 'openai.chat.completion', + }; + } + if (req.op === 'chat.fallback') { + const payload = JSON.parse(req.payload); + if (payload.failed_backend !== 'mock' || payload.status_code !== 503) { + return { not_handled: true }; + } + return { + backend: 'backup', + method: 'POST', + path: '/fallback/api/chat', + body: JSON.stringify({ model: 'backup-local', stream: true }), + stream_mode: 'mapped', + response_codec: 'ndjson', + output_protocol: 'openai.chat.completion', + }; + } + return { not_handled: true }; +} +") or { + panic(err) + } + return plugin_file +} + +fn test_openai_gateway_plugin_non_stream_passthrough_hits_mock_upstream() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_non_stream_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + 
defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_plugin(temp_dir) + spawn openai_integration_mock_upstream(upstream_port, 'json', request_log, ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","messages":[{"role":"user","content":"hi"}]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"cmpl-mock"') + raw := os.read_file(request_log) or { panic(err) } + assert raw.starts_with('POST /v1/chat/completions HTTP/') + assert raw.to_lower().contains('x-plugin-plan: yes') + assert raw.contains('"model":"plugin-upstream-model"') +} + +fn test_openai_gateway_stream_passthrough_forwards_sse_bytes() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + spawn openai_integration_mock_upstream(upstream_port, 'stream', request_log, ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, '') + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + 
header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('data: {"id":"chunk-1"') + assert resp.body.contains('data: [DONE]') + raw := os.read_file(request_log) or { panic(err) } + assert raw.contains('"model":"builtin-upstream-model"') + assert raw.contains('"stream":true') +} + +fn test_openai_gateway_stream_passthrough_dechunks_upstream_sse() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_chunked_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + spawn openai_integration_mock_upstream(upstream_port, 'stream_chunked', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, '') + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}' + ) or { panic(err) } + frame1 := 'data: {"id":"chunked-1","choices":[{"delta":{"content":"hello"}}]}\n\n' + frame2 := 'data: [DONE]\n\n' + assert resp.status_code == 200 + assert resp.body.contains(frame1) + assert resp.body.contains(frame2) + assert 
!resp.body.contains('${frame1.len:x}\r\n') + assert !resp.body.contains('${frame2.len:x}\r\n') + assert !resp.body.contains('\r\n0\r\n') +} + +fn test_openai_gateway_stream_passthrough_writes_chunked_response_boundary() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_response_chunked_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + spawn openai_integration_mock_upstream(upstream_port, 'stream', request_log, ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, '') + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + body := '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}' + mut conn := net.dial_tcp('127.0.0.1:${gateway_port}') or { panic(err) } + defer { + conn.close() or {} + } + conn.write_string('POST /v1/chat/completions HTTP/1.1\r\nHost: 127.0.0.1:${gateway_port}\r\nContent-Type: application/json\r\nAccept: text/event-stream\r\nContent-Length: ${body.len}\r\n\r\n${body}') or { + panic(err) + } + raw := openai_integration_read_http_response_until(mut conn, '\r\n0\r\n\r\n') + assert raw.starts_with('HTTP/1.1 200 OK') + assert raw.to_lower().contains('transfer-encoding: chunked') + assert !raw.to_lower().contains('connection: close') + assert raw.contains('data: {"id":"chunk-1"') + assert raw.contains('data: [DONE]') + assert raw.contains('\r\n0\r\n\r\n') + conn.write_string('GET /health HTTP/1.1\r\nHost: 127.0.0.1:${gateway_port}\r\n\r\n') or { + panic(err) + } + second := openai_integration_read_http_response_until(mut conn, '\r\n\r\nOK') + assert second.starts_with('HTTP/1.1 200 OK') + assert second.ends_with('\r\n\r\nOK') 
+} + +fn test_openai_gateway_stream_passthrough_finishes_on_done_before_upstream_close() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_done_boundary_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + spawn openai_integration_mock_upstream(upstream_port, 'stream_keepalive_after_done', + request_log, ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, '') + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + body := '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}' + mut conn := net.dial_tcp('127.0.0.1:${gateway_port}') or { panic(err) } + defer { + conn.close() or {} + } + conn.write_string('POST /v1/chat/completions HTTP/1.1\r\nHost: 127.0.0.1:${gateway_port}\r\nContent-Type: application/json\r\nAccept: text/event-stream\r\nContent-Length: ${body.len}\r\n\r\n${body}') or { + panic(err) + } + raw := openai_integration_read_http_response_until(mut conn, '\r\n0\r\n\r\n') + assert raw.starts_with('HTTP/1.1 200 OK') + assert raw.to_lower().contains('transfer-encoding: chunked') + assert raw.contains('data: {"id":"chunk-keepalive"') + assert raw.contains('data: [DONE]') + assert raw.contains('\r\n0\r\n\r\n') +} + +fn test_openai_gateway_mapped_ollama_ndjson_stream_outputs_openai_sse() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_ollama_stream_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + 
ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_ollama_plugin(temp_dir) + spawn openai_integration_mock_upstream(upstream_port, 'ollama_ndjson', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('data: {"id":"chatcmpl-') + assert resp.body.contains('"object":"chat.completion.chunk"') + assert resp.body.contains('"content":"你"') + assert resp.body.contains('"content":"好"') + assert resp.body.contains('data: [DONE]') + raw := os.read_file(request_log) or { panic(err) } + assert raw.starts_with('POST /v1/api/chat HTTP/') + assert raw.to_lower().contains('x-plugin-plan: ollama') + assert raw.contains('"model":"qwen2.5"') + assert raw.contains('"stream":true') +} + +fn test_openai_gateway_mapped_ndjson_tool_calls_output_openai_delta() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_tool_call_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_tool_call_plugin(temp_dir) + spawn openai_integration_mock_upstream(upstream_port, 'tool_call_ndjson', request_log, + ready_file) + 
openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"tool_calls"') + assert resp.body.contains('"id":"call_search"') + assert resp.body.contains('"name":"search"') + assert resp.body.contains('"arguments":"{\\"q\\":\\"vh"') + assert resp.body.contains('"arguments":"ttpd\\"}"') + assert resp.body.contains('"finish_reason":"tool_calls"') + assert resp.body.contains('data: [DONE]') +} + +fn test_openai_gateway_mapped_ndjson_tool_calls_aggregate_non_stream() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_tool_call_once_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_tool_call_plugin(temp_dir) + spawn openai_integration_mock_upstream(upstream_port, 'tool_call_ndjson', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: 
'{"model":"public-model","stream":false,"messages":[]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"object":"chat.completion"') + assert resp.body.contains('"message"') + assert resp.body.contains('"tool_calls"') + assert resp.body.contains('"id":"call_search"') + assert resp.body.contains('"name":"search"') + assert resp.body.contains('"arguments":"{\\"q\\":\\"vhttpd\\"}"') + assert resp.body.contains('"finish_reason":"tool_calls"') + assert !resp.body.contains('data:') +} + +fn test_openai_gateway_mapped_ndjson_usage_aggregates_non_stream() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_usage_once_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_usage_plugin(temp_dir) + spawn openai_integration_mock_upstream(upstream_port, 'usage_ndjson', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":false,"messages":[]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"content":"usage ok"') + assert resp.body.contains('"usage"') + assert resp.body.contains('"prompt_tokens":5') + assert resp.body.contains('"completion_tokens":9') + assert resp.body.contains('"total_tokens":14') + assert !resp.body.contains('data:') +} + +fn 
test_openai_gateway_mapped_ndjson_usage_outputs_stream_final_chunk() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_usage_stream_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_usage_plugin(temp_dir) + spawn openai_integration_mock_upstream(upstream_port, 'usage_ndjson', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"content":"usage ok"') + assert resp.body.contains('"choices":[]') + assert resp.body.contains('"usage"') + assert resp.body.contains('"prompt_tokens":5') + assert resp.body.contains('"completion_tokens":9') + assert resp.body.contains('"total_tokens":14') + assert resp.body.contains('data: [DONE]') + assert resp.body.index('"usage"') or { -1 } < resp.body.index('data: [DONE]') or { -1 } +} + +fn test_openai_gateway_executor_backend_non_stream_uses_vjsx_app() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_executor_once_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + 
plugin_file := openai_integration_write_executor_plugin(temp_dir) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","messages":[{"role":"user","content":"handled"}]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"object":"chat.completion"') + assert resp.body.contains('"content":"executor handled"') + assert resp.body.contains('"usage"') + assert resp.body.contains('"total_tokens":7') +} + +fn test_openai_gateway_executor_backend_stream_uses_vjsx_frames() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_executor_stream_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + plugin_file := openai_integration_write_executor_plugin(temp_dir) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"stream"}]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"content":"executor "') + assert resp.body.contains('"content":"stream"') + assert resp.body.contains('"choices":[]') + assert resp.body.contains('"total_tokens":7') + assert resp.body.contains('data: [DONE]') +} + 
+fn test_openai_gateway_responses_passthrough_non_stream() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_once_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + spawn openai_integration_mock_upstream(upstream_port, 'responses_json', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, '') + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/responses' + method: .post + header: header + data: '{"model":"public-model","input":"hello"}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"object":"response"') + assert resp.body.contains('response ok') + raw := os.read_file(request_log) or { panic(err) } + assert raw.starts_with('POST /v1/responses HTTP/') + assert raw.contains('"model":"builtin-upstream-model"') +} + +fn test_openai_gateway_responses_passthrough_stream() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_stream_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + spawn openai_integration_mock_upstream(upstream_port, 'responses_stream', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn 
openai_integration_start_gateway(gateway_port, upstream_port, '') + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/responses' + method: .post + header: header + data: '{"model":"public-model","stream":true,"input":"hello"}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('event: response.created') + assert resp.body.contains('response.output_text.delta') + assert resp.body.contains('response.completed') + raw := os.read_file(request_log) or { panic(err) } + assert raw.starts_with('POST /v1/responses HTTP/') +} + +fn test_openai_gateway_responses_executor_stream_uses_async_iterable_events() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_executor_stream_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + plugin_file := openai_integration_write_executor_plugin(temp_dir) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/responses' + method: .post + header: header + data: '{"model":"public-model","stream":true,"input":"stream"}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('event: response.created') + assert resp.body.contains('response.output_text.delta') + assert resp.body.contains('executor stream') + assert resp.body.contains('response.completed') +} + +fn 
test_openai_gateway_responses_executor_non_stream_registers_response() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_executor_registry_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + plugin_file := openai_integration_write_executor_plugin(temp_dir) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + create_resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/responses' + method: .post + header: header + data: '{"model":"public-model","input":"remember me"}' + ) or { panic(err) } + assert create_resp.status_code == 200 + assert create_resp.body.contains('"id":"resp_exec"') + retrieve_resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/responses/resp_exec' + method: .get + ) or { panic(err) } + assert retrieve_resp.status_code == 200 + assert retrieve_resp.body.contains('"id":"resp_exec"') + assert retrieve_resp.body.contains('executor remember me') +} + +fn test_openai_gateway_responses_executor_stream_registers_completed_response() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_executor_stream_registry_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + plugin_file := openai_integration_write_executor_plugin(temp_dir) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 
'text/event-stream') + stream_resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/responses' + method: .post + header: header + data: '{"model":"public-model","stream":true,"input":"stream"}' + ) or { panic(err) } + assert stream_resp.status_code == 200 + assert stream_resp.body.contains('response.completed') + retrieve_resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/responses/resp_exec' + method: .get + ) or { panic(err) } + assert retrieve_resp.status_code == 200 + assert retrieve_resp.body.contains('"id":"resp_exec"') + assert retrieve_resp.body.contains('"status":"completed"') +} + +fn test_openai_gateway_responses_retrieve_preserves_query() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_retrieve_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + spawn openai_integration_mock_upstream(upstream_port, 'responses_stateful', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, '') + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/responses/resp_123?include[]=output_text' + method: .get + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"id":"resp_123"') + raw := os.read_file(request_log) or { panic(err) } + assert raw.starts_with('GET /v1/responses/resp_123?include%5B%5D=output_text HTTP/') +} + +fn test_openai_gateway_responses_cancel_passthrough() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_responses_cancel_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) 
} + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + spawn openai_integration_mock_upstream(upstream_port, 'responses_stateful', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, '') + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/responses/resp_123/cancel' + method: .post + data: '{}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"status":"cancelled"') + raw := os.read_file(request_log) or { panic(err) } + assert raw.starts_with('POST /v1/responses/resp_123/cancel HTTP/') +} + +fn test_openai_gateway_plugin_frame_mapper_outputs_openai_sse() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_plugin_mapper_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_frame_mapper_plugin(temp_dir) + spawn openai_integration_mock_upstream(upstream_port, 'custom_ndjson', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + 
data: '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"content":"PLUGIN-"') + assert resp.body.contains('"content":"MAPPED"') + assert resp.body.contains('data: [DONE]') + raw := os.read_file(request_log) or { panic(err) } + assert raw.starts_with('POST /v1/custom/stream HTTP/') + assert raw.contains('"prompt":"hi"') +} + +fn test_openai_gateway_plugin_frame_mapper_can_emit_tool_calls() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_plugin_tool_call_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_plugin_tool_call_mapper(temp_dir) + spawn openai_integration_mock_upstream(upstream_port, 'custom_ndjson', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"tool_calls"') + assert resp.body.contains('"id":"call_plugin"') + assert resp.body.contains('"name":"lookup"') + assert resp.body.contains('"arguments":"plugin-"') + assert resp.body.contains('"arguments":"mapped"') + assert resp.body.contains('"finish_reason":"tool_calls"') + assert 
resp.body.contains('data: [DONE]') +} + +fn test_openai_gateway_non_stream_upstream_error_is_openai_error() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_json_error_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + spawn openai_integration_mock_upstream(upstream_port, 'json_error', request_log, ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, '') + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","messages":[]}' + ) or { panic(err) } + assert resp.status_code == 429 + assert resp.body.contains('"message":"provider quota exceeded"') + assert resp.body.contains('"type":"rate_limit_error"') + assert resp.body.contains('"code":"rate_limit_exceeded"') +} + +fn test_openai_gateway_non_stream_plugin_fallback_retries_backup_plan() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_fallback_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_fallback_plugin(temp_dir) + spawn openai_integration_mock_fallback_upstream(upstream_port, request_log, ready_file) + openai_integration_wait_for_file(ready_file) + spawn 
openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","messages":[]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"cmpl-fallback"') + assert resp.body.contains('fallback ok') + raw := os.read_file(request_log) or { panic(err) } + assert raw.contains('POST /v1/primary/chat/completions HTTP/') + assert raw.contains('POST /v1/fallback/chat/completions HTTP/') +} + +fn test_openai_gateway_stream_plugin_fallback_retries_before_sse_headers() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_fallback_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_fallback_plugin(temp_dir) + spawn openai_integration_mock_fallback_upstream(upstream_port, request_log, ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('data: 
{"id":"chunk-fallback"') + assert resp.body.contains('fallback stream') + assert resp.body.contains('data: [DONE]') + raw := os.read_file(request_log) or { panic(err) } + assert raw.contains('POST /v1/primary/chat/completions HTTP/') + assert raw.contains('POST /v1/fallback/chat/completions HTTP/') +} + +fn test_openai_gateway_mapped_stream_plugin_fallback_retries_before_sse_headers() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_mapped_fallback_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_mapped_fallback_plugin(temp_dir) + spawn openai_integration_mock_fallback_upstream(upstream_port, request_log, ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[{"role":"user","content":"hi"}]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('"object":"chat.completion.chunk"') + assert resp.body.contains('"content":"mapped "') + assert resp.body.contains('"content":"fallback"') + assert resp.body.contains('data: [DONE]') + raw := os.read_file(request_log) or { panic(err) } + assert raw.contains('POST /v1/primary/api/chat HTTP/') + assert raw.contains('POST /v1/fallback/api/chat HTTP/') +} + +fn 
test_openai_gateway_stream_upstream_error_is_openai_error() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_stream_error_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + spawn openai_integration_mock_upstream(upstream_port, 'stream_error', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn openai_integration_start_gateway(gateway_port, upstream_port, '') + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[]}' + ) or { panic(err) } + assert resp.status_code == 503 + assert resp.body.contains('"message":"provider overloaded"') + assert resp.body.contains('"code":"provider_overloaded"') + assert !resp.body.contains('data:') +} + +fn test_openai_gateway_plugin_mapper_error_is_openai_sse_error() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_mapper_error_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + request_log := os.join_path(temp_dir, 'upstream.request.txt') + ready_file := os.join_path(temp_dir, 'upstream.ready') + plugin_file := openai_integration_write_mapper_error_plugin(temp_dir) + spawn openai_integration_mock_upstream(upstream_port, 'custom_ndjson', request_log, + ready_file) + openai_integration_wait_for_file(ready_file) + spawn 
openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + header.add(.accept, 'text/event-stream') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","stream":true,"messages":[]}' + ) or { panic(err) } + assert resp.status_code == 200 + assert resp.body.contains('data: {"error":') + assert resp.body.contains('"message":"mapper refused frame"') + assert resp.body.contains('"code":"mapper_error"') + assert resp.body.contains('data: [DONE]') +} + +fn test_openai_gateway_invalid_plugin_plan_returns_openai_error() { + temp_dir := os.join_path(os.temp_dir(), 'vhttpd_openai_gateway_bad_plan_integration_test') + os.rmdir_all(temp_dir) or {} + os.mkdir_all(temp_dir) or { panic(err) } + defer { + os.rmdir_all(temp_dir) or {} + } + upstream_port, gateway_port := openai_integration_free_port_pair() + plugin_file := openai_integration_write_bad_plugin(temp_dir) + spawn openai_integration_start_gateway(gateway_port, upstream_port, plugin_file) + openai_integration_wait_for_http('http://127.0.0.1:${gateway_port}/health') + mut header := http.new_header() + header.add(.content_type, 'application/json') + resp := http.fetch( + url: 'http://127.0.0.1:${gateway_port}/v1/chat/completions' + method: .post + header: header + data: '{"model":"public-model","messages":[]}' + ) or { panic(err) } + assert resp.status_code == 502 + assert resp.body.contains('"code":"openai_plugin_plan_invalid_method"') + assert resp.body.contains('unsupported upstream method TRACE') +} From 24197257c61b956fb5e4cf3435a303c3d75b333d Mon Sep 17 00:00:00 2001 From: weigang Date: Wed, 6 May 2026 09:59:11 +0800 Subject: [PATCH 04/10] docs: add OpenAI gateway examples --- docs/OPENAI_AGGREGATION_GATEWAY_PLAN.md | 646 ++++++++++++++++++ 
.../openai-gateway-dashscope-coding.toml | 45 ++ examples/config/openai-gateway.toml | 64 ++ .../vjsx/openai-dashscope-coding-plugin.mts | 74 ++ examples/vjsx/openai-executor-app.mts | 101 +++ examples/vjsx/openai-gateway-plugin.mts | 212 ++++++ 6 files changed, 1142 insertions(+) create mode 100644 docs/OPENAI_AGGREGATION_GATEWAY_PLAN.md create mode 100644 examples/config/openai-gateway-dashscope-coding.toml create mode 100644 examples/config/openai-gateway.toml create mode 100644 examples/vjsx/openai-dashscope-coding-plugin.mts create mode 100644 examples/vjsx/openai-executor-app.mts create mode 100644 examples/vjsx/openai-gateway-plugin.mts diff --git a/docs/OPENAI_AGGREGATION_GATEWAY_PLAN.md b/docs/OPENAI_AGGREGATION_GATEWAY_PLAN.md new file mode 100644 index 0000000..df46a4d --- /dev/null +++ b/docs/OPENAI_AGGREGATION_GATEWAY_PLAN.md @@ -0,0 +1,646 @@ +# OpenAI Aggregation Gateway Plan + +## Goal + +Build vhttpd into an OpenAI-compatible aggregation gateway. + +The important boundary is: + +- vhttpd owns network execution, stream lifecycle, SSE writing, upstream HTTP, + timeout/cancellation, tracing, auth envelope, and backpressure. +- vjsx owns protocol intelligence: OpenAI compatibility mapping, model routing, + backend-specific request/response shaping, validation, and policy. + +In short, vhttpd should keep the data plane. vjsx should act as a protocol +plugin and planning/mapping layer. + +## Why This Shape + +OpenAI-compatible aggregation has two very different responsibilities. + +The first is physical IO: accepting client HTTP requests, holding long-running +connections, reading upstream streams, detecting disconnects, enforcing +timeouts, and writing SSE frames. This belongs in vhttpd because it is closer to +the server runtime and existing stream/upstream machinery. 
+ +The second is protocol adaptation: deciding which backend should serve a model, +converting OpenAI requests into upstream-specific requests, normalizing provider +quirks, and mapping chunks back into OpenAI-compatible responses. This is a good +fit for vjsx because TypeScript has mature libraries and schemas for this +ecosystem, and protocol logic can evolve faster outside the core server. + +## Runtime Boundary + +```text +client + -> vhttpd /v1/* + -> vhttpd parses request, auth, trace, lifecycle + -> vjsx protocol plugin returns a declarative plan + -> vhttpd executes upstream HTTP/executor plan + -> vhttpd decodes frames: sse | ndjson | json | text + -> optional vjsx frame mapper + -> vhttpd writes OpenAI-compatible JSON/SSE +``` + +vjsx should not own sockets for this feature. It should return plans and mapping +decisions. vhttpd should own the actual fetch, stream read, and client write. + +## Responsibilities + +### vhttpd Owns + +- `/v1/*` HTTP dispatch surface. +- Client connection takeover and SSE/chunked response writing. +- Upstream HTTP execution. +- Upstream stream decoding at the transport/framing layer. +- Cancellation when the client disconnects. +- Timeout and retry hooks. +- Request ids, trace ids, response headers, access logs, and admin snapshots. +- Fast-path passthrough for already OpenAI-compatible upstream streams. + +### vjsx Owns + +- Model alias and route selection. +- Backend-specific request construction. +- OpenAI request normalization and validation. +- Upstream response and error mapping. +- Provider-specific quirks. +- Optional policy: fallback, tenant routing, capability selection. + +## Configuration Shape + +Prefer named map sections, matching the existing vhttpd style. 
+ +```toml +[openai] +enabled = true +base_path = "/v1" +plugin = "openai-gateway" + +[openai.endpoints] +models = true +chat_completions = true +responses = true +embeddings = false + +[openai.routes.gpt4omini] +models = ["gpt-4o-mini", "gpt-4o-mini-*"] +backend = "openai-main" +upstream_model = "gpt-4o-mini" + +[openai.routes.local-chat] +models = ["llama3.1", "qwen2.5"] +backend = "ollama-local" + +[openai.routes.agent] +models = ["my-agent", "company-assistant"] +backend = "agent-vjsx" + +[openai.backends.openai-main] +kind = "openai_http" +base_url = "https://api.openai.com/v1" +api_key_env = "OPENAI_API_KEY" +stream_mode = "passthrough" + +[openai.backends.ollama-local] +kind = "http" +base_url = "http://127.0.0.1:11434" +stream_mode = "mapped" +protocol_plugin = "openai_ollama" + +[openai.backends.agent-vjsx] +kind = "executor" +executor = "agent-vjsx" +stream_mode = "vhttpd_sse" + +[plugins.agent-vjsx] +kind = "vjsx" +entry = "plugins/agent-executor.mts" +runtime_profile = "node" +enable_network = true + +[plugins.openai-gateway] +kind = "vjsx" +entry = "plugins/openai-gateway.mts" +runtime_profile = "node" +thread_count = 1 +enable_network = false +``` + +Site-level overrides should be allowed later: + +```toml +[sites.ai_gateway] +host = "127.0.0.1" +port = 19890 +openai.enabled = true +openai.base_path = "/v1" +``` + +## Protocol Plugin Contract + +The plugin should return declarative plans, not perform network IO. 
+ +TypeScript shape: + +```ts +type OpenAIPluginRequest = { + plugin: string; + capability: "openai"; + op: + | "models" + | "chat.route" + | "chat.execute" + | "chat.fallback" + | "chat.map_frame" + | "responses.route" + | "responses.execute" + | string; + request_id: string; + trace_id: string; + payload: string; + metadata: Record<string, string>; +}; + +type OpenAIModelsResult = + | { not_handled: true } + | { models: string[] } + | { data: Array<{ id: string }> }; + +type OpenAIChatRoutePlan = + | { not_handled: true } + | { + backend: string; + method?: "GET" | "POST" | "PUT" | "PATCH" | "DELETE" | "HEAD"; + path?: `/${string}`; + headers?: Record<string, string>; + body?: string; + upstream_model?: string; + stream_mode?: "passthrough" | "mapped" | "executor"; + response_codec?: "sse" | "json" | "ndjson" | "text"; + output_protocol?: "openai.chat.completion"; + mapper?: "builtin" | "plugin"; + }; +``` + +Example: + +```ts +export function openai(req) { + switch (req.op) { + case "models": + return { models: ["gpt-4o-mini"] }; + + case "chat.route": { + const payload = JSON.parse(req.payload); + return { + backend: "openai-main", + method: "POST", + path: "/chat/completions", + headers: {}, + body: payload.body, + stream_mode: "passthrough", + }; + } + + default: + return { not_handled: true }; + } +} +``` + +Example upstream plan: + +```ts +return { + backend: "ollama-local", + method: "POST", + path: "/api/chat", + headers: {}, + body: JSON.stringify({ model: "llama3.1", messages, stream: true }), + stream_mode: "mapped", + response_codec: "ndjson", + output_protocol: "openai.chat.completion", + mapper: "builtin", +}; +``` + +vhttpd executes the plan and exposes framed upstream data back to the plugin +only when mapping is required. + +Plugin frame mapper example: + +```ts +export function openai(req) { + if (req.op === "chat.map_frame") { + const payload = JSON.parse(req.payload); + const frame = JSON.parse(payload.frame); + return { + content: frame.delta ?? "", + tool_calls: frame.tool_calls ??
undefined, + finish_reason: frame.tool_calls ? "tool_calls" : undefined, + done: frame.finished === true, + }; + } + return { not_handled: true }; +} +``` + +Plugin fallback example: + +```ts +export function openai(req) { + if (req.op === "chat.fallback") { + const payload = JSON.parse(req.payload); + if (payload.failed_backend !== "primary" || payload.status_code < 500) { + return { not_handled: true }; + } + return { + backend: "backup", + method: "POST", + path: "/chat/completions", + body: payload.body, + stream_mode: "passthrough", + }; + } + return { not_handled: true }; +} +``` + +## Executor Backend Contract + +An executor backend is used when vhttpd should not directly call the upstream +HTTP API. This is the escape hatch for private SDKs, non-OpenAI-compatible +protocols, multi-step agent logic, or provider-specific network behavior. + +Configuration: + +```toml +[openai.backends.custom_executor] +kind = "executor" +executor = "custom_executor" + +[plugins.custom_executor] +kind = "vjsx" +entry = "examples/vjsx/openai-executor-app.mts" +runtime_profile = "node" +enable_network = true +``` + +The OpenAI routing plugin can select this backend: + +```ts +return { + backend: "custom_executor", + method: "POST", + path: "/executor/chat", + body: payload.body, + stream_mode: "executor", +}; +``` + +The executor app only needs to implement the `openai(req)` entry and handle +`req.op === "chat.execute"` for Chat Completions or +`req.op === "responses.execute"` for Responses. 
+ +Request shape: + +```ts +type OpenAIExecutorRequest = { + plugin: string; + capability: "openai"; + op: "chat.execute" | "responses.execute"; + request_id: string; + trace_id: string; + payload: string; + metadata: { + model?: string; + backend?: string; + }; +}; + +type OpenAIExecutorPayload = { + method: string; + path: string; + model: string; + stream: boolean; + body: string; + backend: string; + request_id: string; + trace_id: string; + response_codec?: string; + output_protocol?: "openai.chat.completion" | "openai.response"; +}; +``` + +Minimal executor: + +```ts +export async function openai(req) { + if (req.op !== "chat.execute") { + return { not_handled: true }; + } + const payload = JSON.parse(req.payload); + const body = JSON.parse(payload.body); + + // Call a private SDK or non-OpenAI HTTP API here. + if (payload.stream) { + return { + frames: [ + { content: "hello", done: false }, + { usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, done: true }, + ], + }; + } + + return { + content: "hello", + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + done: true, + }; +} +``` + +Non-stream normalized result: + +```ts +return { + content: "hello", + usage: { + prompt_tokens: 10, + completion_tokens: 3, + total_tokens: 13, + }, + done: true, +}; +``` + +vhttpd turns that into an OpenAI `chat.completion` response. + +Non-stream full OpenAI body: + +```ts +return { + body: JSON.stringify({ + id: "chatcmpl-custom", + object: "chat.completion", + choices: [ + { + index: 0, + message: { role: "assistant", content: "hello" }, + finish_reason: "stop", + }, + ], + }), +}; +``` + +Stream result: + +```ts +return { + frames: [ + { content: "hello ", done: false }, + { content: "world", done: false }, + { + usage: { + prompt_tokens: 10, + completion_tokens: 2, + total_tokens: 12, + }, + done: true, + }, + ], +}; +``` + +vhttpd writes these frames as OpenAI SSE and appends `data: [DONE]`. 
+ +Tool call frame: + +```ts +return { + frames: [ + { + tool_calls: [ + { + index: 0, + id: "call_1", + type: "function", + function: { + name: "search", + arguments: "{\"q\":\"vhttpd\"}", + }, + }, + ], + finish_reason: "tool_calls", + done: true, + }, + ], +}; +``` + +Error result: + +```ts +return { + error: { + message: "custom provider failed", + }, +}; +``` + +Current executor behavior: + +- Executor apps may perform network access when their plugin config enables it. +- vhttpd still owns the client-facing OpenAI HTTP/SSE response. +- Non-stream executor results are normalized into OpenAI JSON unless `body` is + returned. +- Stream executor results can return either buffered `frames: [...]` or an async + iterable. Async iterable results are pulled by vhttpd through vjsx + `RuntimeSession.stream_value(...)`, so each yielded frame is written as SSE + before the next frame is requested. + +Current plan validation: + +- `backend` is required and must name a configured backend. +- `method` defaults to `POST` and must be one of `GET`, `POST`, `PUT`, + `PATCH`, `DELETE`, `HEAD`. +- `path` defaults to `/chat/completions`, must start with `/`, and must not + contain newlines. +- `stream_mode` defaults to `passthrough`; `mapped` is supported for OpenAI + chat completion mapping. +- `mapped` currently supports `response_codec = "ndjson"` for streaming and + `response_codec = "ndjson" | "json"` for non-stream aggregation. +- `output_protocol` defaults to `openai.chat.completion`. +- `mapper` defaults to `builtin`; `plugin` calls `openai(req)` with + `req.op = "chat.map_frame"` per decoded upstream frame. +- connection-managed headers are ignored: hop-by-hop headers such as `Connection`, + `Transfer-Encoding`, and `Upgrade`, plus `Content-Length` and `Host`. + +## Stream Modes + +### passthrough + +For OpenAI-compatible upstreams. vhttpd forwards the request upstream and writes +the upstream response back to the client with minimal intervention. + +Useful for: + +- OpenAI official API.
+- OpenAI-compatible providers. +- Other aggregation gateways. + +vjsx is used for route/build-start/error hooks, not per-token mapping. + +### mapped + +For non-OpenAI upstreams such as Ollama NDJSON. vhttpd decodes the upstream +framing and calls vjsx or a built-in mapper to emit OpenAI-compatible chunks. + +Useful for: + +- Ollama `/api/chat` NDJSON. +- custom JSONL/NDJSON model servers. +- providers with incompatible stream shape. + +### vhttpd_sse + +For executor backends where PHP/vjsx returns normalized events or frames, but +vhttpd still owns the client-facing SSE writer. + +Useful for: + +- inproc vjsx agent executors. +- PHP application executors. +- local business logic pretending to be an OpenAI model. + +## Initial MVP + +1. Add OpenAI config structs and admin snapshot fields. +2. Add `ProviderRouteKind.openai`. +3. Add `/v1/models` and `/v1/chat/completions` dispatch behind `[openai]`. +4. Implement `openai_http` backend with non-stream and SSE passthrough. +5. Add vjsx hook for route/buildUpstream. +6. Add built-in OpenAI SSE writer: + - `data: {...}\n\n` + - `data: [DONE]\n\n` +7. Add fixture tests for OpenAI-compatible mock upstream. + +## Current Implementation Slice + +The first slice keeps the network path in vhttpd and implements the +OpenAI-compatible passthrough path directly: + +- `[openai]` config, named `[openai.backends.*]`, and named + `[openai.routes.*]`. +- `[plugins.*]` config for capability plugins that do not replace the site + executor. +- `/v1/models` generated from configured route models, or from the + OpenAI plugin `models` operation. +- `/v1/chat/completions` routed by request `model`, or planned by the OpenAI + plugin `chat.route` operation. +- `openai_http` upstream backend with configured `base_url` and API key from + `api_key` or `api_key_env`. +- non-stream request passthrough, with optional model rewrite when + `upstream_model` is configured. 
+- stream request passthrough where vhttpd takes over the client connection and + forwards upstream SSE bytes. +- mapped Ollama-style NDJSON streams where vhttpd decodes upstream JSON lines + and emits OpenAI chat completion SSE chunks. +- mapped non-stream `ndjson`/`json` responses aggregated into an OpenAI chat + completion response. +- executor backends using a vjsx app via `chat.execute`, for providers that + need custom SDK/network logic outside vhttpd's HTTP/mapped fetch path. +- `/v1/responses` create endpoint with non-stream and stream passthrough. The + built-in route resolver reuses `[openai.routes.*]` and sends upstream traffic + to `/responses`. +- Responses stateful passthrough for paths under `/v1/responses/*`, including + retrieve, cancel, input item listing, and future upstream-defined subroutes. + vhttpd preserves the query string and still applies backend auth, trace + headers, and error normalization. +- Responses executor backends using `responses.execute`. Non-stream executors + may return a Response object or `{ body }`; stream executors may return an + async iterable of typed Responses events. +- In-memory Responses registry for executor-owned responses. vhttpd stores + completed executor Responses in a TTL-backed `MemoryStateStore` and serves + `GET /v1/responses/{id}` locally when the id is known; unknown ids continue + to upstream passthrough. +- plugin frame mapper hook for provider-specific stream frames that the + built-in mapper does not understand. +- upstream non-2xx responses normalized into OpenAI error envelopes. +- streaming upstream non-2xx responses normalized before SSE headers are + written. +- plugin frame mapper errors normalized into OpenAI-style SSE error frames. +- `chat.fallback` plugin hook: vhttpd retries once with a fallback plan when + upstream fetch fails or returns non-2xx. 
+- stream-safe fallback for passthrough streams: fallback is allowed only before + client SSE headers are written; after streaming begins, vhttpd sends an + OpenAI-style SSE error instead of switching backend. +- stream-safe fallback for mapped NDJSON streams using the same pre-SSE-header + boundary. +- tool call chunk normalization for mapped streams: built-in and plugin mappers + can emit OpenAI-compatible `delta.tool_calls` and `finish_reason = + "tool_calls"`. +- non-stream mapped NDJSON tool calls are aggregated into final + `message.tool_calls`, including incremental `function.arguments` chunks. +- non-stream mapped usage normalization from OpenAI-style `usage` or + Ollama-style `prompt_eval_count`/`eval_count` into OpenAI + `usage.prompt_tokens`, `usage.completion_tokens`, and `usage.total_tokens`. +- stream mapped usage emits a final OpenAI-compatible chunk with `choices: []` + and `usage` before `data: [DONE]` when upstream usage is available. +- optional vjsx OpenAI plugin hook through a single `openai(req)` entry. vhttpd + passes `req.op` values such as `models`, `chat.route`, and + `responses.route`; `{ not_handled: true }` falls back to built-in config + behavior. + +The plugin hook is intentionally scoped: it can route/build the upstream plan, +but it does not own sockets, fetch, or client streaming. + +## Second Phase + +1. Add provider-specific error code taxonomy. +2. Add retry/fallback policy limits and observability fields. +3. Add provider-specific Responses routing examples beyond passthrough and + executor. + +## Later Phases + +- durable persistence for Responses objects when executor state must survive + process restart or be shared across vhttpd instances. 
+- embeddings +- tool call chunk normalization +- usage aggregation +- tenant-aware routing +- weighted routing and health checks +- per-key quota hooks +- request/response audit events +- admin UI/runtime snapshots + +## Testing Strategy + +Default tests should avoid npm and network dependencies. + +Use local mock upstreams in V tests for: + +- OpenAI-compatible JSON response. +- OpenAI-compatible SSE response. +- upstream disconnect. +- malformed SSE. +- timeout/cancellation. +- route miss. + +Use optional vjsx/npm integration fixtures for: + +- `openai` SDK non-stream and stream. +- AI SDK `generateText`. +- AI SDK `streamText`. + +The optional fixture should not run in the default `v test` suite. + +## Key Design Rule + +Do not let protocol plugins own the socket. + +The plugin can decide, normalize, and map. vhttpd should execute, stream, cancel, +observe, and write. diff --git a/examples/config/openai-gateway-dashscope-coding.toml b/examples/config/openai-gateway-dashscope-coding.toml new file mode 100644 index 0000000..95e6652 --- /dev/null +++ b/examples/config/openai-gateway-dashscope-coding.toml @@ -0,0 +1,45 @@ +[server] +host = "127.0.0.1" +port = 18082 + +[paths] +root = "../.." 
+ +[openai] +enabled = true +base_path = "/v1" +default_backend = "bailian_coding" +plugin = "openai_gateway" + +[plugins.openai_gateway] +kind = "vjsx" +entry = "${paths.root}/examples/vjsx/openai-dashscope-coding-plugin.mts" +runtime_profile = "node" +thread_count = 1 +enable_network = false + +[openai.backends.bailian_coding] +kind = "openai_http" +base_url = "https://coding.dashscope.aliyuncs.com/v1" +api_key_env = "BAILIAN_CODING_API_KEY" +timeout_ms = 60000 + +[openai.backends.ollama] +kind = "http" +base_url = "http://127.0.0.1:11434" +timeout_ms = 60000 + +[openai.routes.bailian_coding_models] +models = [ + "qwen3.6-plus", + "qwen3.5-plus", + "qwen3-coder-plus", + "glm-5", + "kimi-k2.5", + "MiniMax-M2.5", +] +backend = "bailian_coding" + +[openai.routes.minimax_m2] +models = ["minimax-m2:cloud"] +backend = "ollama" diff --git a/examples/config/openai-gateway.toml b/examples/config/openai-gateway.toml new file mode 100644 index 0000000..35ee5ef --- /dev/null +++ b/examples/config/openai-gateway.toml @@ -0,0 +1,64 @@ +[server] +host = "127.0.0.1" +port = 18081 + +[openai] +enabled = true +base_path = "/v1" +default_backend = "openai" +plugin = "openai_gateway" + +[plugins.openai_gateway] +kind = "vjsx" +entry = "${paths.root}/examples/vjsx/openai-gateway-plugin.mts" +runtime_profile = "node" +thread_count = 1 +enable_network = false + +[plugins.custom_executor] +kind = "vjsx" +entry = "${paths.root}/examples/vjsx/openai-executor-app.mts" +runtime_profile = "node" +thread_count = 2 +enable_network = true + +[openai.backends.openai] +kind = "openai_http" +base_url = "https://api.openai.com/v1" +api_key_env = "OPENAI_API_KEY" +timeout_ms = 60000 + +[openai.backends.ollama] +kind = "http" +base_url = "http://127.0.0.1:11434" +timeout_ms = 60000 + +[openai.backends.custom] +kind = "http" +base_url = "http://127.0.0.1:19090" +timeout_ms = 60000 + +[openai.backends.custom_executor] +kind = "executor" +executor = "custom_executor" +timeout_ms = 60000 + 
+[openai.routes.gpt_4o_mini] +models = ["gpt-4o-mini"] +backend = "openai" + +[openai.routes.gpt_4_1_mini] +models = ["gpt-4.1-mini"] +backend = "openai" + +[openai.routes.llama3_1] +models = ["llama3.1"] +backend = "ollama" + +[openai.routes.custom_agent] +models = ["custom-agent"] +backend = "custom" + +[openai.routes.executor_agent] +models = ["executor-agent"] +backend = "custom_executor" diff --git a/examples/vjsx/openai-dashscope-coding-plugin.mts b/examples/vjsx/openai-dashscope-coding-plugin.mts new file mode 100644 index 0000000..e2bff9e --- /dev/null +++ b/examples/vjsx/openai-dashscope-coding-plugin.mts @@ -0,0 +1,74 @@ +type PluginRequest = { + op: string; + payload: string; +}; + +type ChatPayload = { + model: string; + stream: boolean; + body: string; +}; + +function payload(req: PluginRequest): T { + return JSON.parse(req.payload || "{}") as T; +} + +function chatBody(input: ChatPayload): Record { + return JSON.parse(input.body || "{}"); +} + +function routeChat(req: PluginRequest) { + const input = payload(req); + const body = chatBody(input); + const model = input.model || body.model || ""; + + if (model === "llama3.1" || model === "minimax-m2:cloud") { + return { + backend: "ollama", + method: "POST", + path: "/api/chat", + body: JSON.stringify({ + model: model === "minimax-m2:cloud" ? 
"minimax_m2" : "llama3.1", + messages: body.messages || [], + tools: body.tools, + stream: input.stream, + }), + stream_mode: "mapped", + response_codec: "ndjson", + output_protocol: "openai.chat.completion", + mapper: "builtin", + }; + } + + body.model = model || body.model; + + return { + backend: "bailian_coding", + method: "POST", + path: "/chat/completions", + body: JSON.stringify(body), + stream_mode: "passthrough", + }; +} + +export function openai(req: PluginRequest) { + switch (req.op) { + case "models": + return { + models: [ + "qwen3.6-plus", + "qwen3.5-plus", + "qwen3-coder-plus", + "glm-5", + "kimi-k2.5", + "MiniMax-M2.5", + "llama3.1", + "minimax-m2:cloud", + ], + }; + case "chat.route": + return routeChat(req); + default: + return { not_handled: true }; + } +} diff --git a/examples/vjsx/openai-executor-app.mts b/examples/vjsx/openai-executor-app.mts new file mode 100644 index 0000000..dffa75e --- /dev/null +++ b/examples/vjsx/openai-executor-app.mts @@ -0,0 +1,101 @@ +type PluginRequest = { + op: string; + payload: string; +}; + +function payload(req: PluginRequest): Record { + return JSON.parse(req.payload || "{}"); +} + +async function* streamFrames(prompt: string) { + yield { content: "executor: ", done: false }; + yield { content: prompt || "ok", done: false }; + yield { + usage: { + prompt_tokens: Math.max(1, prompt.length), + completion_tokens: 2, + total_tokens: Math.max(1, prompt.length) + 2, + }, + done: true, + }; +} + +async function* responseEvents(prompt: string) { + yield { + type: "response.created", + response: { + id: "resp_vhttpd_executor", + object: "response", + status: "in_progress", + }, + sequence_number: 1, + }; + yield { + type: "response.output_text.delta", + delta: `executor: ${prompt || "ok"}`, + output_index: 0, + content_index: 0, + sequence_number: 2, + }; + yield { + type: "response.completed", + response: { + id: "resp_vhttpd_executor", + object: "response", + status: "completed", + }, + sequence_number: 3, + }; +} + 
+export async function openai(req: PluginRequest) { + if (req.op !== "chat.execute" && req.op !== "responses.execute") { + return { not_handled: true }; + } + + const p = payload(req); + const body = JSON.parse(p.body || "{}"); + const prompt = (body.messages || []).map((m: any) => m.content).join("\n"); + + if (req.op === "responses.execute") { + if (p.stream) { + return responseEvents(prompt); + } + + return { + id: "resp_vhttpd_executor", + object: "response", + status: "completed", + model: p.model, + output: [{ + id: "msg_vhttpd_executor", + type: "message", + status: "completed", + role: "assistant", + content: [{ + type: "output_text", + text: `executor: ${prompt || "ok"}`, + annotations: [], + }], + }], + }; + } + + // This is where a real executor app can call a private SDK or a + // non-OpenAI-compatible HTTP service. The result returned to vhttpd is + // normalized frames/data; vhttpd still owns the client-facing OpenAI response. + + if (p.stream) { + return streamFrames(prompt); + } + + return { + content: `executor: ${prompt || "ok"}`, + usage: { + prompt_tokens: Math.max(1, prompt.length), + completion_tokens: 2, + total_tokens: Math.max(1, prompt.length) + 2, + }, + done: true, + }; +} diff --git a/examples/vjsx/openai-gateway-plugin.mts b/examples/vjsx/openai-gateway-plugin.mts new file mode 100644 index 0000000..d54f8eb --- /dev/null +++ b/examples/vjsx/openai-gateway-plugin.mts @@ -0,0 +1,212 @@ +type PluginRequest = { + op: string; + payload: string; + request_id?: string; + trace_id?: string; + metadata?: Record; +}; + +type ChatPayload = { + model: string; + stream: boolean; + body: string; +}; + +type FallbackPayload = { + body: string; + failed_backend: string; + status_code: number; + error_code: string; + error_message: string; +}; + +type MapFramePayload = { + frame: string; +}; + +const publicModels = [ + "gpt-4o-mini", + "llama3.1", + "custom-agent", + "executor-agent", +]; + +function jsonPayload(req: PluginRequest): T { + return 
JSON.parse(req.payload || "{}") as T; +} + +function chatBody(payload: ChatPayload): Record { + return JSON.parse(payload.body || "{}"); +} + +function openaiPassthrough(payload: ChatPayload) { + const body = chatBody(payload); + return { + backend: "openai", + method: "POST", + path: "/chat/completions", + body: JSON.stringify(body), + stream_mode: "passthrough", + }; +} + +function ollamaMapped(payload: ChatPayload) { + const body = chatBody(payload); + return { + backend: "ollama", + method: "POST", + path: "/api/chat", + body: JSON.stringify({ + model: "llama3.1", + messages: body.messages || [], + tools: body.tools, + stream: payload.stream, + }), + stream_mode: "mapped", + response_codec: "ndjson", + output_protocol: "openai.chat.completion", + mapper: "builtin", + }; +} + +function customMapped(payload: ChatPayload) { + const body = chatBody(payload); + return { + backend: "custom", + method: "POST", + path: "/chat", + body: JSON.stringify({ + prompt: (body.messages || []).map((m: any) => m.content).join("\n"), + stream: payload.stream, + }), + stream_mode: "mapped", + response_codec: "ndjson", + output_protocol: "openai.chat.completion", + mapper: "plugin", + }; +} + +function routeChat(req: PluginRequest) { + const payload = jsonPayload(req); + const model = payload.model || chatBody(payload).model || ""; + + if (model === "llama3.1") { + return ollamaMapped(payload); + } + + if (model === "custom-agent") { + return customMapped(payload); + } + + if (model === "executor-agent") { + return { + backend: "custom_executor", + method: "POST", + path: "/executor/chat", + body: payload.body, + stream_mode: "executor", + }; + } + + return openaiPassthrough(payload); +} + +function routeResponses(req: PluginRequest) { + const payload = jsonPayload(req); + const body = chatBody(payload); + const model = payload.model || body.model || ""; + + if (model === "executor-agent") { + return { + backend: "custom_executor", + method: "POST", + path: "/executor/responses", + 
body: payload.body, + stream_mode: "executor", + output_protocol: "openai.response", + }; + } + + return { + backend: "openai", + method: "POST", + path: "/responses", + body: JSON.stringify(body), + stream_mode: "passthrough", + output_protocol: "openai.response", + }; +} + +function mapCustomFrame(req: PluginRequest) { + const payload = jsonPayload(req); + const frame = JSON.parse(payload.frame || "{}"); + + if (frame.error) { + return { error: { message: String(frame.error) } }; + } + + if (frame.tool_call) { + return { + tool_calls: [{ + index: frame.tool_call.index || 0, + id: frame.tool_call.id, + type: "function", + function: { + name: frame.tool_call.name, + arguments: frame.tool_call.arguments || "", + }, + }], + finish_reason: "tool_calls", + done: frame.done === true, + }; + } + + return { + content: frame.delta || frame.text || "", + usage: frame.usage, + done: frame.done === true, + }; +} + +function fallback(req: PluginRequest) { + const payload = jsonPayload(req); + + if (payload.failed_backend === "openai" && payload.status_code >= 500) { + const original = JSON.parse(payload.body || "{}"); + original.model = "llama3.1"; + original.stream = original.stream === true; + return { + backend: "ollama", + method: "POST", + path: "/api/chat", + body: JSON.stringify({ + model: "llama3.1", + messages: original.messages || [], + stream: original.stream, + }), + stream_mode: "mapped", + response_codec: "ndjson", + output_protocol: "openai.chat.completion", + mapper: "builtin", + }; + } + + return { not_handled: true }; +} + +export function openai(req: PluginRequest) { + switch (req.op) { + case "models": + return { models: publicModels }; + case "chat.route": + return routeChat(req); + case "responses.route": + return routeResponses(req); + case "chat.map_frame": + return mapCustomFrame(req); + case "chat.fallback": + return fallback(req); + default: + return { not_handled: true }; + } +} From a3e05eacc65889b172c68f4e25429116e846ee0d Mon Sep 17 00:00:00 2001 
From: weigang Date: Sun, 10 May 2026 17:36:30 +0800 Subject: [PATCH 05/10] state-store: avoid generic map value wrappers --- src/state_store.v | 76 ++++++++++++++++++++++++++---------------- src/state_store_test.v | 20 +++++++++++ 2 files changed, 67 insertions(+), 29 deletions(-) diff --git a/src/state_store.v b/src/state_store.v index 0f794ba..83fadf3 100644 --- a/src/state_store.v +++ b/src/state_store.v @@ -1,7 +1,9 @@ module main +import json import sync import time +import x.json2 pub interface StateStore[T] { mut: @@ -17,23 +19,23 @@ mut: clear() } -struct StoredValue[T] { +struct StoredValue { mut: - value T - created_at_ms i64 - updated_at_ms i64 - expires_at_ms i64 + value json2.Any + created_at_ms i64 + updated_at_ms i64 + expires_at_ms i64 } pub struct MemoryStateStore[T] { mut: mu sync.Mutex - data map[string]StoredValue[T] + data map[string]StoredValue } pub fn new_memory_state_store[T]() MemoryStateStore[T] { return MemoryStateStore[T]{ - data: map[string]StoredValue[T]{} + data: map[string]StoredValue{} } } @@ -48,10 +50,24 @@ fn state_store_expires_at_ms(ttl time.Duration) i64 { return state_store_now_ms() + ttl.milliseconds() } -fn state_store_is_expired[T](record StoredValue[T], now_ms i64) bool { +fn state_store_is_expired(record StoredValue, now_ms i64) bool { return record.expires_at_ms > 0 && record.expires_at_ms <= now_ms } +fn state_store_encode_value[T](val T) !json2.Any { + $if T is string { + return json2.Any(val) + } $else $if T is $struct { + return json2.Any(json2.map_from[T](val)) + } $else { + return json2.decode[json2.Any](json.encode(val))! + } +} + +fn state_store_decode_value[T](val json2.Any) !T { + return json2.decode[T](val.json_str())! 
+} + pub fn (mut store MemoryStateStore[T]) get(key string) !T { store.mu.@lock() defer { @@ -59,11 +75,11 @@ pub fn (mut store MemoryStateStore[T]) get(key string) !T { } if record := store.data[key] { now_ms := state_store_now_ms() - if state_store_is_expired[T](record, now_ms) { + if state_store_is_expired(record, now_ms) { store.data.delete(key) return error('state_store_key_expired:${key}') } - return record.value + return state_store_decode_value[T](record.value)! } return error('state_store_key_missing:${key}') } @@ -79,15 +95,15 @@ pub fn (mut store MemoryStateStore[T]) set_with_ttl(key string, val T, ttl time. store.mu.unlock() } if existing := store.data[key] { - store.data[key] = StoredValue[T]{ - value: val + store.data[key] = StoredValue{ + value: state_store_encode_value[T](val)! created_at_ms: existing.created_at_ms updated_at_ms: now_ms expires_at_ms: state_store_expires_at_ms(ttl) } } else { - store.data[key] = StoredValue[T]{ - value: val + store.data[key] = StoredValue{ + value: state_store_encode_value[T](val)! 
created_at_ms: now_ms updated_at_ms: now_ms expires_at_ms: state_store_expires_at_ms(ttl) @@ -110,7 +126,7 @@ pub fn (mut store MemoryStateStore[T]) exists(key string) bool { } if record := store.data[key] { now_ms := state_store_now_ms() - if state_store_is_expired[T](record, now_ms) { + if state_store_is_expired(record, now_ms) { store.data.delete(key) return false } @@ -128,7 +144,7 @@ pub fn (mut store MemoryStateStore[T]) keys() []string { mut keys := []string{} mut expired := []string{} for key, record in store.data { - if state_store_is_expired[T](record, now_ms) { + if state_store_is_expired(record, now_ms) { expired << key continue } @@ -150,11 +166,11 @@ pub fn (mut store MemoryStateStore[T]) list() []T { mut values := []T{} mut expired := []string{} for key, record in store.data { - if state_store_is_expired[T](record, now_ms) { + if state_store_is_expired(record, now_ms) { expired << key continue } - values << record.value + values << state_store_decode_value[T](record.value) or { continue } } for key in expired { store.data.delete(key) @@ -169,11 +185,13 @@ pub fn (mut store MemoryStateStore[T]) patch(key string, updater fn (mut T) !) ! } if mut record := store.data[key] { now_ms := state_store_now_ms() - if state_store_is_expired[T](record, now_ms) { + if state_store_is_expired(record, now_ms) { store.data.delete(key) return error('state_store_key_expired:${key}') } - updater(mut record.value)! + mut value := state_store_decode_value[T](record.value)! + updater(mut value)! + record.value = state_store_encode_value[T](value)! 
record.updated_at_ms = now_ms store.data[key] = record return @@ -189,7 +207,7 @@ pub fn (mut store MemoryStateStore[T]) prune_expired() int { now_ms := state_store_now_ms() mut expired := []string{} for key, record in store.data { - if state_store_is_expired[T](record, now_ms) { + if state_store_is_expired(record, now_ms) { expired << key } } @@ -214,7 +232,7 @@ pub fn (mut store MemoryStateStore[string]) compare_and_swap_set_with_ttl(key st store.mu.unlock() } if mut existing := store.data[key] { - if state_store_is_expired[string](existing, now_ms) { + if state_store_is_expired(existing, now_ms) { store.data.delete(key) if expected_found { return false @@ -223,11 +241,11 @@ pub fn (mut store MemoryStateStore[string]) compare_and_swap_set_with_ttl(key st if !expected_found { return false } - if existing.value != expected_value { + if state_store_decode_value[string](existing.value)! != expected_value { return false } - store.data[key] = StoredValue[string]{ - value: next_value + store.data[key] = StoredValue{ + value: state_store_encode_value[string](next_value)! created_at_ms: existing.created_at_ms updated_at_ms: now_ms expires_at_ms: state_store_expires_at_ms(ttl) @@ -238,8 +256,8 @@ pub fn (mut store MemoryStateStore[string]) compare_and_swap_set_with_ttl(key st if expected_found { return false } - store.data[key] = StoredValue[string]{ - value: next_value + store.data[key] = StoredValue{ + value: state_store_encode_value[string](next_value)! 
created_at_ms: now_ms updated_at_ms: now_ms expires_at_ms: state_store_expires_at_ms(ttl) @@ -254,14 +272,14 @@ pub fn (mut store MemoryStateStore[string]) compare_and_swap_delete(key string, store.mu.unlock() } if existing := store.data[key] { - if state_store_is_expired[string](existing, now_ms) { + if state_store_is_expired(existing, now_ms) { store.data.delete(key) return !expected_found } if !expected_found { return false } - if existing.value != expected_value { + if state_store_decode_value[string](existing.value)! != expected_value { return false } store.data.delete(key) diff --git a/src/state_store_test.v b/src/state_store_test.v index 96ba2fb..7a08569 100644 --- a/src/state_store_test.v +++ b/src/state_store_test.v @@ -2,6 +2,12 @@ module main import time +struct MemoryStateStoreTestRecord { + id string + status string + count int +} + fn test_memory_state_store_set_get_and_keys() { mut store := new_memory_state_store[string]() store.set('alpha', 'a') or { panic(err) } @@ -14,6 +20,20 @@ fn test_memory_state_store_set_get_and_keys() { assert store.list().len == 2 } +fn test_memory_state_store_roundtrips_struct_values() { + mut store := new_memory_state_store[MemoryStateStoreTestRecord]() + store.set('resp', MemoryStateStoreTestRecord{ + id: 'resp_1' + status: 'completed' + count: 2 + }) or { panic(err) } + + record := store.get('resp') or { panic(err) } + assert record.id == 'resp_1' + assert record.status == 'completed' + assert record.count == 2 +} + fn test_memory_state_store_ttl_expiry_and_prune() { mut store := new_memory_state_store[string]() store.set_with_ttl('short', 'x', 20 * time.millisecond) or { panic(err) } From 9b6e00d7396246186bdc46bd6c938c9ce5c47325 Mon Sep 17 00:00:00 2001 From: weigang Date: Sun, 10 May 2026 17:55:01 +0800 Subject: [PATCH 06/10] docs: clarify embedded vjsx runtime assets --- README.md | 9 +++++---- scripts/doctor.sh | 4 ++-- scripts/runtime_doctor.sh | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff 
--git a/README.md b/README.md index f48fb28..8be840e 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,7 @@ Practical summary: `vhttpd` now splits local dependencies into install profiles so you do not need to guess the full system package list up front. - `core`: base build dependencies for `vhttpd` -- `vjsx`: embedded runtime support, including the required QuickJS archive placement under `~/.vmodules/vjsx/libs/` +- `vjsx`: embedded runtime build support, including the required QuickJS archive placement under `~/.vmodules/vjsx/libs/` - `db`: alias for the default DB-capable dependency surface - `full`: everything above @@ -242,15 +242,16 @@ What these targets do: - `make deps-core`: installs `openssl`, `Boehm GC`, `pkg-config`, and basic build tooling - `make deps-core` also installs SQLite/MySQL/PostgreSQL client development packages because default `vhttpd` builds now include DB support -- `make deps-vjsx`: ensures `~/.vmodules/vjsx` exists and, on Linux, builds QuickJS and places it at `~/.vmodules/vjsx/libs/qjs_linux_x64.a` +- `make deps-vjsx`: ensures the local `vjsx` module checkout exists for builds and, on Linux, builds QuickJS and places it at `~/.vmodules/vjsx/libs/qjs_linux_x64.a` - `make deps-db`: alias for the same default DB-capable build dependency set - `make doctor`: checks the current machine for the required commands, `pkg-config` entries, and `vjsx` QuickJS archive placement -Important `vjsx` note: +Important `vjsx` build note: - On Linux, `vjsx` does not just need "QuickJS installed somewhere". - The archive must exist at [~/.vmodules/vjsx/libs/qjs_linux_x64.a](/Users/guweigang/.vmodules/vjsx/libs/qjs_linux_x64.a). - `make deps-vjsx` is the supported way to prepare that path locally. +- This is a build-time module layout. Packaged `vhttpd` binaries do not need a local `~/.vmodules/vjsx` checkout just to load embedded JavaScript or TypeScript runtime assets. 
## Build @@ -344,7 +345,7 @@ The binary is rewritten during packaging so it prefers these bundled copies: That means end users no longer need to preinstall MySQL/PostgreSQL/OpenSSL/Boehm runtime packages just to launch the release binary. -`vjsx` runtime JS assets are embedded into the `vjsx` binary integration, so packaged `vhttpd` releases no longer need to ship `runtime/vjsx` JS files. `VJSX_ASSET_ROOT` is still supported as an explicit development override when you need to test a local replacement for built-in `vjsx` runtime JS. +`vjsx` runtime assets, including the TypeScript compiler runtime under `thirdparty/typescript/lib`, are embedded into the `vjsx` binary integration. Packaged `vhttpd` releases no longer need to ship `runtime/vjsx` JS files or a local `~/.vmodules/vjsx/thirdparty/typescript` tree. `VJSX_ASSET_ROOT` is still supported as an explicit development override when you need to test a local replacement for built-in `vjsx` runtime assets. After installation, users can verify the machine with: diff --git a/scripts/doctor.sh b/scripts/doctor.sh index 631e4bf..0d6e056 100755 --- a/scripts/doctor.sh +++ b/scripts/doctor.sh @@ -68,9 +68,9 @@ check_pkg openssl check_pkg bdw-gc if [ -e "$vjsx_dir" ]; then - ok "vjsx module path ${vjsx_dir}" + ok "vjsx build module path ${vjsx_dir}" else - warn "vjsx module path ${vjsx_dir} is absent; run ./scripts/install_deps.sh vjsx if you need embedded runtime support" + warn "vjsx build module path ${vjsx_dir} is absent; run ./scripts/install_deps.sh vjsx before building embedded runtime support" fi if [ -e "$vjsx_dir" ]; then diff --git a/scripts/runtime_doctor.sh b/scripts/runtime_doctor.sh index 22fba10..3978f1b 100755 --- a/scripts/runtime_doctor.sh +++ b/scripts/runtime_doctor.sh @@ -109,13 +109,13 @@ fi if [ -n "$vjsx_asset_root_override" ]; then ok "vjsx runtime asset override ${vjsx_asset_root_override}" else - ok "vjsx runtime assets are embedded; VJSX_ASSET_ROOT is unset" + ok "vjsx runtime assets, 
including TypeScript runtime assets, are embedded; VJSX_ASSET_ROOT is unset" fi if [ -L "${HOME}/.vmodules/vjsx" ]; then - warn "legacy vjsx compatibility symlink ${HOME}/.vmodules/vjsx is present but no longer required" + warn "legacy vjsx compatibility symlink ${HOME}/.vmodules/vjsx is present but no longer required at runtime" else - ok "legacy vjsx compatibility symlink ${HOME}/.vmodules/vjsx is absent" + ok "legacy vjsx compatibility symlink ${HOME}/.vmodules/vjsx is absent; runtime assets are embedded" fi if [ "$status" -ne 0 ]; then From c263a2193f696c50ff2db8677724c744c92a6058 Mon Sep 17 00:00:00 2001 From: weigang Date: Sun, 10 May 2026 17:55:07 +0800 Subject: [PATCH 07/10] test: isolate vjsx TypeScript asset root --- src/inproc_vjsx_executor_test.v | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/inproc_vjsx_executor_test.v b/src/inproc_vjsx_executor_test.v index f21ef53..a06e755 100644 --- a/src/inproc_vjsx_executor_test.v +++ b/src/inproc_vjsx_executor_test.v @@ -862,13 +862,22 @@ fn test_inproc_vjsx_executor_dispatch_http_supports_redirect_helper() { fn test_inproc_vjsx_executor_dispatch_http_supports_typescript_module_entry() { temp_dir := os.join_path(os.temp_dir(), 'vhttpd_vjsx_executor_ts_test') + asset_root := os.join_path(temp_dir, 'empty-asset-root') os.mkdir_all(temp_dir) or { panic(err) } + os.mkdir_all(asset_root) or { panic(err) } app_file := os.join_path(temp_dir, 'handler.mts') os.write_file(app_file, 'function handler(ctx) { return { status: 206, headers: { "content-type": "application/json; charset=utf-8" }, body: JSON.stringify({ ok: true, message: "hello " + ctx.queryParam("name", "guest"), laneId: ctx.runtime.laneId }) }; }\nglobalThis.__vhttpd_handle = handler;\nexport default handler;\n') or { panic(err) } + old_asset_root := os.getenv('VJSX_ASSET_ROOT') + os.setenv('VJSX_ASSET_ROOT', asset_root, true) defer { - os.rm(app_file) or {} + if old_asset_root == '' { + os.unsetenv('VJSX_ASSET_ROOT') + } 
else { + os.setenv('VJSX_ASSET_ROOT', old_asset_root, true) + } + os.rmdir_all(temp_dir) or {} } mut executor := new_inproc_vjsx_executor(VjsxRuntimeFacadeConfig{ thread_count: 1 From c8fe8556455e3eb72ac2ad8eb68db3b0901eb69e Mon Sep 17 00:00:00 2001 From: weigang Date: Sun, 10 May 2026 18:02:31 +0800 Subject: [PATCH 08/10] ci: build V from latest master --- .github/workflows/vhttpd-binaries.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/vhttpd-binaries.yml b/.github/workflows/vhttpd-binaries.yml index e15898f..3d21e8a 100644 --- a/.github/workflows/vhttpd-binaries.yml +++ b/.github/workflows/vhttpd-binaries.yml @@ -29,7 +29,6 @@ jobs: env: VHTTPD_VJSX_ROOT: /usr/local/share/vhttpd/vjsx V_REPO: https://github.com/guweigang/vlang - V_REF: 06438457b7fed78397588e7c0797b9e0d7483257 strategy: fail-fast: false matrix: @@ -96,7 +95,6 @@ jobs: unzip -q /tmp/v-bootstrap.zip -d /tmp/v-bootstrap rm -rf /tmp/v git clone "$V_REPO" /tmp/v - git -C /tmp/v checkout "$V_REF" chmod +x /tmp/v-bootstrap/v/v ( cd /tmp/v From dd0f9ff8e37eb72718ea042b59f181f79b018f71 Mon Sep 17 00:00:00 2001 From: weigang Date: Sun, 10 May 2026 20:32:45 +0800 Subject: [PATCH 09/10] build: use cc for V compilation --- .github/workflows/vhttpd-binaries.yml | 4 ++-- Makefile | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/vhttpd-binaries.yml b/.github/workflows/vhttpd-binaries.yml index 3d21e8a..70f459f 100644 --- a/.github/workflows/vhttpd-binaries.yml +++ b/.github/workflows/vhttpd-binaries.yml @@ -98,8 +98,8 @@ jobs: chmod +x /tmp/v-bootstrap/v/v ( cd /tmp/v - /tmp/v-bootstrap/v/v -o v cmd/v - ./v -o v cmd/v + /tmp/v-bootstrap/v/v -cc cc -o v cmd/v + ./v -cc cc -o v cmd/v ) chmod +x /tmp/v/v echo "/tmp/v" >> "$GITHUB_PATH" diff --git a/Makefile b/Makefile index bb619dc..12efd00 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ ROOT := $(CURDIR) SRC_DIR := $(ROOT)/src VHTTPD_BIN ?= $(ROOT)/vhttpd +V_CC ?= cc VPHP_V_GC ?= 
auto VPHP_V_GC_STRIPPED := $(strip $(VPHP_V_GC)) RESOLVED_VPHP_V_GC := $(shell if [ -n "$(VPHP_V_GC_STRIPPED)" ] && [ "$(VPHP_V_GC_STRIPPED)" != "auto" ]; then printf "%s" "$(VPHP_V_GC_STRIPPED)"; elif pkg-config --exists bdw-gc 2>/dev/null; then printf boehm; else printf none; fi) @@ -79,12 +80,12 @@ ifeq ($(WITH_DB),1) endif build: prepare-build-src - v $(V_FLAGS) $(V_DB_FLAGS) $(V_GC_FLAG) -o $(VHTTPD_BIN) $(BUILD_STAGE_DIR) + v -cc $(V_CC) $(V_FLAGS) $(V_DB_FLAGS) $(V_GC_FLAG) -o $(VHTTPD_BIN) $(BUILD_STAGE_DIR) vhttpd: build prod: prepare-build-src - v $(V_FLAGS) $(V_DB_FLAGS) $(V_GC_FLAG) $(V_PROD_FLAGS) $(V_NOCACHE_FLAGS) -o $(VHTTPD_BIN) $(BUILD_STAGE_DIR) + v -cc $(V_CC) $(V_FLAGS) $(V_DB_FLAGS) $(V_GC_FLAG) $(V_PROD_FLAGS) $(V_NOCACHE_FLAGS) -o $(VHTTPD_BIN) $(BUILD_STAGE_DIR) build-prod: prod @@ -127,22 +128,22 @@ psr-matrix: test: test-fast test-fast: - v test $(FAST_TEST_FILES) + v -cc $(V_CC) test $(FAST_TEST_FILES) test-inproc: - v test $(INPROC_TEST_FILES) + v -cc $(V_CC) test $(INPROC_TEST_FILES) test-codexbot: - v test $(CODEXBOT_TEST_FILES) + v -cc $(V_CC) test $(CODEXBOT_TEST_FILES) test-codexbot-fast: - v test $(CODEXBOT_FAST_TEST_FILES) + v -cc $(V_CC) test $(CODEXBOT_FAST_TEST_FILES) test-codexbot-lifecycle: - v test $(CODEXBOT_LIFECYCLE_TEST_FILES) + v -cc $(V_CC) test $(CODEXBOT_LIFECYCLE_TEST_FILES) test-profile-codexbot: @/bin/zsh $(ROOT)/tools/profile_codexbot_tests.sh $(ROOT) test-all: - v test $(SRC_DIR) + v -cc $(V_CC) test $(SRC_DIR) From 76247e8d1edc69e7fdd8860a0442982be4e6d1ca Mon Sep 17 00:00:00 2001 From: weigang Date: Sun, 10 May 2026 20:34:04 +0800 Subject: [PATCH 10/10] ci: pass cc to production build --- .github/workflows/vhttpd-binaries.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/vhttpd-binaries.yml b/.github/workflows/vhttpd-binaries.yml index 70f459f..9ae848b 100644 --- a/.github/workflows/vhttpd-binaries.yml +++ b/.github/workflows/vhttpd-binaries.yml @@ -156,7 +156,7 @@ jobs: 
shell: bash run: | set -euo pipefail - make prod VPHP_V_GC=boehm WITH_DB=1 + make prod V_CC=cc VPHP_V_GC=boehm WITH_DB=1 - name: Smoke test shell: bash