From a6e9af4ab155736e0080e66153f831f856603abb Mon Sep 17 00:00:00 2001 From: Claude Code Date: Sun, 8 Mar 2026 06:37:22 +0200 Subject: [PATCH 1/2] fix: panic recovery returns proper error result; safe tokenizer nil handling [#318] Three independent fixes for the crash chain reported in #318: 1. Named return values in panic recovery (handleCallToolVariant, handleCallTool): recover() now sets callResult via named returns instead of returning (nil, nil), preventing the second unrecovered panic in mcp-go's HandleMessage. 2. Safe tokenizer access (SafeGetDefaultEncoding): Replaces unsafe type assertion `tokenizer.(*DefaultTokenizer).GetDefaultEncoding()` with a nil-safe helper that handles nil interface, nil underlying value, and non-DefaultTokenizer implementations. 3. Skip tiktoken validation for disabled tokenizer (NewTokenizer): When enabled=false, encoding validation is skipped so the fallback path always succeeds even with a corrupted tiktoken cache. 4. Guard against nil serverConfig in GenerateServerID calls. Co-Authored-By: Claude Opus 4.6 --- internal/server/mcp.go | 40 +++++++++--- internal/server/mcp_panic_recovery_test.go | 72 ++++++++++++++++++++++ internal/server/tokens/tokenizer.go | 32 ++++++++-- internal/server/tokens/tokenizer_test.go | 67 ++++++++++++++++++++ 4 files changed, 196 insertions(+), 15 deletions(-) create mode 100644 internal/server/mcp_panic_recovery_test.go diff --git a/internal/server/mcp.go b/internal/server/mcp.go index 1ff37ee6..17aee5fa 100644 --- a/internal/server/mcp.go +++ b/internal/server/mcp.go @@ -1064,17 +1064,22 @@ func (p *MCPProxyServer) handleCallToolDestructive(ctx context.Context, request } // handleCallToolVariant is the common handler for all call_tool_* variants (Spec 018) -func (p *MCPProxyServer) handleCallToolVariant(ctx context.Context, request mcp.CallToolRequest, toolVariant string) (*mcp.CallToolResult, error) { +func (p *MCPProxyServer) handleCallToolVariant(ctx context.Context, request mcp.CallToolRequest, toolVariant string) (callResult *mcp.CallToolResult, callErr error) { // Spec 024: Track start time and context for internal tool call logging internalStartTime := time.Now() - // Add panic recovery to ensure server resilience + // Panic recovery with named return values to prevent returning (nil, nil). + // Issue #318: unnamed returns caused recover() to return zero values, + // triggering a second unrecovered panic in mcp-go when dereferencing nil *CallToolResult. defer func() { if r := recover(); r != nil { p.logger.Error("Recovered from panic in handleCallToolVariant", zap.Any("panic", r), zap.String("tool_variant", toolVariant), - zap.Any("request", request)) + zap.Any("request", request), + zap.Stack("stacktrace")) + callResult = mcp.NewToolResultError(fmt.Sprintf("Internal proxy error: recovered from panic: %v", r)) + callErr = nil } }() @@ -1326,15 +1331,21 @@ func (p *MCPProxyServer) handleCallToolVariant(ctx context.Context, request mcp. tokenMetrics = &storage.TokenMetrics{ InputTokens: inputTokens, Model: model, - Encoding: tokenizer.(*tokens.DefaultTokenizer).GetDefaultEncoding(), + Encoding: tokens.SafeGetDefaultEncoding(tokenizer), } } } + // Derive server ID safely (serverConfig may be nil if storage lookup failed) + var serverID string + if serverConfig != nil { + serverID = storage.GenerateServerID(serverConfig) + } + // Record tool call for history (even if error) toolCallRecord := &storage.ToolCallRecord{ ID: fmt.Sprintf("%d-%s", time.Now().UnixNano(), actualToolName), - ServerID: storage.GenerateServerID(serverConfig), + ServerID: serverID, ServerName: serverName, ToolName: actualToolName, Arguments: args, @@ -1493,13 +1504,16 @@ func (p *MCPProxyServer) handleCallToolVariant(ctx context.Context, request mcp. } // handleCallTool is the LEGACY call_tool handler - returns error directing to new variants (Spec 018) -func (p *MCPProxyServer) handleCallTool(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { - // Add panic recovery to ensure server resilience +func (p *MCPProxyServer) handleCallTool(ctx context.Context, request mcp.CallToolRequest) (callResult *mcp.CallToolResult, callErr error) { + // Panic recovery with named return values (issue #318). defer func() { if r := recover(); r != nil { p.logger.Error("Recovered from panic in handleCallTool", zap.Any("panic", r), - zap.Any("request", request)) + zap.Any("request", request), + zap.Stack("stacktrace")) + callResult = mcp.NewToolResultError(fmt.Sprintf("Internal proxy error: recovered from panic: %v", r)) + callErr = nil } }() @@ -1705,15 +1719,21 @@ func (p *MCPProxyServer) handleCallTool(ctx context.Context, request mcp.CallToo tokenMetrics = &storage.TokenMetrics{ InputTokens: inputTokens, Model: model, - Encoding: tokenizer.(*tokens.DefaultTokenizer).GetDefaultEncoding(), + Encoding: tokens.SafeGetDefaultEncoding(tokenizer), } } } + // Derive server ID safely (serverConfig may be nil if storage lookup failed) + var serverID string + if serverConfig != nil { + serverID = storage.GenerateServerID(serverConfig) + } + // Record tool call for history (even if error) toolCallRecord := &storage.ToolCallRecord{ ID: fmt.Sprintf("%d-%s", time.Now().UnixNano(), actualToolName), - ServerID: storage.GenerateServerID(serverConfig), + ServerID: serverID, ServerName: serverName, ToolName: actualToolName, Arguments: args, diff --git a/internal/server/mcp_panic_recovery_test.go b/internal/server/mcp_panic_recovery_test.go new file mode 100644 index 00000000..97935a80 --- /dev/null +++ b/internal/server/mcp_panic_recovery_test.go @@ -0,0 +1,72 @@ +package server + +import ( + "context" + "testing" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/smart-mcp-proxy/mcpproxy-go/internal/contracts" +) + +func TestHandleCallToolVariant_PanicRecovery(t *testing.T) { + // Issue #318: When handleCallToolVariant panics (e.g., nil tokenizer dereference), + // the recover() block must return a proper error result, not (nil, nil). + // Returning (nil, nil) causes a second unrecovered panic in mcp-go's HandleMessage + // when it dereferences the nil *CallToolResult. + proxy := createTestMCPProxyServer(t) + + // Create a request that will reach upstream call and trigger a panic. + // We use a server:tool format with a server that doesn't exist in upstream manager. + // The upstream manager's CallTool will be called, which may panic or error. + // To reliably trigger a panic, we'll test the recovery mechanism directly. + t.Run("panic returns error result not nil", func(t *testing.T) { + // Use a tool name with ":" to skip the "no server prefix" error path + // and reach the upstream call code. The upstream manager has no servers, + // so CallTool will error — but we want to test the panic path. + // + // We'll craft a scenario that causes a nil pointer dereference. + // An empty serverConfig from storage + GenerateServerID panics on nil. + request := mcp.CallToolRequest{} + request.Params.Arguments = map[string]interface{}{ + "name": "nonexistent-server:some_tool", + } + + result, err := proxy.handleCallToolVariant(context.Background(), request, contracts.ToolVariantRead) + + // After fix: must never return (nil, nil). Either result or err must be set. + if err != nil { + // An error is acceptable + return + } + require.NotNil(t, result, "panic recovery must return a non-nil result, not (nil, nil)") + assert.True(t, result.IsError, "recovered panic should be an error result") + }) +} + +func TestHandleCallToolVariant_PanicRecoveryReturnsErrorResult(t *testing.T) { + // Directly test that the named return values mechanism works by verifying + // the function signature contract: on any internal panic, we get back a + // usable *CallToolResult (never nil). + proxy := createTestMCPProxyServer(t) + + // Request a tool on a valid-looking but nonexistent server + request := mcp.CallToolRequest{} + request.Params.Arguments = map[string]interface{}{ + "name": "fake:tool", + } + + result, err := proxy.handleCallToolVariant(context.Background(), request, contracts.ToolVariantWrite) + + // The function must NEVER return (nil, nil) + assert.True(t, result != nil || err != nil, + "handleCallToolVariant must never return (nil, nil) — panic recovery must set named return values") + + if result != nil && err == nil { + // If we got a result without error, it should be marked as an error result + // (either from normal error handling or from panic recovery) + assert.True(t, result.IsError, "error results should have IsError=true") + } +} diff --git a/internal/server/tokens/tokenizer.go b/internal/server/tokens/tokenizer.go index 8dfc49fb..ddead6f4 100644 --- a/internal/server/tokens/tokenizer.go +++ b/internal/server/tokens/tokenizer.go @@ -36,16 +36,22 @@ type DefaultTokenizer struct { enabled bool } -// NewTokenizer creates a new tokenizer instance +// NewTokenizer creates a new tokenizer instance. +// When enabled=false, encoding validation is skipped so that a disabled +// tokenizer can always be created (even with a corrupted tiktoken cache). +// See issue #318. func NewTokenizer(defaultEncoding string, logger *zap.SugaredLogger, enabled bool) (*DefaultTokenizer, error) { if defaultEncoding == "" { defaultEncoding = DefaultEncoding } - // Validate encoding exists - _, err := tiktoken.GetEncoding(defaultEncoding) - if err != nil { - return nil, fmt.Errorf("invalid encoding %q: %w", defaultEncoding, err) + // Only validate encoding when tokenizer is enabled. + // A disabled tokenizer never calls tiktoken, so a corrupted cache is harmless. + if enabled { + _, err := tiktoken.GetEncoding(defaultEncoding) + if err != nil { + return nil, fmt.Errorf("invalid encoding %q: %w", defaultEncoding, err) + } } return &DefaultTokenizer{ @@ -191,3 +197,19 @@ func (t *DefaultTokenizer) GetDefaultEncoding() string { defer t.mu.RUnlock() return t.defaultEncoding } + +// SafeGetDefaultEncoding extracts the default encoding from a Tokenizer interface +// without risking a panic. Handles nil interface, nil underlying *DefaultTokenizer, +// and non-DefaultTokenizer implementations. Returns DefaultEncoding as fallback. +// See issue #318: a nil *DefaultTokenizer assigned to a Tokenizer interface creates +// a non-nil interface with nil concrete value, causing panics on type assertion. +func SafeGetDefaultEncoding(t Tokenizer) string { + if t == nil { + return DefaultEncoding + } + dt, ok := t.(*DefaultTokenizer) + if !ok || dt == nil { + return DefaultEncoding + } + return dt.GetDefaultEncoding() +} diff --git a/internal/server/tokens/tokenizer_test.go b/internal/server/tokens/tokenizer_test.go index 57d7b18c..d463d2fb 100644 --- a/internal/server/tokens/tokenizer_test.go +++ b/internal/server/tokens/tokenizer_test.go @@ -328,3 +328,70 @@ func TestSupportedEncodings(t *testing.T) { assert.Contains(t, encodings, "p50k_base") assert.Contains(t, encodings, "r50k_base") } + +func TestNewTokenizerDisabledWithInvalidEncoding(t *testing.T) { + logger := zap.NewNop().Sugar() + + // When disabled, NewTokenizer should succeed even with an invalid encoding + // (e.g., tiktoken cache is corrupted). This prevents the fallback path + // from also failing and leaving a nil tokenizer. See issue #318. + tokenizer, err := NewTokenizer("invalid_encoding_corrupted", logger, false) + require.NoError(t, err, "disabled tokenizer should not validate encoding") + require.NotNil(t, tokenizer, "disabled tokenizer should never be nil") + assert.False(t, tokenizer.IsEnabled()) + + // All methods should return zero without error when disabled + count, err := tokenizer.CountTokens("Hello, world!") + require.NoError(t, err) + assert.Equal(t, 0, count) + + count, err = tokenizer.CountTokensForModel("Hello", "gpt-4") + require.NoError(t, err) + assert.Equal(t, 0, count) + + count, err = tokenizer.CountTokensInJSON(map[string]string{"key": "value"}) + require.NoError(t, err) + assert.Equal(t, 0, count) +} + +func TestNilTokenizerInterfaceSafety(t *testing.T) { + // Simulates the scenario from issue #318 where a nil *DefaultTokenizer + // is assigned to a Tokenizer interface, creating a non-nil interface + // with a nil underlying value. SafeGetDefaultEncoding must handle this. + var nilTokenizer *DefaultTokenizer + + // Assign nil concrete to interface — interface is non-nil but underlying is nil. + // In Go, the interface value itself is non-nil (it holds type info), but the + // concrete pointer inside is nil. This is the exact bug scenario. + var iface Tokenizer = nilTokenizer + _ = iface // used below + + // Calling methods on this would panic — that's the bug. + // SafeGetDefaultEncoding should handle this safely without panic. + encoding := SafeGetDefaultEncoding(iface) + assert.Equal(t, DefaultEncoding, encoding, "should return default encoding for nil underlying") +} + +func TestSafeGetDefaultEncoding(t *testing.T) { + logger := zap.NewNop().Sugar() + + t.Run("valid DefaultTokenizer", func(t *testing.T) { + tokenizer, err := NewTokenizer("o200k_base", logger, true) + require.NoError(t, err) + + encoding := SafeGetDefaultEncoding(tokenizer) + assert.Equal(t, "o200k_base", encoding) + }) + + t.Run("nil interface", func(t *testing.T) { + encoding := SafeGetDefaultEncoding(nil) + assert.Equal(t, DefaultEncoding, encoding) + }) + + t.Run("nil DefaultTokenizer in interface", func(t *testing.T) { + var nilDT *DefaultTokenizer + var iface Tokenizer = nilDT + encoding := SafeGetDefaultEncoding(iface) + assert.Equal(t, DefaultEncoding, encoding) + }) +} From 7e9c6983a690f0fa5287ee8ea698e7a22f936c3f Mon Sep 17 00:00:00 2001 From: Claude Code Date: Sun, 8 Mar 2026 07:05:47 +0200 Subject: [PATCH 2/2] test: exercise actual panic-and-recover path in handleCallToolVariant [#318] Rewrite panic recovery tests to trigger real panics by nil-ing out proxy.storage, causing nil pointer dereference inside the handler. This verifies the named-return-value recover block actually catches panics and returns a proper error result (not nil, nil). Three subtests: - normal error path (server not found) returns error result - panic in handleCallToolVariant returns error result via recover - panic in legacy handleCallTool returns error result via recover Co-Authored-By: Claude Opus 4.6 --- internal/server/mcp_panic_recovery_test.go | 104 ++++++++++++--------- 1 file changed, 62 insertions(+), 42 deletions(-) diff --git a/internal/server/mcp_panic_recovery_test.go b/internal/server/mcp_panic_recovery_test.go index 97935a80..6488cea7 100644 --- a/internal/server/mcp_panic_recovery_test.go +++ b/internal/server/mcp_panic_recovery_test.go @@ -16,19 +16,10 @@ func TestHandleCallToolVariant_PanicRecovery(t *testing.T) { // the recover() block must return a proper error result, not (nil, nil). // Returning (nil, nil) causes a second unrecovered panic in mcp-go's HandleMessage // when it dereferences the nil *CallToolResult. - proxy := createTestMCPProxyServer(t) - - // Create a request that will reach upstream call and trigger a panic. - // We use a server:tool format with a server that doesn't exist in upstream manager. - // The upstream manager's CallTool will be called, which may panic or error. - // To reliably trigger a panic, we'll test the recovery mechanism directly. - t.Run("panic returns error result not nil", func(t *testing.T) { - // Use a tool name with ":" to skip the "no server prefix" error path - // and reach the upstream call code. The upstream manager has no servers, - // so CallTool will error — but we want to test the panic path. - // - // We'll craft a scenario that causes a nil pointer dereference. - // An empty serverConfig from storage + GenerateServerID panics on nil. + + t.Run("normal error path returns error result", func(t *testing.T) { + proxy := createTestMCPProxyServer(t) + request := mcp.CallToolRequest{} request.Params.Arguments = map[string]interface{}{ "name": "nonexistent-server:some_tool", @@ -36,37 +27,66 @@ func TestHandleCallToolVariant_PanicRecovery(t *testing.T) { result, err := proxy.handleCallToolVariant(context.Background(), request, contracts.ToolVariantRead) - // After fix: must never return (nil, nil). Either result or err must be set. - if err != nil { - // An error is acceptable - return + // Must never return (nil, nil) + assert.True(t, result != nil || err != nil, + "handleCallToolVariant must never return (nil, nil)") + if result != nil { + assert.True(t, result.IsError, "error results should have IsError=true") } - require.NotNil(t, result, "panic recovery must return a non-nil result, not (nil, nil)") + }) + + t.Run("panic in handler returns error result via recover", func(t *testing.T) { + proxy := createTestMCPProxyServer(t) + + // Nil out storage to trigger a guaranteed nil pointer panic inside + // handleCallToolVariant at p.storage.GetUpstreamServer(). This exercises + // the actual recover() path — the function panics, the deferred recover + // catches it, and sets callResult via named return values. + proxy.storage = nil + + request := mcp.CallToolRequest{} + request.Params.Arguments = map[string]interface{}{ + "name": "panic-server:panic_tool", + } + + result, err := proxy.handleCallToolVariant(context.Background(), request, contracts.ToolVariantRead) + + // Before fix: would return (nil, nil), crashing mcp-go. + // After fix: must return a proper error result. + require.NoError(t, err, "recovered panic should not return an error") + require.NotNil(t, result, "recovered panic must return a non-nil CallToolResult, not (nil, nil)") assert.True(t, result.IsError, "recovered panic should be an error result") + + // Verify the error message mentions the panic + if len(result.Content) > 0 { + if text, ok := result.Content[0].(mcp.TextContent); ok { + assert.Contains(t, text.Text, "Internal proxy error") + assert.Contains(t, text.Text, "recovered from panic") + } + } }) -} -func TestHandleCallToolVariant_PanicRecoveryReturnsErrorResult(t *testing.T) { - // Directly test that the named return values mechanism works by verifying - // the function signature contract: on any internal panic, we get back a - // usable *CallToolResult (never nil). - proxy := createTestMCPProxyServer(t) - - // Request a tool on a valid-looking but nonexistent server - request := mcp.CallToolRequest{} - request.Params.Arguments = map[string]interface{}{ - "name": "fake:tool", - } - - result, err := proxy.handleCallToolVariant(context.Background(), request, contracts.ToolVariantWrite) - - // The function must NEVER return (nil, nil) - assert.True(t, result != nil || err != nil, - "handleCallToolVariant must never return (nil, nil) — panic recovery must set named return values") - - if result != nil && err == nil { - // If we got a result without error, it should be marked as an error result - // (either from normal error handling or from panic recovery) - assert.True(t, result.IsError, "error results should have IsError=true") - } + t.Run("panic in legacy handleCallTool returns error result via recover", func(t *testing.T) { + proxy := createTestMCPProxyServer(t) + + // Same approach: nil storage triggers panic in handleCallTool + proxy.storage = nil + + request := mcp.CallToolRequest{} + request.Params.Arguments = map[string]interface{}{ + "name": "panic-server:panic_tool", + } + + result, err := proxy.handleCallTool(context.Background(), request) + + require.NoError(t, err, "recovered panic should not return an error") + require.NotNil(t, result, "recovered panic must return a non-nil CallToolResult, not (nil, nil)") + assert.True(t, result.IsError, "recovered panic should be an error result") + + if len(result.Content) > 0 { + if text, ok := result.Content[0].(mcp.TextContent); ok { + assert.Contains(t, text.Text, "Internal proxy error") + } + } + }) }