Skip to content

Commit 988a7f0

Browse files
Improve conformance test with dynamic tool calls and JSON normalization
- Add dynamic tool call testing (list_available_toolsets, get_toolset_tools, enable_toolset)
- Parse and sort embedded JSON in text fields for proper comparison
- Separate progress output (stderr) from summary (stdout) for CI
- Add test type field to distinguish standard vs dynamic tests
1 parent 3b0cc2a commit 988a7f0

File tree

1 file changed

+180
-61
lines changed

1 file changed

+180
-61
lines changed

script/conformance-test

Lines changed: 180 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -4,53 +4,62 @@ set -e
44
# Conformance test script for comparing MCP server behavior between branches
55
# Builds both main and current branch, runs various flag combinations,
66
# and produces a conformance report with timing and diffs.
7+
#
8+
# Output:
9+
# - Progress/status messages go to stderr (for visibility in CI)
10+
# - Final report summary goes to stdout (for piping/capture)
711

812
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
913
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
1014
REPORT_DIR="$PROJECT_DIR/conformance-report"
1115
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
1216

13-
# Colors for output
17+
# Colors for output (only used on stderr)
1418
RED='\033[0;31m'
1519
GREEN='\033[0;32m'
1620
YELLOW='\033[1;33m'
1721
BLUE='\033[0;34m'
1822
NC='\033[0m' # No Color
1923

20-
echo -e "${BLUE}=== MCP Server Conformance Test ===${NC}"
21-
echo "Current branch: $CURRENT_BRANCH"
22-
echo "Report directory: $REPORT_DIR"
24+
# Helper to print to stderr
25+
# Print a progress/status message to stderr, keeping stdout free for the
# final summary.
# Arguments: any number of strings; they are joined with single spaces and
#            backslash escapes (e.g. the colour codes) are interpreted.
# Outputs:   one line on stderr, nothing on stdout.
log() {
  # Pin IFS so "$*" always joins with a space, whatever the caller's IFS is.
  local IFS=' '
  # printf '%b' interprets the same escapes as `echo -e` but never mistakes a
  # message such as "-n" for an option.
  printf '%b\n' "$*" >&2
}
28+
29+
log "${BLUE}=== MCP Server Conformance Test ===${NC}"
30+
log "Current branch: $CURRENT_BRANCH"
31+
log "Report directory: $REPORT_DIR"
2332

2433
# Find the common ancestor
2534
MERGE_BASE=$(git merge-base HEAD origin/main)
26-
echo "Comparing against merge-base: $MERGE_BASE"
27-
echo ""
35+
log "Comparing against merge-base: $MERGE_BASE"
36+
log ""
2837

2938
# Create report directory
3039
rm -rf "$REPORT_DIR"
3140
mkdir -p "$REPORT_DIR"/{main,branch,diffs}
3241

3342
# Build binaries
34-
echo -e "${YELLOW}Building binaries...${NC}"
43+
log "${YELLOW}Building binaries...${NC}"
3544

36-
echo "Building current branch ($CURRENT_BRANCH)..."
45+
log "Building current branch ($CURRENT_BRANCH)..."
3746
# The script runs under `set -e`, so a bare `go build` followed by `VAR=$?`
# would abort on failure before the status is ever recorded. Capture each
# status with `|| VAR=$?` instead, so the temporary worktree is always
# cleaned up and the combined failure check below is reachable.
BRANCH_BUILD_OK=0
go build -o "$REPORT_DIR/branch/github-mcp-server" ./cmd/github-mcp-server || BRANCH_BUILD_OK=$?

log "Building main branch (using temp worktree at merge-base)..."
TEMP_WORKTREE=$(mktemp -d)
MAIN_BUILD_OK=0
git worktree add --quiet "$TEMP_WORKTREE" "$MERGE_BASE" || MAIN_BUILD_OK=$?
if [ "$MAIN_BUILD_OK" -eq 0 ]; then
  (cd "$TEMP_WORKTREE" && go build -o "$REPORT_DIR/main/github-mcp-server" ./cmd/github-mcp-server) || MAIN_BUILD_OK=$?
fi
# If the worktree was never registered, fall back to removing the directory.
git worktree remove --force "$TEMP_WORKTREE" 2>/dev/null || rm -rf -- "${TEMP_WORKTREE:?}"

if [ "$BRANCH_BUILD_OK" -ne 0 ] || [ "$MAIN_BUILD_OK" -ne 0 ]; then
  log "${RED}Build failed!${NC}"
  exit 1
fi
5160

52-
echo -e "${GREEN}Both binaries built successfully${NC}"
53-
echo ""
61+
log "${GREEN}Both binaries built successfully${NC}"
62+
log ""
5463

5564
# MCP JSON-RPC messages
5665
INIT_MSG='{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"conformance-test","version":"1.0.0"}}}'
@@ -59,13 +68,40 @@ LIST_TOOLS_MSG='{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}'
5968
LIST_RESOURCES_MSG='{"jsonrpc":"2.0","id":3,"method":"resources/listTemplates","params":{}}'
6069
LIST_PROMPTS_MSG='{"jsonrpc":"2.0","id":4,"method":"prompts/list","params":{}}'
6170

71+
# Dynamic toolset management tool calls (for dynamic mode testing)
72+
LIST_TOOLSETS_MSG='{"jsonrpc":"2.0","id":10,"method":"tools/call","params":{"name":"list_available_toolsets","arguments":{}}}'
73+
GET_TOOLSET_TOOLS_MSG='{"jsonrpc":"2.0","id":11,"method":"tools/call","params":{"name":"get_toolset_tools","arguments":{"toolset":"repos"}}}'
74+
ENABLE_TOOLSET_MSG='{"jsonrpc":"2.0","id":12,"method":"tools/call","params":{"name":"enable_toolset","arguments":{"toolset":"repos"}}}'
75+
LIST_TOOLSETS_AFTER_MSG='{"jsonrpc":"2.0","id":13,"method":"tools/call","params":{"name":"list_available_toolsets","arguments":{}}}'
76+
6277
# Function to normalize JSON for comparison
6378
# Sorts all arrays (including nested ones) and formats consistently
79+
# Also handles embedded JSON strings in "text" fields (from tool call responses)
6480
# Normalize a JSON file in place so two responses can be compared with diff.
#
# Every array (at any depth) is sorted by the string form of its elements and
# every object is rebuilt with its keys in sorted order. A "text" field whose
# string value starts with "[" or "{" is treated as embedded JSON: it is
# parsed, normalized the same way and re-serialized. If that string turns out
# not to be valid JSON it is kept unchanged instead of failing the whole file.
#
# Arguments: $1 - path to the JSON file. Missing or empty files are skipped.
# Returns:   0 always; if jq cannot process the file it is left untouched and
#            the temporary file is removed.
normalize_json() {
  local file="$1"
  local tmp="${file}.tmp"

  [ -s "$file" ] || return 0

  # jq stderr is deliberately discarded: normalization is best-effort and an
  # unparseable response should simply show up in the later diff.
  if jq -S '
    # Recursively sort arrays and rebuild objects with keys in sorted order,
    # so the string form used for sorting does not depend on key order.
    def deep_sort:
      if type == "array" then
        [.[] | deep_sort] | sort_by(tostring)
      elif type == "object" then
        . as $obj
        | reduce (keys[]) as $k ({}; .[$k] = ($obj[$k] | deep_sort))
      else
        .
      end;

    # Normalize JSON embedded in "text" strings; keep the raw string when it
    # only looks like JSON but does not parse.
    walk(
      if type == "object"
         and (.text | type) == "string"
         and ((.text | startswith("[")) or (.text | startswith("{"))) then
        .text as $raw
        | .text = (try ($raw | fromjson | deep_sort | tojson) catch $raw)
      else
        .
      end
    ) | deep_sort
  ' "$file" 2>/dev/null > "$tmp"; then
    mv "$tmp" "$file"
  else
    rm -f "$tmp"
  fi
}
71107

@@ -118,23 +154,84 @@ run_mcp_test() {
118154
echo "$duration"
119155
}
120156

121-
# Test configurations - array of "name|flags"
157+
# Function to run MCP server with dynamic tool calls (for dynamic mode testing)
158+
# Run one MCP server binary over stdio and exercise the dynamic toolset tools.
#
# Messages sent, in order: initialize, initialized notification,
# list_available_toolsets, get_toolset_tools (repos), enable_toolset (repos),
# list_available_toolsets again. Each response is matched by its JSON-RPC id
# and written to "<output_prefix>_<endpoint>.json", then normalized.
#
# Globals:   INIT_MSG, INITIALIZED_MSG, LIST_TOOLSETS_MSG,
#            GET_TOOLSET_TOOLS_MSG, ENABLE_TOOLSET_MSG,
#            LIST_TOOLSETS_AFTER_MSG (read)
# Arguments: $1 - path to the server binary
#            $2 - label for the run (accepted for symmetry with the caller;
#                 not used inside this function)
#            $3 - space-separated server flags, may be empty
#            $4 - output file prefix
# Outputs:   elapsed wall-clock seconds on stdout
run_mcp_dynamic_test() {
  local binary="$1"
  local flags="$3"
  local output_prefix="$4"

  local start_time end_time duration output line id endpoint
  local -a flag_args=()
  local -a endpoints=(
    initialize
    list_toolsets_before
    get_toolset_tools
    enable_toolset
    list_toolsets_after
  )

  # Split the flag string into words once, without exposing it to globbing.
  IFS=' ' read -r -a flag_args <<< "$flags"

  start_time=$(date +%s.%N)

  # The short sleeps give the server time to answer each call before the next
  # one is sent; the final sleep keeps stdin open for the last response.
  output=$(
    {
      printf '%s\n' "$INIT_MSG" "$INITIALIZED_MSG" "$LIST_TOOLSETS_MSG"
      sleep 0.1
      printf '%s\n' "$GET_TOOLSET_TOOLS_MSG"
      sleep 0.1
      printf '%s\n' "$ENABLE_TOOLSET_MSG"
      sleep 0.1
      printf '%s\n' "$LIST_TOOLSETS_AFTER_MSG"
      sleep 0.3
    } | GITHUB_PERSONAL_ACCESS_TOKEN=1 "$binary" stdio "${flag_args[@]}" 2>/dev/null
  )

  end_time=$(date +%s.%N)
  duration=$(echo "$end_time - $start_time" | bc)

  # Route each response line to its file by JSON-RPC id; lines that are not
  # JSON or carry an unknown id are ignored.
  while IFS= read -r line; do
    id=$(printf '%s\n' "$line" | jq -r '.id // empty' 2>/dev/null) || continue
    case "$id" in
      1) endpoint=initialize ;;
      10) endpoint=list_toolsets_before ;;
      11) endpoint=get_toolset_tools ;;
      12) endpoint=enable_toolset ;;
      13) endpoint=list_toolsets_after ;;
      *) continue ;;
    esac
    printf '%s\n' "$line" | jq -S '.' > "${output_prefix}_${endpoint}.json" 2>/dev/null || true
  done <<< "$output"

  # Make sure every expected file exists (empty when no response arrived),
  # then normalize each one for diffing.
  for endpoint in "${endpoints[@]}"; do
    touch "${output_prefix}_${endpoint}.json"
    normalize_json "${output_prefix}_${endpoint}.json"
  done

  printf '%s\n' "$duration"
}
215+
216+
# Test configurations - array of "name|flags|type"
217+
# type can be "standard" or "dynamic" (for dynamic tool call testing)
122218
declare -a TEST_CONFIGS=(
123-
"default|"
124-
"read-only|--read-only"
125-
"dynamic-toolsets|--dynamic-toolsets"
126-
"read-only+dynamic|--read-only --dynamic-toolsets"
127-
"toolsets-repos|--toolsets=repos"
128-
"toolsets-issues|--toolsets=issues"
129-
"toolsets-pull_requests|--toolsets=pull_requests"
130-
"toolsets-repos,issues|--toolsets=repos,issues"
131-
"toolsets-all|--toolsets=all"
132-
"tools-get_me|--tools=get_me"
133-
"tools-get_me,list_issues|--tools=get_me,list_issues"
134-
"toolsets-repos+read-only|--toolsets=repos --read-only"
135-
"toolsets-all+dynamic|--toolsets=all --dynamic-toolsets"
136-
"toolsets-repos+dynamic|--toolsets=repos --dynamic-toolsets"
137-
"toolsets-repos,issues+dynamic|--toolsets=repos,issues --dynamic-toolsets"
219+
"default||standard"
220+
"read-only|--read-only|standard"
221+
"dynamic-toolsets|--dynamic-toolsets|standard"
222+
"read-only+dynamic|--read-only --dynamic-toolsets|standard"
223+
"toolsets-repos|--toolsets=repos|standard"
224+
"toolsets-issues|--toolsets=issues|standard"
225+
"toolsets-pull_requests|--toolsets=pull_requests|standard"
226+
"toolsets-repos,issues|--toolsets=repos,issues|standard"
227+
"toolsets-all|--toolsets=all|standard"
228+
"tools-get_me|--tools=get_me|standard"
229+
"tools-get_me,list_issues|--tools=get_me,list_issues|standard"
230+
"toolsets-repos+read-only|--toolsets=repos --read-only|standard"
231+
"toolsets-all+dynamic|--toolsets=all --dynamic-toolsets|standard"
232+
"toolsets-repos+dynamic|--toolsets=repos --dynamic-toolsets|standard"
233+
"toolsets-repos,issues+dynamic|--toolsets=repos,issues --dynamic-toolsets|standard"
234+
"dynamic-tool-calls|--dynamic-toolsets|dynamic"
138235
)
139236

140237
# Summary arrays
@@ -143,50 +240,63 @@ declare -a MAIN_TIMES
143240
declare -a BRANCH_TIMES
144241
declare -a DIFF_STATUS
145242

146-
echo -e "${YELLOW}Running conformance tests...${NC}"
147-
echo ""
243+
log "${YELLOW}Running conformance tests...${NC}"
244+
log ""
148245

149246
for config in "${TEST_CONFIGS[@]}"; do
150-
IFS='|' read -r test_name flags <<< "$config"
247+
IFS='|' read -r test_name flags test_type <<< "$config"
151248

152-
echo -e "${BLUE}Test: ${test_name}${NC}"
153-
echo " Flags: ${flags:-<none>}"
249+
log "${BLUE}Test: ${test_name}${NC}"
250+
log " Flags: ${flags:-<none>}"
251+
log " Type: ${test_type}"
154252

155253
# Create output directories
156254
mkdir -p "$REPORT_DIR/main/$test_name"
157255
mkdir -p "$REPORT_DIR/branch/$test_name"
158256
mkdir -p "$REPORT_DIR/diffs/$test_name"
159257

160-
# Run main version
161-
main_time=$(run_mcp_test "$REPORT_DIR/main/github-mcp-server" "main" "$flags" "$REPORT_DIR/main/$test_name/output")
162-
echo " Main: ${main_time}s"
163-
164-
# Run branch version
165-
branch_time=$(run_mcp_test "$REPORT_DIR/branch/github-mcp-server" "branch" "$flags" "$REPORT_DIR/branch/$test_name/output")
166-
echo " Branch: ${branch_time}s"
258+
if [ "$test_type" = "dynamic" ]; then
259+
# Run dynamic tool call test
260+
main_time=$(run_mcp_dynamic_test "$REPORT_DIR/main/github-mcp-server" "main" "$flags" "$REPORT_DIR/main/$test_name/output")
261+
log " Main: ${main_time}s"
262+
263+
branch_time=$(run_mcp_dynamic_test "$REPORT_DIR/branch/github-mcp-server" "branch" "$flags" "$REPORT_DIR/branch/$test_name/output")
264+
log " Branch: ${branch_time}s"
265+
266+
endpoints="initialize list_toolsets_before get_toolset_tools enable_toolset list_toolsets_after"
267+
else
268+
# Run standard test
269+
main_time=$(run_mcp_test "$REPORT_DIR/main/github-mcp-server" "main" "$flags" "$REPORT_DIR/main/$test_name/output")
270+
log " Main: ${main_time}s"
271+
272+
branch_time=$(run_mcp_test "$REPORT_DIR/branch/github-mcp-server" "branch" "$flags" "$REPORT_DIR/branch/$test_name/output")
273+
log " Branch: ${branch_time}s"
274+
275+
endpoints="initialize tools resources prompts"
276+
fi
167277

168278
# Calculate time difference
169279
time_diff=$(echo "$branch_time - $main_time" | bc)
170280
if (( $(echo "$time_diff > 0" | bc -l) )); then
171-
echo -e " Δ Time: ${RED}+${time_diff}s (slower)${NC}"
281+
log " Δ Time: ${RED}+${time_diff}s (slower)${NC}"
172282
else
173-
echo -e " Δ Time: ${GREEN}${time_diff}s (faster)${NC}"
283+
log " Δ Time: ${GREEN}${time_diff}s (faster)${NC}"
174284
fi
175285

176286
# Generate diffs for each endpoint
177287
has_diff=false
178-
for endpoint in initialize tools resources prompts; do
288+
for endpoint in $endpoints; do
179289
main_file="$REPORT_DIR/main/$test_name/output_${endpoint}.json"
180290
branch_file="$REPORT_DIR/branch/$test_name/output_${endpoint}.json"
181291
diff_file="$REPORT_DIR/diffs/$test_name/${endpoint}.diff"
182292

183293
if ! diff -u "$main_file" "$branch_file" > "$diff_file" 2>/dev/null; then
184294
has_diff=true
185295
lines=$(wc -l < "$diff_file" | tr -d ' ')
186-
echo -e " ${YELLOW}${endpoint}: DIFF (${lines} lines)${NC}"
296+
log " ${YELLOW}${endpoint}: DIFF (${lines} lines)${NC}"
187297
else
188298
rm -f "$diff_file" # No diff, remove empty file
189-
echo -e " ${GREEN}${endpoint}: OK${NC}"
299+
log " ${GREEN}${endpoint}: OK${NC}"
190300
fi
191301
done
192302

@@ -200,7 +310,7 @@ for config in "${TEST_CONFIGS[@]}"; do
200310
DIFF_STATUS+=("OK")
201311
fi
202312

203-
echo ""
313+
log ""
204314
done
205315

206316
# Generate summary report
@@ -282,7 +392,8 @@ for i in "${!TEST_NAMES[@]}"; do
282392
echo "### $name" >> "$REPORT_FILE"
283393
echo "" >> "$REPORT_FILE"
284394

285-
for endpoint in initialize tools resources prompts; do
395+
# Check all possible endpoints
396+
for endpoint in initialize tools resources prompts list_toolsets_before get_toolset_tools enable_toolset list_toolsets_after; do
286397
diff_file="$REPORT_DIR/diffs/$name/${endpoint}.diff"
287398
if [ -f "$diff_file" ] && [ -s "$diff_file" ]; then
288399
echo "#### ${endpoint}" >> "$REPORT_FILE"
@@ -295,19 +406,27 @@ for i in "${!TEST_NAMES[@]}"; do
295406
fi
296407
done
297408

298-
echo -e "${BLUE}=== Conformance Test Complete ===${NC}"
299-
echo ""
300-
echo -e "Report: ${GREEN}$REPORT_FILE${NC}"
301-
echo ""
302-
echo "Summary:"
303-
echo " Tests passed: $ok_count"
304-
echo " Tests with diffs: $diff_count"
305-
echo " Total main time: ${total_main}s"
306-
echo " Total branch time: ${total_branch}s"
307-
echo " Time delta: $total_delta_str"
409+
log "${BLUE}=== Conformance Test Complete ===${NC}"
410+
log ""
411+
log "Report: ${GREEN}$REPORT_FILE${NC}"
412+
log ""
413+
414+
# Output summary to stdout (for CI capture)
415+
echo "=== Conformance Test Summary ==="
416+
echo "Tests passed: $ok_count"
417+
echo "Tests with diffs: $diff_count"
418+
echo "Total main time: ${total_main}s"
419+
echo "Total branch time: ${total_branch}s"
420+
echo "Time delta: $total_delta_str"
308421

309422
if [ $diff_count -gt 0 ]; then
423+
log ""
424+
log "${YELLOW}⚠️ Some tests have differences. Review the diffs in:${NC}"
425+
log " $REPORT_DIR/diffs/"
426+
echo ""
427+
echo "RESULT: DIFFERENCES FOUND"
428+
# Don't exit with error - diffs may be intentional improvements
429+
else
310430
echo ""
311-
echo -e "${YELLOW}⚠️ Some tests have differences. Review the diffs in:${NC}"
312-
echo " $REPORT_DIR/diffs/"
431+
echo "RESULT: ALL TESTS PASSED"
313432
fi

0 commit comments

Comments
 (0)