# Conformance test script for comparing MCP server behavior between branches
# Builds both the merge-base with origin/main ("main") and the current branch,
# runs various flag combinations against each binary,
# and produces a conformance report with timing and diffs.
#
# Output:
# - Progress/status messages go to stderr (for visibility in CI)
# - Final report summary goes to stdout (for piping/capture)
711
# Locations: directory containing this script, the project root one level
# above it, and the directory the conformance report is written to.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
REPORT_DIR="$PROJECT_DIR/conformance-report"
# Branch name as reported by git for the current HEAD.
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)

# Colors for output (only used on stderr)
# Stored as literal backslash sequences; they are turned into real escape
# characters by whatever prints them with escape interpretation enabled.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
1923
20- echo -e " ${BLUE} === MCP Server Conformance Test ===${NC} "
21- echo " Current branch: $CURRENT_BRANCH "
22- echo " Report directory: $REPORT_DIR "
# Helper to print a progress/status message to stderr, keeping stdout free
# for the final summary.
# Arguments: message words; joined by the first IFS character (a space by
#            default). Backslash escapes such as \033 color codes are expanded.
# Outputs:   the message followed by a newline, on stderr.
log() {
  # printf %b expands escapes like `echo -e`, but never mistakes a leading
  # argument such as -n for an option.
  printf '%b\n' "$*" >&2
}
28+
log "${BLUE}=== MCP Server Conformance Test ===${NC}"
log "Current branch: $CURRENT_BRANCH"
log "Report directory: $REPORT_DIR"

# Find the common ancestor
MERGE_BASE=$(git merge-base HEAD origin/main)
if [ -z "$MERGE_BASE" ]; then
  # Without a merge-base there is nothing to build the baseline from.
  log "${RED}Could not determine merge-base with origin/main${NC}"
  exit 1
fi
log "Comparing against merge-base: $MERGE_BASE"
log ""

# Create report directory
# ${VAR:?} aborts instead of running rm -rf on an empty path.
rm -rf "${REPORT_DIR:?}"
mkdir -p "$REPORT_DIR"/{main,branch,diffs}

# Build binaries
log "${YELLOW}Building binaries...${NC}"

log "Building current branch ($CURRENT_BRANCH)..."
# Capture the status with `||` so the value is recorded even if errexit is on.
BRANCH_BUILD_OK=0
go build -o "$REPORT_DIR/branch/github-mcp-server" ./cmd/github-mcp-server || BRANCH_BUILD_OK=$?

log "Building main branch (using temp worktree at merge-base)..."
TEMP_WORKTREE=$(mktemp -d)
MAIN_BUILD_OK=0
if git worktree add --quiet "$TEMP_WORKTREE" "$MERGE_BASE"; then
  (cd "$TEMP_WORKTREE" && go build -o "$REPORT_DIR/main/github-mcp-server" ./cmd/github-mcp-server) || MAIN_BUILD_OK=$?
  git worktree remove --force "$TEMP_WORKTREE"
else
  # The worktree was never registered, so only the mktemp directory remains.
  MAIN_BUILD_OK=1
  rm -rf -- "${TEMP_WORKTREE:?}"
fi

if [ "$BRANCH_BUILD_OK" -ne 0 ] || [ "$MAIN_BUILD_OK" -ne 0 ]; then
  log "${RED}Build failed!${NC}"
  exit 1
fi

log "${GREEN}Both binaries built successfully${NC}"
log ""
# MCP JSON-RPC messages
# Each message is a single-line JSON document; the server is fed one per line.
INIT_MSG='{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"conformance-test","version":"1.0.0"}}}'
@@ -59,13 +68,40 @@ LIST_TOOLS_MSG='{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}'
# NOTE(review): the MCP specification appears to name this method
# "resources/templates/list"; confirm the server accepts "resources/listTemplates".
LIST_RESOURCES_MSG='{"jsonrpc":"2.0","id":3,"method":"resources/listTemplates","params":{}}'
LIST_PROMPTS_MSG='{"jsonrpc":"2.0","id":4,"method":"prompts/list","params":{}}'

# Dynamic toolset management tool calls (for dynamic mode testing)
# The ids 10-13 are what the dynamic test uses to tell the responses apart.
LIST_TOOLSETS_MSG='{"jsonrpc":"2.0","id":10,"method":"tools/call","params":{"name":"list_available_toolsets","arguments":{}}}'
GET_TOOLSET_TOOLS_MSG='{"jsonrpc":"2.0","id":11,"method":"tools/call","params":{"name":"get_toolset_tools","arguments":{"toolset":"repos"}}}'
ENABLE_TOOLSET_MSG='{"jsonrpc":"2.0","id":12,"method":"tools/call","params":{"name":"enable_toolset","arguments":{"toolset":"repos"}}}'
LIST_TOOLSETS_AFTER_MSG='{"jsonrpc":"2.0","id":13,"method":"tools/call","params":{"name":"list_available_toolsets","arguments":{}}}'
76+
# Function to normalize JSON for comparison
# Sorts all arrays (including nested ones) and formats consistently
# Also handles embedded JSON strings in "text" fields (from tool call responses)
#
# Arguments: $1 - path of the JSON file, rewritten in place
# Returns:   0 on success or when the file is missing/empty;
#            non-zero if jq could not process it (file left untouched)
normalize_json() {
  local file="$1"
  local tmp="${file}.tmp"

  # Missing or empty files have nothing to normalize.
  [ -s "$file" ] || return 0

  # jq errors are silenced on purpose: an unparsable file is simply left as-is
  # so the later diff still shows its raw content.
  if jq -S '
      # Sort arrays recursively; rebuild objects with keys in sorted order so
      # that objects re-serialized with tojson are canonical too (-S only
      # affects the outer document).
      def deep_sort:
        if type == "array" then
          [.[] | deep_sort] | sort_by(tostring)
        elif type == "object" then
          to_entries | sort_by(.key) | map(.value |= deep_sort) | from_entries
        else
          .
        end;

      # For any "text" string that looks like a JSON array/object, parse, sort
      # and re-serialize it. Text that only looks like JSON is kept verbatim
      # instead of aborting the whole normalization.
      walk(
        if type == "object"
           and (.text | type == "string")
           and ((.text | startswith("[")) or (.text | startswith("{"))) then
          .text as $raw
          | .text = (try ($raw | fromjson | deep_sort | tojson) catch $raw)
        else
          .
        end
      ) | deep_sort
    ' "$file" 2>/dev/null >"$tmp"; then
    mv "$tmp" "$file"
  else
    # Do not leave a stray temp file next to the report outputs.
    rm -f "$tmp"
    return 1
  fi
}
71107
@@ -118,23 +154,84 @@ run_mcp_test() {
118154 echo " $duration "
119155}
120156
# Function to run MCP server with dynamic tool calls (for dynamic mode testing)
#
# Globals:   INIT_MSG, INITIALIZED_MSG, LIST_TOOLSETS_MSG, GET_TOOLSET_TOOLS_MSG,
#            ENABLE_TOOLSET_MSG, LIST_TOOLSETS_AFTER_MSG (read)
# Arguments: $1 - path to the server binary
#            $2 - label for the run (accepted for symmetry with the caller;
#                 not used here)
#            $3 - space-separated server flags (may be empty)
#            $4 - output path prefix; responses are written to
#                 <prefix>_<endpoint>.json
# Outputs:   elapsed wall-clock seconds on stdout
run_mcp_dynamic_test() {
  local binary="$1"
  local flags="$3"
  local output_prefix="$4"

  local start_time end_time duration output line id target endpoint
  local -a flag_args=()

  # Split the flag string into words without exposing it to glob expansion.
  read -r -a flag_args <<<"$flags"

  start_time=$(date +%s.%N)

  # Run the server with dynamic tool calls in sequence:
  # 1. Initialize
  # 2. List available toolsets (before enable)
  # 3. Get tools for repos toolset
  # 4. Enable repos toolset
  # 5. List available toolsets (after enable - should show repos as enabled)
  # The short sleeps space the requests out; the final one keeps stdin open
  # long enough for the last response to be written.
  output=$(
    (
      echo "$INIT_MSG"
      echo "$INITIALIZED_MSG"
      echo "$LIST_TOOLSETS_MSG"
      sleep 0.1
      echo "$GET_TOOLSET_TOOLS_MSG"
      sleep 0.1
      echo "$ENABLE_TOOLSET_MSG"
      sleep 0.1
      echo "$LIST_TOOLSETS_AFTER_MSG"
      sleep 0.3
    ) | GITHUB_PERSONAL_ACCESS_TOKEN=1 "$binary" stdio \
      ${flag_args[@]+"${flag_args[@]}"} 2>/dev/null
  )

  end_time=$(date +%s.%N)
  duration=$(echo "$end_time - $start_time" | bc)

  # Parse and save each response by matching JSON-RPC id
  while IFS= read -r line; do
    id=$(printf '%s\n' "$line" | jq -r '.id // empty' 2>/dev/null)
    case "$id" in
      1) target="initialize" ;;
      10) target="list_toolsets_before" ;;
      11) target="get_toolset_tools" ;;
      12) target="enable_toolset" ;;
      13) target="list_toolsets_after" ;;
      *) continue ;; # notifications, unrelated ids, or non-JSON lines
    esac
    printf '%s\n' "$line" | jq -S '.' >"${output_prefix}_${target}.json" 2>/dev/null
  done <<<"$output"

  # Create empty files if not created, then normalize all JSON files
  for endpoint in initialize list_toolsets_before get_toolset_tools enable_toolset list_toolsets_after; do
    touch "${output_prefix}_${endpoint}.json"
    normalize_json "${output_prefix}_${endpoint}.json"
  done

  echo "$duration"
}
215+
# Test configurations - array of "name|flags|type"
# type can be "standard" or "dynamic" (for dynamic tool call testing)
# Fields are split on "|"; an empty flags field means "run with no flags".
declare -a TEST_CONFIGS=(
  "default||standard"
  "read-only|--read-only|standard"
  "dynamic-toolsets|--dynamic-toolsets|standard"
  "read-only+dynamic|--read-only --dynamic-toolsets|standard"
  "toolsets-repos|--toolsets=repos|standard"
  "toolsets-issues|--toolsets=issues|standard"
  "toolsets-pull_requests|--toolsets=pull_requests|standard"
  "toolsets-repos,issues|--toolsets=repos,issues|standard"
  "toolsets-all|--toolsets=all|standard"
  "tools-get_me|--tools=get_me|standard"
  "tools-get_me,list_issues|--tools=get_me,list_issues|standard"
  "toolsets-repos+read-only|--toolsets=repos --read-only|standard"
  "toolsets-all+dynamic|--toolsets=all --dynamic-toolsets|standard"
  "toolsets-repos+dynamic|--toolsets=repos --dynamic-toolsets|standard"
  "toolsets-repos,issues+dynamic|--toolsets=repos,issues --dynamic-toolsets|standard"
  "dynamic-tool-calls|--dynamic-toolsets|dynamic"
)
139236
140237# Summary arrays
@@ -143,50 +240,63 @@ declare -a MAIN_TIMES
143240declare -a BRANCH_TIMES
144241declare -a DIFF_STATUS
145242
146- echo -e " ${YELLOW} Running conformance tests...${NC} "
147- echo " "
243+ log " ${YELLOW} Running conformance tests...${NC} "
244+ log " "
148245
149246for config in " ${TEST_CONFIGS[@]} " ; do
150- IFS=' |' read -r test_name flags <<< " $config"
247+ IFS=' |' read -r test_name flags test_type <<< " $config"
151248
152- echo -e " ${BLUE} Test: ${test_name}${NC} "
153- echo " Flags: ${flags:- <none>} "
249+ log " ${BLUE} Test: ${test_name}${NC} "
250+ log " Flags: ${flags:- <none>} "
251+ log " Type: ${test_type} "
154252
155253 # Create output directories
156254 mkdir -p " $REPORT_DIR /main/$test_name "
157255 mkdir -p " $REPORT_DIR /branch/$test_name "
158256 mkdir -p " $REPORT_DIR /diffs/$test_name "
159257
160- # Run main version
161- main_time=$( run_mcp_test " $REPORT_DIR /main/github-mcp-server" " main" " $flags " " $REPORT_DIR /main/$test_name /output" )
162- echo " Main: ${main_time} s"
163-
164- # Run branch version
165- branch_time=$( run_mcp_test " $REPORT_DIR /branch/github-mcp-server" " branch" " $flags " " $REPORT_DIR /branch/$test_name /output" )
166- echo " Branch: ${branch_time} s"
258+ if [ " $test_type " = " dynamic" ]; then
259+ # Run dynamic tool call test
260+ main_time=$( run_mcp_dynamic_test " $REPORT_DIR /main/github-mcp-server" " main" " $flags " " $REPORT_DIR /main/$test_name /output" )
261+ log " Main: ${main_time} s"
262+
263+ branch_time=$( run_mcp_dynamic_test " $REPORT_DIR /branch/github-mcp-server" " branch" " $flags " " $REPORT_DIR /branch/$test_name /output" )
264+ log " Branch: ${branch_time} s"
265+
266+ endpoints=" initialize list_toolsets_before get_toolset_tools enable_toolset list_toolsets_after"
267+ else
268+ # Run standard test
269+ main_time=$( run_mcp_test " $REPORT_DIR /main/github-mcp-server" " main" " $flags " " $REPORT_DIR /main/$test_name /output" )
270+ log " Main: ${main_time} s"
271+
272+ branch_time=$( run_mcp_test " $REPORT_DIR /branch/github-mcp-server" " branch" " $flags " " $REPORT_DIR /branch/$test_name /output" )
273+ log " Branch: ${branch_time} s"
274+
275+ endpoints=" initialize tools resources prompts"
276+ fi
167277
168278 # Calculate time difference
169279 time_diff=$( echo " $branch_time - $main_time " | bc)
170280 if (( $(echo "$time_diff > 0 " | bc - l) )) ; then
171- echo -e " Δ Time: ${RED} +${time_diff} s (slower)${NC} "
281+ log " Δ Time: ${RED} +${time_diff} s (slower)${NC} "
172282 else
173- echo -e " Δ Time: ${GREEN}${time_diff} s (faster)${NC} "
283+ log " Δ Time: ${GREEN}${time_diff} s (faster)${NC} "
174284 fi
175285
176286 # Generate diffs for each endpoint
177287 has_diff=false
178- for endpoint in initialize tools resources prompts ; do
288+ for endpoint in $endpoints ; do
179289 main_file=" $REPORT_DIR /main/$test_name /output_${endpoint} .json"
180290 branch_file=" $REPORT_DIR /branch/$test_name /output_${endpoint} .json"
181291 diff_file=" $REPORT_DIR /diffs/$test_name /${endpoint} .diff"
182292
183293 if ! diff -u " $main_file " " $branch_file " > " $diff_file " 2> /dev/null; then
184294 has_diff=true
185295 lines=$( wc -l < " $diff_file " | tr -d ' ' )
186- echo -e " ${YELLOW}${endpoint} : DIFF (${lines} lines)${NC} "
296+ log " ${YELLOW}${endpoint} : DIFF (${lines} lines)${NC} "
187297 else
188298 rm -f " $diff_file " # No diff, remove empty file
189- echo -e " ${GREEN}${endpoint} : OK${NC} "
299+ log " ${GREEN}${endpoint} : OK${NC} "
190300 fi
191301 done
192302
@@ -200,7 +310,7 @@ for config in "${TEST_CONFIGS[@]}"; do
200310 DIFF_STATUS+=(" OK" )
201311 fi
202312
203- echo " "
313+ log " "
204314done
205315
206316# Generate summary report
@@ -282,7 +392,8 @@ for i in "${!TEST_NAMES[@]}"; do
282392 echo " ### $name " >> " $REPORT_FILE "
283393 echo " " >> " $REPORT_FILE "
284394
285- for endpoint in initialize tools resources prompts; do
395+ # Check all possible endpoints
396+ for endpoint in initialize tools resources prompts list_toolsets_before get_toolset_tools enable_toolset list_toolsets_after; do
286397 diff_file=" $REPORT_DIR /diffs/$name /${endpoint} .diff"
287398 if [ -f " $diff_file " ] && [ -s " $diff_file " ]; then
288399 echo " #### ${endpoint} " >> " $REPORT_FILE "
@@ -295,19 +406,27 @@ for i in "${!TEST_NAMES[@]}"; do
295406 fi
296407done
297408
log "${BLUE}=== Conformance Test Complete ===${NC}"
log ""
log "Report: ${GREEN}$REPORT_FILE${NC}"
log ""

# Output summary to stdout (for CI capture)
echo "=== Conformance Test Summary ==="
echo "Tests passed: $ok_count"
echo "Tests with diffs: $diff_count"
echo "Total main time: ${total_main}s"
echo "Total branch time: ${total_branch}s"
echo "Time delta: $total_delta_str"

# Quoted with a default so an unset/empty counter cannot break the test.
if [ "${diff_count:-0}" -gt 0 ]; then
  log ""
  log "${YELLOW}⚠️  Some tests have differences. Review the diffs in:${NC}"
  log "  $REPORT_DIR/diffs/"
  echo ""
  echo "RESULT: DIFFERENCES FOUND"
  # Don't exit with error - diffs may be intentional improvements
else
  echo ""
  echo "RESULT: ALL TESTS PASSED"
fi