|
| 1 | +#!/bin/bash |
| 2 | +set -e |
| 3 | + |
| 4 | +# Conformance test script for comparing MCP server behavior between branches |
| 5 | +# Builds both main and current branch, runs various flag combinations, |
| 6 | +# and produces a conformance report with timing and diffs. |
| 7 | + |
| 8 | +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" |
| 9 | +PROJECT_DIR="$(dirname "$SCRIPT_DIR")" |
| 10 | +REPORT_DIR="$PROJECT_DIR/conformance-report" |
| 11 | +CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) |
| 12 | + |
| 13 | +# Colors for output |
| 14 | +RED='\033[0;31m' |
| 15 | +GREEN='\033[0;32m' |
| 16 | +YELLOW='\033[1;33m' |
| 17 | +BLUE='\033[0;34m' |
| 18 | +NC='\033[0m' # No Color |
| 19 | + |
| 20 | +echo -e "${BLUE}=== MCP Server Conformance Test ===${NC}" |
| 21 | +echo "Current branch: $CURRENT_BRANCH" |
| 22 | +echo "Report directory: $REPORT_DIR" |
| 23 | + |
| 24 | +# Find the common ancestor |
| 25 | +MERGE_BASE=$(git merge-base HEAD origin/main) |
| 26 | +echo "Comparing against merge-base: $MERGE_BASE" |
| 27 | +echo "" |
| 28 | + |
| 29 | +# Create report directory |
| 30 | +rm -rf "$REPORT_DIR" |
| 31 | +mkdir -p "$REPORT_DIR"/{main,branch,diffs} |
| 32 | + |
| 33 | +# Build binaries |
| 34 | +echo -e "${YELLOW}Building binaries...${NC}" |
| 35 | + |
| 36 | +echo "Building current branch ($CURRENT_BRANCH)..." |
| 37 | +go build -o "$REPORT_DIR/branch/github-mcp-server" ./cmd/github-mcp-server |
| 38 | +BRANCH_BUILD_OK=$? |
| 39 | + |
| 40 | +echo "Building main branch (using temp worktree at merge-base)..." |
| 41 | +TEMP_WORKTREE=$(mktemp -d) |
| 42 | +git worktree add --quiet "$TEMP_WORKTREE" "$MERGE_BASE" |
| 43 | +(cd "$TEMP_WORKTREE" && go build -o "$REPORT_DIR/main/github-mcp-server" ./cmd/github-mcp-server) |
| 44 | +MAIN_BUILD_OK=$? |
| 45 | +git worktree remove --force "$TEMP_WORKTREE" |
| 46 | + |
| 47 | +if [ $BRANCH_BUILD_OK -ne 0 ] || [ $MAIN_BUILD_OK -ne 0 ]; then |
| 48 | + echo -e "${RED}Build failed!${NC}" |
| 49 | + exit 1 |
| 50 | +fi |
| 51 | + |
| 52 | +echo -e "${GREEN}Both binaries built successfully${NC}" |
| 53 | +echo "" |
| 54 | + |
| 55 | +# MCP JSON-RPC messages |
| 56 | +INIT_MSG='{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"conformance-test","version":"1.0.0"}}}' |
| 57 | +INITIALIZED_MSG='{"jsonrpc":"2.0","method":"notifications/initialized","params":{}}' |
| 58 | +LIST_TOOLS_MSG='{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}' |
| 59 | +LIST_RESOURCES_MSG='{"jsonrpc":"2.0","id":3,"method":"resources/listTemplates","params":{}}' |
| 60 | +LIST_PROMPTS_MSG='{"jsonrpc":"2.0","id":4,"method":"prompts/list","params":{}}' |
| 61 | + |
| 62 | +# Function to normalize JSON for comparison |
| 63 | +# Sorts all arrays (including nested ones) and formats consistently |
| 64 | +normalize_json() { |
| 65 | + local file="$1" |
| 66 | + if [ -s "$file" ]; then |
| 67 | + # Deep sort: sort all arrays recursively, then sort keys |
| 68 | + jq -S 'walk(if type == "array" then sort_by(tostring) else . end)' "$file" 2>/dev/null > "${file}.tmp" && mv "${file}.tmp" "$file" |
| 69 | + fi |
| 70 | +} |
| 71 | + |
| 72 | +# Function to run MCP server and capture output with timing |
| 73 | +run_mcp_test() { |
| 74 | + local binary="$1" |
| 75 | + local name="$2" |
| 76 | + local flags="$3" |
| 77 | + local output_prefix="$4" |
| 78 | + |
| 79 | + local start_time end_time duration |
| 80 | + start_time=$(date +%s.%N) |
| 81 | + |
| 82 | + # Run the server with all list commands - each response is on its own line |
| 83 | + output=$( |
| 84 | + ( |
| 85 | + echo "$INIT_MSG" |
| 86 | + echo "$INITIALIZED_MSG" |
| 87 | + echo "$LIST_TOOLS_MSG" |
| 88 | + echo "$LIST_RESOURCES_MSG" |
| 89 | + echo "$LIST_PROMPTS_MSG" |
| 90 | + sleep 0.5 |
| 91 | + ) | GITHUB_PERSONAL_ACCESS_TOKEN=1 $binary stdio $flags 2>/dev/null |
| 92 | + ) |
| 93 | + |
| 94 | + end_time=$(date +%s.%N) |
| 95 | + duration=$(echo "$end_time - $start_time" | bc) |
| 96 | + |
| 97 | + # Parse and save each response by matching JSON-RPC id |
| 98 | + # Each line is a separate JSON response |
| 99 | + echo "$output" | while IFS= read -r line; do |
| 100 | + id=$(echo "$line" | jq -r '.id // empty' 2>/dev/null) |
| 101 | + case "$id" in |
| 102 | + 1) echo "$line" | jq -S '.' > "${output_prefix}_initialize.json" 2>/dev/null ;; |
| 103 | + 2) echo "$line" | jq -S '.' > "${output_prefix}_tools.json" 2>/dev/null ;; |
| 104 | + 3) echo "$line" | jq -S '.' > "${output_prefix}_resources.json" 2>/dev/null ;; |
| 105 | + 4) echo "$line" | jq -S '.' > "${output_prefix}_prompts.json" 2>/dev/null ;; |
| 106 | + esac |
| 107 | + done |
| 108 | + |
| 109 | + # Create empty files if not created (in case of errors or missing responses) |
| 110 | + touch "${output_prefix}_initialize.json" "${output_prefix}_tools.json" \ |
| 111 | + "${output_prefix}_resources.json" "${output_prefix}_prompts.json" |
| 112 | + |
| 113 | + # Normalize all JSON files for consistent comparison (sorts arrays, keys) |
| 114 | + for endpoint in initialize tools resources prompts; do |
| 115 | + normalize_json "${output_prefix}_${endpoint}.json" |
| 116 | + done |
| 117 | + |
| 118 | + echo "$duration" |
| 119 | +} |
| 120 | + |
| 121 | +# Test configurations - array of "name|flags" |
| 122 | +declare -a TEST_CONFIGS=( |
| 123 | + "default|" |
| 124 | + "read-only|--read-only" |
| 125 | + "dynamic-toolsets|--dynamic-toolsets" |
| 126 | + "read-only+dynamic|--read-only --dynamic-toolsets" |
| 127 | + "toolsets-repos|--toolsets=repos" |
| 128 | + "toolsets-issues|--toolsets=issues" |
| 129 | + "toolsets-pull_requests|--toolsets=pull_requests" |
| 130 | + "toolsets-repos,issues|--toolsets=repos,issues" |
| 131 | + "toolsets-all|--toolsets=all" |
| 132 | + "tools-get_me|--tools=get_me" |
| 133 | + "tools-get_me,list_issues|--tools=get_me,list_issues" |
| 134 | + "toolsets-repos+read-only|--toolsets=repos --read-only" |
| 135 | + "toolsets-all+dynamic|--toolsets=all --dynamic-toolsets" |
| 136 | + "toolsets-repos+dynamic|--toolsets=repos --dynamic-toolsets" |
| 137 | + "toolsets-repos,issues+dynamic|--toolsets=repos,issues --dynamic-toolsets" |
| 138 | +) |
| 139 | + |
| 140 | +# Summary arrays |
| 141 | +declare -a TEST_NAMES |
| 142 | +declare -a MAIN_TIMES |
| 143 | +declare -a BRANCH_TIMES |
| 144 | +declare -a DIFF_STATUS |
| 145 | + |
| 146 | +echo -e "${YELLOW}Running conformance tests...${NC}" |
| 147 | +echo "" |
| 148 | + |
| 149 | +for config in "${TEST_CONFIGS[@]}"; do |
| 150 | + IFS='|' read -r test_name flags <<< "$config" |
| 151 | + |
| 152 | + echo -e "${BLUE}Test: ${test_name}${NC}" |
| 153 | + echo " Flags: ${flags:-<none>}" |
| 154 | + |
| 155 | + # Create output directories |
| 156 | + mkdir -p "$REPORT_DIR/main/$test_name" |
| 157 | + mkdir -p "$REPORT_DIR/branch/$test_name" |
| 158 | + mkdir -p "$REPORT_DIR/diffs/$test_name" |
| 159 | + |
| 160 | + # Run main version |
| 161 | + main_time=$(run_mcp_test "$REPORT_DIR/main/github-mcp-server" "main" "$flags" "$REPORT_DIR/main/$test_name/output") |
| 162 | + echo " Main: ${main_time}s" |
| 163 | + |
| 164 | + # Run branch version |
| 165 | + branch_time=$(run_mcp_test "$REPORT_DIR/branch/github-mcp-server" "branch" "$flags" "$REPORT_DIR/branch/$test_name/output") |
| 166 | + echo " Branch: ${branch_time}s" |
| 167 | + |
| 168 | + # Calculate time difference |
| 169 | + time_diff=$(echo "$branch_time - $main_time" | bc) |
| 170 | + if (( $(echo "$time_diff > 0" | bc -l) )); then |
| 171 | + echo -e " Δ Time: ${RED}+${time_diff}s (slower)${NC}" |
| 172 | + else |
| 173 | + echo -e " Δ Time: ${GREEN}${time_diff}s (faster)${NC}" |
| 174 | + fi |
| 175 | + |
| 176 | + # Generate diffs for each endpoint |
| 177 | + has_diff=false |
| 178 | + for endpoint in initialize tools resources prompts; do |
| 179 | + main_file="$REPORT_DIR/main/$test_name/output_${endpoint}.json" |
| 180 | + branch_file="$REPORT_DIR/branch/$test_name/output_${endpoint}.json" |
| 181 | + diff_file="$REPORT_DIR/diffs/$test_name/${endpoint}.diff" |
| 182 | + |
| 183 | + if ! diff -u "$main_file" "$branch_file" > "$diff_file" 2>/dev/null; then |
| 184 | + has_diff=true |
| 185 | + lines=$(wc -l < "$diff_file" | tr -d ' ') |
| 186 | + echo -e " ${YELLOW}${endpoint}: DIFF (${lines} lines)${NC}" |
| 187 | + else |
| 188 | + rm -f "$diff_file" # No diff, remove empty file |
| 189 | + echo -e " ${GREEN}${endpoint}: OK${NC}" |
| 190 | + fi |
| 191 | + done |
| 192 | + |
| 193 | + # Store results |
| 194 | + TEST_NAMES+=("$test_name") |
| 195 | + MAIN_TIMES+=("$main_time") |
| 196 | + BRANCH_TIMES+=("$branch_time") |
| 197 | + if [ "$has_diff" = true ]; then |
| 198 | + DIFF_STATUS+=("DIFF") |
| 199 | + else |
| 200 | + DIFF_STATUS+=("OK") |
| 201 | + fi |
| 202 | + |
| 203 | + echo "" |
| 204 | +done |
| 205 | + |
| 206 | +# Generate summary report |
| 207 | +REPORT_FILE="$REPORT_DIR/CONFORMANCE_REPORT.md" |
| 208 | + |
| 209 | +cat > "$REPORT_FILE" << EOF |
| 210 | +# MCP Server Conformance Report |
| 211 | +
|
| 212 | +Generated: $(date) |
| 213 | +Current Branch: $CURRENT_BRANCH |
| 214 | +Compared Against: merge-base ($MERGE_BASE) |
| 215 | +
|
| 216 | +## Summary |
| 217 | +
|
| 218 | +| Test | Main Time | Branch Time | Δ Time | Status | |
| 219 | +|------|-----------|-------------|--------|--------| |
| 220 | +EOF |
| 221 | + |
| 222 | +total_main=0 |
| 223 | +total_branch=0 |
| 224 | +diff_count=0 |
| 225 | +ok_count=0 |
| 226 | + |
| 227 | +for i in "${!TEST_NAMES[@]}"; do |
| 228 | + name="${TEST_NAMES[$i]}" |
| 229 | + main_t="${MAIN_TIMES[$i]}" |
| 230 | + branch_t="${BRANCH_TIMES[$i]}" |
| 231 | + status="${DIFF_STATUS[$i]}" |
| 232 | + |
| 233 | + delta=$(echo "$branch_t - $main_t" | bc) |
| 234 | + if (( $(echo "$delta > 0" | bc -l) )); then |
| 235 | + delta_str="+${delta}s" |
| 236 | + else |
| 237 | + delta_str="${delta}s" |
| 238 | + fi |
| 239 | + |
| 240 | + if [ "$status" = "DIFF" ]; then |
| 241 | + status_str="⚠️ DIFF" |
| 242 | + ((diff_count++)) || true |
| 243 | + else |
| 244 | + status_str="✅ OK" |
| 245 | + ((ok_count++)) || true |
| 246 | + fi |
| 247 | + |
| 248 | + total_main=$(echo "$total_main + $main_t" | bc) |
| 249 | + total_branch=$(echo "$total_branch + $branch_t" | bc) |
| 250 | + |
| 251 | + echo "| $name | ${main_t}s | ${branch_t}s | $delta_str | $status_str |" >> "$REPORT_FILE" |
| 252 | +done |
| 253 | + |
| 254 | +total_delta=$(echo "$total_branch - $total_main" | bc) |
| 255 | +if (( $(echo "$total_delta > 0" | bc -l) )); then |
| 256 | + total_delta_str="+${total_delta}s" |
| 257 | +else |
| 258 | + total_delta_str="${total_delta}s" |
| 259 | +fi |
| 260 | + |
| 261 | +cat >> "$REPORT_FILE" << EOF |
| 262 | +| **TOTAL** | **${total_main}s** | **${total_branch}s** | **$total_delta_str** | **$ok_count OK / $diff_count DIFF** | |
| 263 | +
|
| 264 | +## Statistics |
| 265 | +
|
| 266 | +- **Tests Passed (no diff):** $ok_count |
| 267 | +- **Tests with Differences:** $diff_count |
| 268 | +- **Total Main Time:** ${total_main}s |
| 269 | +- **Total Branch Time:** ${total_branch}s |
| 270 | +- **Overall Time Delta:** $total_delta_str |
| 271 | +
|
| 272 | +## Detailed Diffs |
| 273 | +
|
| 274 | +EOF |
| 275 | + |
| 276 | +# Add diff details to report |
| 277 | +for i in "${!TEST_NAMES[@]}"; do |
| 278 | + name="${TEST_NAMES[$i]}" |
| 279 | + status="${DIFF_STATUS[$i]}" |
| 280 | + |
| 281 | + if [ "$status" = "DIFF" ]; then |
| 282 | + echo "### $name" >> "$REPORT_FILE" |
| 283 | + echo "" >> "$REPORT_FILE" |
| 284 | + |
| 285 | + for endpoint in initialize tools resources prompts; do |
| 286 | + diff_file="$REPORT_DIR/diffs/$name/${endpoint}.diff" |
| 287 | + if [ -f "$diff_file" ] && [ -s "$diff_file" ]; then |
| 288 | + echo "#### ${endpoint}" >> "$REPORT_FILE" |
| 289 | + echo '```diff' >> "$REPORT_FILE" |
| 290 | + cat "$diff_file" >> "$REPORT_FILE" |
| 291 | + echo '```' >> "$REPORT_FILE" |
| 292 | + echo "" >> "$REPORT_FILE" |
| 293 | + fi |
| 294 | + done |
| 295 | + fi |
| 296 | +done |
| 297 | + |
| 298 | +echo -e "${BLUE}=== Conformance Test Complete ===${NC}" |
| 299 | +echo "" |
| 300 | +echo -e "Report: ${GREEN}$REPORT_FILE${NC}" |
| 301 | +echo "" |
| 302 | +echo "Summary:" |
| 303 | +echo " Tests passed: $ok_count" |
| 304 | +echo " Tests with diffs: $diff_count" |
| 305 | +echo " Total main time: ${total_main}s" |
| 306 | +echo " Total branch time: ${total_branch}s" |
| 307 | +echo " Time delta: $total_delta_str" |
| 308 | + |
| 309 | +if [ $diff_count -gt 0 ]; then |
| 310 | + echo "" |
| 311 | + echo -e "${YELLOW}⚠️ Some tests have differences. Review the diffs in:${NC}" |
| 312 | + echo " $REPORT_DIR/diffs/" |
| 313 | +fi |
0 commit comments