From 7434948e972cedbce5638950fd33f5063a1b43e5 Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Tue, 24 Mar 2026 14:47:55 +0100 Subject: [PATCH 01/12] Add Buildkite pipeline for AI E2E tests using simulator-llm-pilot gem The gem provides a sandboxed agent that drives the simulator through a fixed set of tools (tap, swipe, type, REST API) with no arbitrary code execution. It handles WDA lifecycle, session management, context window compression, and verification/cleanup enforcement internally. The Buildkite step: - Checks for "Testing" label (skips if missing) - Downloads build artifacts and installs app on simulator - Installs the simulator-llm-pilot gem from GitHub - Runs all test cases in Tests/AgentTests/ui-tests/ Ref: AINFRA-2176 Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/commands/run-ai-e2e-tests.sh | 117 ++++++++++++++++++++++++ .buildkite/pipeline.yml | 18 ++++ 2 files changed, 135 insertions(+) create mode 100755 .buildkite/commands/run-ai-e2e-tests.sh diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh new file mode 100755 index 000000000000..2267a84e31eb --- /dev/null +++ b/.buildkite/commands/run-ai-e2e-tests.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +# Run AI-driven E2E tests on an iOS Simulator using simulator-llm-pilot. +# +# This script manages the full lifecycle: +# 1. Check for "Testing" label on PR (Buildkite only, skips if missing) +# 2. Download build artifacts and install app (Buildkite only) +# 3. Install the simulator-llm-pilot gem from GitHub +# 4. Run tests (gem handles simulator, WDA, agent loop, and results) +# +# The gem provides a sandboxed agent that drives the simulator through a +# fixed set of tools (tap, swipe, type, REST API, etc.) — no arbitrary +# code execution, no shell access. +# +# Required environment variables: +# ANTHROPIC_API_KEY Claude API key +# SIMULATOR_LLM_PILOT_SITE_URL WordPress test site URL +# SIMULATOR_LLM_PILOT_USERNAME WordPress username +# SIMULATOR_LLM_PILOT_APP_PASSWORD WordPress application password +# +# Optional environment variables: +# APP wordpress | jetpack (default: jetpack) +# SIMULATOR_NAME Simulator to boot if none running (default: iPhone 16) +# TEST_DIR Test directory (default: Tests/AgentTests/ui-tests) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +cd "$REPO_ROOT" + +# ── Label gate (Buildkite only) ───────────────────────────────────── +if [ -n "${BUILDKITE_PULL_REQUEST_LABELS:-}" ]; then + echo "--- Checking for 'Testing' label" + + if ! echo ";${BUILDKITE_PULL_REQUEST_LABELS};" | grep -q ";Testing;"; then + echo "PR does not have the 'Testing' label. Skipping." + echo "Add the label and re-run this step to trigger AI E2E tests." + exit 0 + fi + echo "'Testing' label found." +fi + +# ── Required env vars ──────────────────────────────────────────────── +: "${ANTHROPIC_API_KEY:?Set ANTHROPIC_API_KEY}" +: "${SIMULATOR_LLM_PILOT_SITE_URL:?Set SIMULATOR_LLM_PILOT_SITE_URL}" +: "${SIMULATOR_LLM_PILOT_USERNAME:?Set SIMULATOR_LLM_PILOT_USERNAME}" +: "${SIMULATOR_LLM_PILOT_APP_PASSWORD:?Set SIMULATOR_LLM_PILOT_APP_PASSWORD}" + +# ── Defaults ───────────────────────────────────────────────────────── +APP="${APP:-jetpack}" +SIMULATOR_NAME="${SIMULATOR_NAME:-iPhone 16}" +TEST_DIR="${TEST_DIR:-Tests/AgentTests/ui-tests}" + +case "$APP" in + wordpress) BUNDLE_ID="org.wordpress" ;; + jetpack) BUNDLE_ID="com.automattic.jetpack" ;; + *) echo "Error: APP must be 'wordpress' or 'jetpack', got '$APP'" >&2; exit 1 ;; +esac + +# ── Artifact download (Buildkite only) ─────────────────────────────── +if [ -n "${BUILDKITE:-}" ]; then + echo "--- Downloading Build Artifacts" + download_artifact "build-products-${APP}.tar" + tar -xf "build-products-${APP}.tar" + + echo "--- Setting up Gems" + install_gems +fi + +# ── Install simulator-llm-pilot ────────────────────────────────────── +echo "--- Installing simulator-llm-pilot" +GEM_BUILD_DIR="$(mktemp -d)" +git clone --depth 1 https://github.com/Automattic/simulator-llm-pilot.git "$GEM_BUILD_DIR" +gem build "$GEM_BUILD_DIR/simulator-llm-pilot.gemspec" --output "$GEM_BUILD_DIR/simulator-llm-pilot.gem" +gem install "$GEM_BUILD_DIR/simulator-llm-pilot.gem" +rm -rf "$GEM_BUILD_DIR" +echo "simulator-llm-pilot $(simulator-llm-pilot version)" + +# ── Boot simulator and install app (Buildkite only) ────────────────── +echo "--- Setting up Simulator" +xcrun simctl boot "$SIMULATOR_NAME" 2>/dev/null || true +sleep 3 + +if [ -n "${BUILDKITE:-}" ]; then + APP_DISPLAY_NAME="Jetpack" + [ "$APP" = "wordpress" ] && APP_DISPLAY_NAME="WordPress" + + APP_PATH=$(find DerivedData/Build/Products -name "${APP_DISPLAY_NAME}.app" -path "*Debug-iphonesimulator*" | head -1) + if [ -z "$APP_PATH" ]; then + echo "Error: ${APP_DISPLAY_NAME}.app not found in build products" >&2 + exit 1 + fi + echo "Installing $APP_PATH on simulator..." + xcrun simctl install booted "$APP_PATH" +fi + +# ── Run tests ──────────────────────────────────────────────────────── +echo "--- Running AI E2E Tests" + +TIMESTAMP="$(date +%Y-%m-%d-%H%M)" +RESULTS_DIR="Tests/AgentTests/results/${TIMESTAMP}" + +simulator-llm-pilot run "$TEST_DIR" \ + --app-bundle-id "$BUNDLE_ID" \ + --simulator-name "$SIMULATOR_NAME" \ + --results-dir "$RESULTS_DIR" + +EXIT_CODE=$? + +# ── Report results ─────────────────────────────────────────────────── +echo "--- Results" +RESULTS_FILE="${RESULTS_DIR}/results.md" +if [ -f "$RESULTS_FILE" ]; then + cat "$RESULTS_FILE" +fi + +exit "$EXIT_CODE" diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 16ef55c325bf..c51e60eb72e8 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -138,6 +138,24 @@ steps: command: .buildkite/commands/lint-localized-strings-format.sh plugins: [$CI_TOOLKIT_PLUGIN] + ################# + # AI E2E Tests (requires "Testing" label on PR) + ################# + - label: "🤖 AI E2E Tests" + command: .buildkite/commands/run-ai-e2e-tests.sh + depends_on: "build_jetpack" + if: "build.pull_request.id != null" + soft_fail: true + timeout_in_minutes: 30 + plugins: [$CI_TOOLKIT_PLUGIN] + env: + APP: jetpack + artifact_paths: + - "Tests/AgentTests/results/**/*" + notify: + - github_commit_status: + context: "AI E2E Tests" + ################# # Claude Build Analysis - dynamically uploaded so Build result conditions evaluate at runtime after the wait ################# From 0cce295d3ebd92178c2ce63ee7e79a8dd982c86f Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Tue, 24 Mar 2026 17:28:04 +0100 Subject: [PATCH 02/12] Use [[ instead of [ for conditional tests Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/commands/run-ai-e2e-tests.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh index 2267a84e31eb..7d2ccc5b1b80 100755 --- a/.buildkite/commands/run-ai-e2e-tests.sh +++ b/.buildkite/commands/run-ai-e2e-tests.sh @@ -29,7 +29,7 @@ REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" cd "$REPO_ROOT" # ── Label gate (Buildkite only) ───────────────────────────────────── -if [ -n "${BUILDKITE_PULL_REQUEST_LABELS:-}" ]; then +if [[ -n "${BUILDKITE_PULL_REQUEST_LABELS:-}" ]]; then echo "--- Checking for 'Testing' label" if ! echo ";${BUILDKITE_PULL_REQUEST_LABELS};" | grep -q ";Testing;"; then @@ -58,7 +58,7 @@ case "$APP" in esac # ── Artifact download (Buildkite only) ─────────────────────────────── -if [ -n "${BUILDKITE:-}" ]; then +if [[ -n "${BUILDKITE:-}" ]]; then echo "--- Downloading Build Artifacts" download_artifact "build-products-${APP}.tar" tar -xf "build-products-${APP}.tar" @@ -81,12 +81,12 @@ echo "--- Setting up Simulator" xcrun simctl boot "$SIMULATOR_NAME" 2>/dev/null || true sleep 3 -if [ -n "${BUILDKITE:-}" ]; then +if [[ -n "${BUILDKITE:-}" ]]; then APP_DISPLAY_NAME="Jetpack" [ "$APP" = "wordpress" ] && APP_DISPLAY_NAME="WordPress" APP_PATH=$(find DerivedData/Build/Products -name "${APP_DISPLAY_NAME}.app" -path "*Debug-iphonesimulator*" | head -1) - if [ -z "$APP_PATH" ]; then + if [[ -z "$APP_PATH" ]]; then echo "Error: ${APP_DISPLAY_NAME}.app not found in build products" >&2 exit 1 fi @@ -110,7 +110,7 @@ EXIT_CODE=$? # ── Report results ─────────────────────────────────────────────────── echo "--- Results" RESULTS_FILE="${RESULTS_DIR}/results.md" -if [ -f "$RESULTS_FILE" ]; then +if [[ -f "$RESULTS_FILE" ]]; then cat "$RESULTS_FILE" fi From eb15a6e85ebd3a77c3ce32c616dde7d00434ae30 Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Wed, 25 Mar 2026 22:05:19 +0100 Subject: [PATCH 03/12] Fix label check: BUILDKITE_PULL_REQUEST_LABELS is comma-separated Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/commands/run-ai-e2e-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh index 7d2ccc5b1b80..1a9b96ed2c7d 100755 --- a/.buildkite/commands/run-ai-e2e-tests.sh +++ b/.buildkite/commands/run-ai-e2e-tests.sh @@ -32,7 +32,7 @@ cd "$REPO_ROOT" if [[ -n "${BUILDKITE_PULL_REQUEST_LABELS:-}" ]]; then echo "--- Checking for 'Testing' label" - if ! echo ";${BUILDKITE_PULL_REQUEST_LABELS};" | grep -q ";Testing;"; then + if ! echo ",${BUILDKITE_PULL_REQUEST_LABELS}," | grep -qF ",Testing,"; then echo "PR does not have the 'Testing' label. Skipping." echo "Add the label and re-run this step to trigger AI E2E tests." exit 0 From 9550f6aae58de108821f95a69e275d0a4f04076e Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Wed, 25 Mar 2026 22:20:00 +0100 Subject: [PATCH 04/12] Fix gem build: run from inside cloned directory gem build resolves spec file paths relative to cwd, so bin/simulator-llm-pilot wasn't found when building from the wordpress-ios repo root. Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/commands/run-ai-e2e-tests.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh index 1a9b96ed2c7d..88163d38b528 100755 --- a/.buildkite/commands/run-ai-e2e-tests.sh +++ b/.buildkite/commands/run-ai-e2e-tests.sh @@ -71,8 +71,10 @@ fi echo "--- Installing simulator-llm-pilot" GEM_BUILD_DIR="$(mktemp -d)" git clone --depth 1 https://github.com/Automattic/simulator-llm-pilot.git "$GEM_BUILD_DIR" -gem build "$GEM_BUILD_DIR/simulator-llm-pilot.gemspec" --output "$GEM_BUILD_DIR/simulator-llm-pilot.gem" -gem install "$GEM_BUILD_DIR/simulator-llm-pilot.gem" +pushd "$GEM_BUILD_DIR" +gem build simulator-llm-pilot.gemspec +gem install simulator-llm-pilot-*.gem +popd rm -rf "$GEM_BUILD_DIR" echo "simulator-llm-pilot $(simulator-llm-pilot version)" From d7d00039d0697f0efa2d3278750139825445c81c Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Wed, 25 Mar 2026 22:41:47 +0100 Subject: [PATCH 05/12] Clone and build WebDriverAgent if not present on CI agent Extract WDA build to a separate build-wda.sh script for clarity. Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/commands/build-wda.sh | 27 +++++++++++++++++++++++++ .buildkite/commands/run-ai-e2e-tests.sh | 6 +++++- 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100755 .buildkite/commands/build-wda.sh diff --git a/.buildkite/commands/build-wda.sh b/.buildkite/commands/build-wda.sh new file mode 100755 index 000000000000..0445b9bbfab1 --- /dev/null +++ b/.buildkite/commands/build-wda.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# Clone and build WebDriverAgent for iOS Simulator testing. +# +# Skips if WDA is already built at .build/WebDriverAgent/. +# +# Required: +# SIMULATOR_NAME Simulator name for the build destination (e.g., iPhone 16) + +set -euo pipefail + +SIMULATOR_NAME="${SIMULATOR_NAME:?Set SIMULATOR_NAME}" +WDA_PROJECT=".build/WebDriverAgent/WebDriverAgent.xcodeproj" + +if [[ -d "$WDA_PROJECT" ]]; then + echo "WebDriverAgent already built, skipping." + return 0 2>/dev/null || exit 0 +fi + +mkdir -p .build +git clone --depth 1 https://github.com/appium/WebDriverAgent.git .build/WebDriverAgent + +xcodebuild build-for-testing \ + -project "$WDA_PROJECT" \ + -scheme WebDriverAgentRunner \ + -destination "platform=iOS Simulator,name=$SIMULATOR_NAME" \ + CODE_SIGNING_ALLOWED=NO \ + | tail -1 diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh index 88163d38b528..1006a06fb8fa 100755 --- a/.buildkite/commands/run-ai-e2e-tests.sh +++ b/.buildkite/commands/run-ai-e2e-tests.sh @@ -85,7 +85,7 @@ sleep 3 if [[ -n "${BUILDKITE:-}" ]]; then APP_DISPLAY_NAME="Jetpack" - [ "$APP" = "wordpress" ] && APP_DISPLAY_NAME="WordPress" + [[ "$APP" = "wordpress" ]] && APP_DISPLAY_NAME="WordPress" APP_PATH=$(find DerivedData/Build/Products -name "${APP_DISPLAY_NAME}.app" -path "*Debug-iphonesimulator*" | head -1) if [[ -z "$APP_PATH" ]]; then @@ -96,6 +96,10 @@ if [[ -n "${BUILDKITE:-}" ]]; then xcrun simctl install booted "$APP_PATH" fi +# ── Build WebDriverAgent (if not present) ──────────────────────────── +echo "--- Building WebDriverAgent" +"$(dirname "$0")/build-wda.sh" + # ── Run tests ──────────────────────────────────────────────────────── echo "--- Running AI E2E Tests" From 0edbbf3d8d34a73f51058c7c37f4b0138e05e096 Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Thu, 26 Mar 2026 10:55:17 +0100 Subject: [PATCH 06/12] Export SIMULATOR_NAME so build-wda.sh can read it Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/commands/run-ai-e2e-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh index 1006a06fb8fa..022b6bafef02 100755 --- a/.buildkite/commands/run-ai-e2e-tests.sh +++ b/.buildkite/commands/run-ai-e2e-tests.sh @@ -48,7 +48,7 @@ fi # ── Defaults ───────────────────────────────────────────────────────── APP="${APP:-jetpack}" -SIMULATOR_NAME="${SIMULATOR_NAME:-iPhone 16}" +export SIMULATOR_NAME="${SIMULATOR_NAME:-iPhone 16}" TEST_DIR="${TEST_DIR:-Tests/AgentTests/ui-tests}" case "$APP" in From e253eef6cec0bec831bb0468200825c8a90b08c3 Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Thu, 26 Mar 2026 11:27:17 +0100 Subject: [PATCH 07/12] Harden gem-backed AI E2E runner --- .buildkite/commands/build-wda.sh | 62 +++++++++++++++++++---- .buildkite/commands/run-ai-e2e-tests.sh | 49 +++++++++++------- Scripts/ci/find-booted-simulator.rb | 38 ++++++++++++++ Scripts/ci/install-simulator-llm-pilot.sh | 48 ++++++++++++++++++ 4 files changed, 169 insertions(+), 28 deletions(-) create mode 100644 Scripts/ci/find-booted-simulator.rb create mode 100644 Scripts/ci/install-simulator-llm-pilot.sh diff --git a/.buildkite/commands/build-wda.sh b/.buildkite/commands/build-wda.sh index 0445b9bbfab1..5b079b0c6c30 100755 --- a/.buildkite/commands/build-wda.sh +++ b/.buildkite/commands/build-wda.sh @@ -1,27 +1,69 @@ #!/usr/bin/env bash # Clone and build WebDriverAgent for iOS Simulator testing. # -# Skips if WDA is already built at .build/WebDriverAgent/. +# Skips the build only when a usable build-for-testing artifact already exists. # -# Required: +# Required (one of): +# SIMULATOR_UDID Simulator UDID for the build destination # SIMULATOR_NAME Simulator name for the build destination (e.g., iPhone 16) +# +# Optional: +# WEBDRIVERAGENT_REPO_URL Repo URL (default: appium/WebDriverAgent) +# WEBDRIVERAGENT_REF Git ref or commit to build (default: current remote HEAD / existing checkout) set -euo pipefail -SIMULATOR_NAME="${SIMULATOR_NAME:?Set SIMULATOR_NAME}" -WDA_PROJECT=".build/WebDriverAgent/WebDriverAgent.xcodeproj" +if [[ -z "${SIMULATOR_UDID:-}" && -z "${SIMULATOR_NAME:-}" ]]; then + echo "Error: set SIMULATOR_UDID or SIMULATOR_NAME" >&2 + exit 1 +fi + +WDA_DIR=".build/WebDriverAgent" +WDA_PROJECT="${WDA_DIR}/WebDriverAgent.xcodeproj" +WDA_DERIVED_DATA="${WDA_DIR}/DerivedData" +WEBDRIVERAGENT_REPO_URL="${WEBDRIVERAGENT_REPO_URL:-https://github.com/appium/WebDriverAgent.git}" +WEBDRIVERAGENT_REF="${WEBDRIVERAGENT_REF:-}" -if [[ -d "$WDA_PROJECT" ]]; then - echo "WebDriverAgent already built, skipping." - return 0 2>/dev/null || exit 0 +if [[ -n "${SIMULATOR_UDID:-}" ]]; then + DESTINATION="platform=iOS Simulator,id=${SIMULATOR_UDID}" +else + DESTINATION="platform=iOS Simulator,name=${SIMULATOR_NAME}" fi -mkdir -p .build -git clone --depth 1 https://github.com/appium/WebDriverAgent.git .build/WebDriverAgent +ensure_wda_checkout() { + mkdir -p .build + + if [[ ! -d "${WDA_DIR}/.git" ]]; then + git clone --depth 1 "${WEBDRIVERAGENT_REPO_URL}" "${WDA_DIR}" + fi + + if [[ -n "${WEBDRIVERAGENT_REF}" ]]; then + git -C "${WDA_DIR}" fetch --depth 1 origin "${WEBDRIVERAGENT_REF}" + git -C "${WDA_DIR}" checkout --detach "${WEBDRIVERAGENT_REF}" + fi +} + +has_built_artifacts() { + [[ -d "${WDA_DERIVED_DATA}/Build/Products" ]] && \ + find "${WDA_DERIVED_DATA}/Build/Products" -name '*.xctestrun' -print -quit | grep -q . +} + +ensure_wda_checkout + +if [[ -d "$WDA_PROJECT" ]] && has_built_artifacts; then + echo "WebDriverAgent already built, skipping." + exit 0 +fi xcodebuild build-for-testing \ -project "$WDA_PROJECT" \ -scheme WebDriverAgentRunner \ - -destination "platform=iOS Simulator,name=$SIMULATOR_NAME" \ + -destination "$DESTINATION" \ + -derivedDataPath "$WDA_DERIVED_DATA" \ CODE_SIGNING_ALLOWED=NO \ | tail -1 + +if ! has_built_artifacts; then + echo "Error: WebDriverAgent build completed without an .xctestrun artifact" >&2 + exit 1 +fi diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh index 022b6bafef02..f4702762879f 100755 --- a/.buildkite/commands/run-ai-e2e-tests.sh +++ b/.buildkite/commands/run-ai-e2e-tests.sh @@ -18,9 +18,11 @@ # SIMULATOR_LLM_PILOT_APP_PASSWORD WordPress application password # # Optional environment variables: -# APP wordpress | jetpack (default: jetpack) -# SIMULATOR_NAME Simulator to boot if none running (default: iPhone 16) -# TEST_DIR Test directory (default: Tests/AgentTests/ui-tests) +# APP wordpress | jetpack (default: jetpack) +# SIMULATOR_NAME Simulator to boot if none running (default: iPhone 16) +# TEST_DIR Test directory (default: Tests/AgentTests/ui-tests) +# SIMULATOR_LLM_PILOT_REPO_URL Remote repo URL for simulator-llm-pilot +# SIMULATOR_LLM_PILOT_SOURCE_PATH Local source checkout override for simulator-llm-pilot set -euo pipefail @@ -50,6 +52,8 @@ fi APP="${APP:-jetpack}" export SIMULATOR_NAME="${SIMULATOR_NAME:-iPhone 16}" TEST_DIR="${TEST_DIR:-Tests/AgentTests/ui-tests}" +SIMULATOR_LLM_PILOT_REPO_URL="${SIMULATOR_LLM_PILOT_REPO_URL:-https://github.com/Automattic/simulator-llm-pilot.git}" +SIMULATOR_LLM_PILOT_SOURCE_PATH="${SIMULATOR_LLM_PILOT_SOURCE_PATH:-}" case "$APP" in wordpress) BUNDLE_ID="org.wordpress" ;; @@ -69,19 +73,26 @@ fi # ── Install simulator-llm-pilot ────────────────────────────────────── echo "--- Installing simulator-llm-pilot" -GEM_BUILD_DIR="$(mktemp -d)" -git clone --depth 1 https://github.com/Automattic/simulator-llm-pilot.git "$GEM_BUILD_DIR" -pushd "$GEM_BUILD_DIR" -gem build simulator-llm-pilot.gemspec -gem install simulator-llm-pilot-*.gem -popd -rm -rf "$GEM_BUILD_DIR" +bash Scripts/ci/install-simulator-llm-pilot.sh echo "simulator-llm-pilot $(simulator-llm-pilot version)" -# ── Boot simulator and install app (Buildkite only) ────────────────── +# ── Resolve simulator and install app (Buildkite only) ─────────────── echo "--- Setting up Simulator" -xcrun simctl boot "$SIMULATOR_NAME" 2>/dev/null || true -sleep 3 + +UDID="$(ruby Scripts/ci/find-booted-simulator.rb "$SIMULATOR_NAME" 2>/dev/null || true)" +if [[ -z "$UDID" ]]; then + echo "No booted simulator named '$SIMULATOR_NAME' found. Booting..." + xcrun simctl boot "$SIMULATOR_NAME" 2>/dev/null || true + UDID="$(ruby Scripts/ci/find-booted-simulator.rb "$SIMULATOR_NAME" 30 1 2>/dev/null || true)" +fi + +if [[ -z "$UDID" ]]; then + echo "Error: could not find a booted simulator named '$SIMULATOR_NAME'" >&2 + exit 1 +fi + +export SIMULATOR_UDID="$UDID" +echo "Simulator UDID: $UDID" if [[ -n "${BUILDKITE:-}" ]]; then APP_DISPLAY_NAME="Jetpack" @@ -93,7 +104,7 @@ if [[ -n "${BUILDKITE:-}" ]]; then exit 1 fi echo "Installing $APP_PATH on simulator..." - xcrun simctl install booted "$APP_PATH" + xcrun simctl install "$UDID" "$APP_PATH" fi # ── Build WebDriverAgent (if not present) ──────────────────────────── @@ -106,18 +117,20 @@ echo "--- Running AI E2E Tests" TIMESTAMP="$(date +%Y-%m-%d-%H%M)" RESULTS_DIR="Tests/AgentTests/results/${TIMESTAMP}" +EXIT_CODE=0 simulator-llm-pilot run "$TEST_DIR" \ --app-bundle-id "$BUNDLE_ID" \ - --simulator-name "$SIMULATOR_NAME" \ - --results-dir "$RESULTS_DIR" - -EXIT_CODE=$? + --simulator-udid "$UDID" \ + --results-dir "$RESULTS_DIR" \ + || EXIT_CODE=$? # ── Report results ─────────────────────────────────────────────────── echo "--- Results" RESULTS_FILE="${RESULTS_DIR}/results.md" if [[ -f "$RESULTS_FILE" ]]; then cat "$RESULTS_FILE" +else + echo "Warning: no results.md found at $RESULTS_FILE" fi exit "$EXIT_CODE" diff --git a/Scripts/ci/find-booted-simulator.rb b/Scripts/ci/find-booted-simulator.rb new file mode 100644 index 000000000000..67f33992366a --- /dev/null +++ b/Scripts/ci/find-booted-simulator.rb @@ -0,0 +1,38 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require 'json' +require 'open3' + +requested_name = ARGV[0].to_s +wait_seconds = ARGV[1].to_f +poll_interval = ARGV[2].to_f +poll_interval = 1.0 if poll_interval <= 0 +deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + [wait_seconds, 0].max + +loop do + output, status = Open3.capture2('xcrun', 'simctl', 'list', 'devices', 'booted', '-j') + exit 1 unless status.success? + + data = JSON.parse(output) + devices = data.fetch('devices', {}).each_value.flat_map do |list| + list.select { |device| device['state'] == 'Booted' } + end + + device = if requested_name.empty? + devices.first + else + devices.find { |entry| entry['name'] == requested_name } + end + + if device + print(device['udid']) + exit 0 + end + + break if wait_seconds <= 0 || Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline + + sleep poll_interval +end + +exit 1 diff --git a/Scripts/ci/install-simulator-llm-pilot.sh b/Scripts/ci/install-simulator-llm-pilot.sh new file mode 100644 index 000000000000..35f3b23bd2d7 --- /dev/null +++ b/Scripts/ci/install-simulator-llm-pilot.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +DEFAULT_LOCAL_GEM_PATH="$(cd "$REPO_ROOT/.." && pwd)/simulator-llm-pilot" + +SIMULATOR_LLM_PILOT_REPO_URL="${SIMULATOR_LLM_PILOT_REPO_URL:-https://github.com/Automattic/simulator-llm-pilot.git}" +SIMULATOR_LLM_PILOT_SOURCE_PATH="${SIMULATOR_LLM_PILOT_SOURCE_PATH:-}" + +build_dir="$(mktemp -d)" +trap 'rm -rf "$build_dir"' EXIT + +source_path="${SIMULATOR_LLM_PILOT_SOURCE_PATH}" +if [[ -z "$source_path" && -f "${DEFAULT_LOCAL_GEM_PATH}/simulator-llm-pilot.gemspec" ]]; then + source_path="${DEFAULT_LOCAL_GEM_PATH}" +fi + +if [[ -n "$source_path" ]]; then + echo "Using local simulator-llm-pilot source at ${source_path}" + if [[ -d "${source_path}/.git" ]]; then + source_revision="$(git -C "${source_path}" rev-parse HEAD)" + git -C "${source_path}" archive HEAD | tar -x -C "$build_dir" + else + source_revision="local-filesystem" + tar -cf - -C "${source_path}" . | tar -xf - -C "$build_dir" + fi +else + echo "Cloning simulator-llm-pilot from ${SIMULATOR_LLM_PILOT_REPO_URL}" + git clone --depth 1 "${SIMULATOR_LLM_PILOT_REPO_URL}" "$build_dir" + source_revision="$(git -C "$build_dir" rev-parse HEAD)" +fi + +pushd "$build_dir" >/dev/null +gem build simulator-llm-pilot.gemspec >/dev/null +shopt -s nullglob +gem_files=(simulator-llm-pilot-*.gem) +shopt -u nullglob + +if [[ ${#gem_files[@]} -ne 1 ]]; then + echo "Error: expected exactly one built simulator-llm-pilot gem, found ${#gem_files[@]}" >&2 + exit 1 +fi + +gem install --no-document --force "./${gem_files[0]}" +popd >/dev/null + +echo "Installed simulator-llm-pilot from ${source_revision}" From c7c52811bdcb8d830763b77fe098c3bcb247f339 Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Thu, 26 Mar 2026 14:21:42 +0100 Subject: [PATCH 08/12] Use APP_BUNDLE_ID consistently --- .buildkite/commands/run-ai-e2e-tests.sh | 6 +++--- .claude/skills/ai-test-runner/SKILL.md | 4 ++-- .claude/skills/ios-sim-navigation/SKILL.md | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh index f4702762879f..d780e11d3abb 100755 --- a/.buildkite/commands/run-ai-e2e-tests.sh +++ b/.buildkite/commands/run-ai-e2e-tests.sh @@ -56,8 +56,8 @@ SIMULATOR_LLM_PILOT_REPO_URL="${SIMULATOR_LLM_PILOT_REPO_URL:-https://github.com SIMULATOR_LLM_PILOT_SOURCE_PATH="${SIMULATOR_LLM_PILOT_SOURCE_PATH:-}" case "$APP" in - wordpress) BUNDLE_ID="org.wordpress" ;; - jetpack) BUNDLE_ID="com.automattic.jetpack" ;; + wordpress) APP_BUNDLE_ID="org.wordpress" ;; + jetpack) APP_BUNDLE_ID="com.automattic.jetpack" ;; *) echo "Error: APP must be 'wordpress' or 'jetpack', got '$APP'" >&2; exit 1 ;; esac @@ -119,7 +119,7 @@ RESULTS_DIR="Tests/AgentTests/results/${TIMESTAMP}" EXIT_CODE=0 simulator-llm-pilot run "$TEST_DIR" \ - --app-bundle-id "$BUNDLE_ID" \ + --app-bundle-id "$APP_BUNDLE_ID" \ --simulator-udid "$UDID" \ --results-dir "$RESULTS_DIR" \ || EXIT_CODE=$? diff --git a/.claude/skills/ai-test-runner/SKILL.md b/.claude/skills/ai-test-runner/SKILL.md index c8f611c1dd9d..0828d9640920 100644 --- a/.claude/skills/ai-test-runner/SKILL.md +++ b/.claude/skills/ai-test-runner/SKILL.md @@ -96,7 +96,7 @@ Use the ios-sim-navigation skill for WDA interaction reference. ## Context -- App Bundle ID: +- App Bundle ID: - WDA Session ID: - Simulator UDID: - Test file: (absolute path) @@ -117,7 +117,7 @@ Use the ios-sim-navigation skill for WDA interaction reference. 2. **Relaunch the app** for a clean state: ```bash - xcrun simctl launch --terminate-running-process \ + xcrun simctl launch --terminate-running-process \ -ui-test-site-url \ -ui-test-site-user \ -ui-test-site-pass diff --git a/.claude/skills/ios-sim-navigation/SKILL.md b/.claude/skills/ios-sim-navigation/SKILL.md index 0ec4ab7efab1..96cb6311aeb4 100644 --- a/.claude/skills/ios-sim-navigation/SKILL.md +++ b/.claude/skills/ios-sim-navigation/SKILL.md @@ -366,7 +366,7 @@ If actions consistently fail or the tree looks unexpected, the app may have cras xcrun simctl list devices booted # Re-launch the app -xcrun simctl launch +xcrun simctl launch ``` After re-launching, create a new WDA session before continuing. From 8fda202e8e95d63a5cb1c4f5549660054390e1a3 Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Thu, 26 Mar 2026 14:55:31 +0100 Subject: [PATCH 09/12] Normalize CI site URLs for simulator runs --- .buildkite/commands/run-ai-e2e-tests.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh index d780e11d3abb..dae1411ac79d 100755 --- a/.buildkite/commands/run-ai-e2e-tests.sh +++ b/.buildkite/commands/run-ai-e2e-tests.sh @@ -30,6 +30,15 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" cd "$REPO_ROOT" +normalize_site_url() { + local site_url="$1" + if [[ "$site_url" == http://* || "$site_url" == https://* ]]; then + printf '%s' "$site_url" + else + printf 'https://%s' "$site_url" + fi +} + # ── Label gate (Buildkite only) ───────────────────────────────────── if [[ -n "${BUILDKITE_PULL_REQUEST_LABELS:-}" ]]; then echo "--- Checking for 'Testing' label" @@ -47,6 +56,7 @@ fi : "${SIMULATOR_LLM_PILOT_SITE_URL:?Set SIMULATOR_LLM_PILOT_SITE_URL}" : "${SIMULATOR_LLM_PILOT_USERNAME:?Set SIMULATOR_LLM_PILOT_USERNAME}" : "${SIMULATOR_LLM_PILOT_APP_PASSWORD:?Set SIMULATOR_LLM_PILOT_APP_PASSWORD}" +export SIMULATOR_LLM_PILOT_SITE_URL="$(normalize_site_url "$SIMULATOR_LLM_PILOT_SITE_URL")" # ── Defaults ───────────────────────────────────────────────────────── APP="${APP:-jetpack}" From d8caf4626fd5ae5ab36be467f2920c079e9b2369 Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Thu, 26 Mar 2026 17:09:10 +0100 Subject: [PATCH 10/12] Extend AI E2E timeout to 60 minutes --- .buildkite/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index c51e60eb72e8..0116632a3f22 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -146,7 +146,7 @@ steps: depends_on: "build_jetpack" if: "build.pull_request.id != null" soft_fail: true - timeout_in_minutes: 30 + timeout_in_minutes: 60 plugins: [$CI_TOOLKIT_PLUGIN] env: APP: jetpack From 8e60354680bf4ef9ecf2e521721d84c25b3665ce Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Thu, 26 Mar 2026 23:30:22 +0100 Subject: [PATCH 11/12] Fix Rubocop errors --- Scripts/ci/find-booted-simulator.rb | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 Scripts/ci/find-booted-simulator.rb diff --git a/Scripts/ci/find-booted-simulator.rb b/Scripts/ci/find-booted-simulator.rb old mode 100644 new mode 100755 From 146daa1079d35503c816dd81319f6f4c0d866eb0 Mon Sep 17 00:00:00 2001 From: Ian Maia Date: Tue, 31 Mar 2026 19:31:09 +0200 Subject: [PATCH 12/12] Pass app instructions and name to simulator-llm-pilot The gem no longer hardcodes WordPress login flow in its system prompt. Add app-instructions.md with the WordPress/Jetpack login flow and pass it via --app-instructions-file. Also pass --app-name so the LLM knows the app's display name. Co-Authored-By: Claude Opus 4.6 (1M context) --- .buildkite/commands/run-ai-e2e-tests.sh | 11 ++++++----- Tests/AgentTests/app-instructions.md | 13 +++++++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 Tests/AgentTests/app-instructions.md diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh index dae1411ac79d..21a03fde6c6c 100755 --- a/.buildkite/commands/run-ai-e2e-tests.sh +++ b/.buildkite/commands/run-ai-e2e-tests.sh @@ -66,11 +66,13 @@ SIMULATOR_LLM_PILOT_REPO_URL="${SIMULATOR_LLM_PILOT_REPO_URL:-https://github.com SIMULATOR_LLM_PILOT_SOURCE_PATH="${SIMULATOR_LLM_PILOT_SOURCE_PATH:-}" case "$APP" in - wordpress) APP_BUNDLE_ID="org.wordpress" ;; - jetpack) APP_BUNDLE_ID="com.automattic.jetpack" ;; + wordpress) APP_BUNDLE_ID="org.wordpress"; APP_DISPLAY_NAME="WordPress" ;; + jetpack) APP_BUNDLE_ID="com.automattic.jetpack"; APP_DISPLAY_NAME="Jetpack" ;; *) echo "Error: APP must be 'wordpress' or 'jetpack', got '$APP'" >&2; exit 1 ;; esac +APP_INSTRUCTIONS_FILE="${REPO_ROOT}/Tests/AgentTests/app-instructions.md" + # ── Artifact download (Buildkite only) ─────────────────────────────── if [[ -n "${BUILDKITE:-}" ]]; then echo "--- Downloading Build Artifacts" @@ -105,9 +107,6 @@ export SIMULATOR_UDID="$UDID" echo "Simulator UDID: $UDID" if [[ -n "${BUILDKITE:-}" ]]; then - APP_DISPLAY_NAME="Jetpack" - [[ "$APP" = "wordpress" ]] && APP_DISPLAY_NAME="WordPress" - APP_PATH=$(find DerivedData/Build/Products -name "${APP_DISPLAY_NAME}.app" -path "*Debug-iphonesimulator*" | head -1) if [[ -z "$APP_PATH" ]]; then echo "Error: ${APP_DISPLAY_NAME}.app not found in build products" >&2 @@ -130,6 +129,8 @@ RESULTS_DIR="Tests/AgentTests/results/${TIMESTAMP}" EXIT_CODE=0 simulator-llm-pilot run "$TEST_DIR" \ --app-bundle-id "$APP_BUNDLE_ID" \ + --app-name "$APP_DISPLAY_NAME" \ + --app-instructions-file "$APP_INSTRUCTIONS_FILE" \ --simulator-udid "$UDID" \ --results-dir "$RESULTS_DIR" \ || EXIT_CODE=$? diff --git a/Tests/AgentTests/app-instructions.md b/Tests/AgentTests/app-instructions.md new file mode 100644 index 000000000000..fc9bc168c3b3 --- /dev/null +++ b/Tests/AgentTests/app-instructions.md @@ -0,0 +1,13 @@ +## Login + +This app uses a self-hosted WordPress site login flow. The app password is +passed via launch arguments — NEVER type a password manually. + +- NEVER tap "Continue with WordPress.com", NEVER enter WordPress.com + email/password, and NEVER request a login link. +- Tap "Enter your existing site address", then enter the site host first + (without scheme, for example `example.com`). If the app rejects the + host-only form, try the full site URL once. +- If you reach any WordPress.com email/password screen, back out and + return to the self-hosted flow. +- If the app is already logged in (e.g., My Site tab visible), skip login.