diff --git a/CLAUDE.md b/CLAUDE.md index f74cb95..f7a17c2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -106,6 +106,7 @@ Both automation servers expose `GET /health` and `POST /jsonrpc` (JSON-RPC 2.0) | `android_input_text` | Type text into the currently focused element | | `android_press_back` | Press the back button | | `android_press_home` | Press the home button | +| `android_screenshot` | Capture the device display and save as a PNG file (optional `outputPath`; defaults to `./screenshots/` in the project's CWD) | ### UI Automation (iOS) | Tool | Description | @@ -121,6 +122,7 @@ Both automation servers expose `GET /health` and `POST /jsonrpc` (JSON-RPC 2.0) | `ios_get_device_info` | Get display size, rotation, and iOS version | | `ios_input_text` | Type text into the currently focused element | | `ios_press_home` | Press home button | +| `ios_screenshot` | Capture the simulator display and save as a PNG file (optional `outputPath`; defaults to `./screenshots/` in the project's CWD) | | `ios_stop_automation_server` | Stop the running XCUITest server | ### Typical Automation Workflow diff --git a/app/src/main/kotlin/com/example/visiontest/android/AutomationClient.kt b/app/src/main/kotlin/com/example/visiontest/android/AutomationClient.kt index 88be909..a31eec1 100644 --- a/app/src/main/kotlin/com/example/visiontest/android/AutomationClient.kt +++ b/app/src/main/kotlin/com/example/visiontest/android/AutomationClient.kt @@ -95,6 +95,14 @@ class AutomationClient( return sendRequest("ui.dumpHierarchy") } + /** + * Captures the current device display as a PNG and returns the raw JSON-RPC response. + * The response `result` contains `success: Boolean`, `pngBase64: String?`, and `error: String?`. + */ + suspend fun screenshot(): String { + return sendRequest("ui.screenshot") + } + /** * Gets device information. 
*/ diff --git a/app/src/main/kotlin/com/example/visiontest/ios/IOSAutomationClient.kt b/app/src/main/kotlin/com/example/visiontest/ios/IOSAutomationClient.kt index 776ed3c..535d79e 100644 --- a/app/src/main/kotlin/com/example/visiontest/ios/IOSAutomationClient.kt +++ b/app/src/main/kotlin/com/example/visiontest/ios/IOSAutomationClient.kt @@ -181,6 +181,14 @@ class IOSAutomationClient( return sendRequest("device.pressHome") } + /** + * Captures a screenshot of the current simulator display. + * Returns the raw JSON-RPC response containing a base64-encoded PNG in `result.pngBase64`. + */ + suspend fun screenshot(): String { + return sendRequest("ui.screenshot") + } + /** * Finds an element by selector. Returns element info if found. * @param text Exact text match diff --git a/app/src/main/kotlin/com/example/visiontest/tools/AndroidAutomationToolRegistrar.kt b/app/src/main/kotlin/com/example/visiontest/tools/AndroidAutomationToolRegistrar.kt index b562a33..81f91ca 100644 --- a/app/src/main/kotlin/com/example/visiontest/tools/AndroidAutomationToolRegistrar.kt +++ b/app/src/main/kotlin/com/example/visiontest/tools/AndroidAutomationToolRegistrar.kt @@ -5,10 +5,18 @@ import com.example.visiontest.android.AutomationClient import com.example.visiontest.common.DeviceConfig import com.example.visiontest.config.AutomationConfig import com.example.visiontest.discovery.ToolDiscovery +import com.google.gson.JsonParser import io.modelcontextprotocol.kotlin.sdk.Tool import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.delay import kotlinx.coroutines.withContext +import java.io.File +import java.io.IOException +import java.nio.file.Files +import java.nio.file.StandardCopyOption +import java.time.LocalDateTime +import java.time.format.DateTimeFormatter +import java.util.Base64 class AndroidAutomationToolRegistrar( private val android: DeviceConfig, @@ -31,6 +39,7 @@ class AndroidAutomationToolRegistrar( registerInputText(scope) registerGetDeviceInfo(scope) 
registerGetInteractiveElements(scope) + registerScreenshot(scope) } private fun registerInstallAutomationServer(scope: ToolScope) { @@ -531,4 +540,161 @@ class AndroidAutomationToolRegistrar( automationClient.getInteractiveElements(includeDisabled) } } + + private fun registerScreenshot(scope: ToolScope) { + scope.tool( + name = "android_screenshot", + description = """ + Captures a screenshot of the current Android device display and saves it as a PNG file on the host. + The automation server must be running first (use start_automation_server). + + OPTIONAL PARAMETERS: + - outputPath: Absolute or relative path where the PNG will be written. + Relative paths resolve against the MCP server's working directory (typically the + user's current project). If the file already exists it will be overwritten. + Missing parent directories are created automatically. + If omitted, saves to ./screenshots/android_screenshot_.png relative to + the server's working directory (i.e. the current project, not the visiontest install dir). + + Returns the absolute path of the saved PNG. + """.trimIndent(), + timeoutMs = 30000 + ) { request -> + captureScreenshot(request.optionalString("outputPath")) + } + } + + internal suspend fun captureScreenshot(outputPath: String?): String { + if (!automationClient.isServerRunning()) { + return "Automation server is not running. Use 'start_automation_server' first." + } + + val response = automationClient.screenshot() + val root = try { + JsonParser.parseString(response).asJsonObject + } catch (e: Exception) { + return "Screenshot failed: unable to parse response from Android automation server (${e.message})." 
+ } + + val errorElement = root.get("error") + if (errorElement != null && !errorElement.isJsonNull) { + if (errorElement.isJsonObject) { + val errorObj = errorElement.asJsonObject + val codeElement = errorObj.get("code") + val code = if (codeElement?.isJsonPrimitive == true && codeElement.asJsonPrimitive.isNumber) { + codeElement.asInt + } else null + val messageElement = errorObj.get("message") + val message = if (messageElement?.isJsonPrimitive == true && messageElement.asJsonPrimitive.isString) { + messageElement.asString + } else "unknown error" + if (code == JSON_RPC_METHOD_NOT_FOUND) { + return "Screenshot failed: the Android automation server does not recognize 'ui.screenshot' " + + "(JSON-RPC methodNotFound). This indicates an outdated Android automation server APK " + + "— rebuild from source or update the installed APK." + } + return if (code != null) { + "Screenshot failed: Android automation server returned error ($code): $message" + } else { + "Screenshot failed: Android automation server returned an error: $message" + } + } + return "Screenshot failed: Android automation server returned a malformed error envelope." + } + + val resultElement = root.get("result") + if (resultElement == null || resultElement.isJsonNull) { + return "Screenshot failed: response missing 'result' object." + } + if (!resultElement.isJsonObject) { + return "Screenshot failed: response 'result' is not a JSON object." + } + val result = resultElement.asJsonObject + + val successElement = result.get("success") + if (successElement == null || successElement.isJsonNull || !successElement.isJsonPrimitive) { + return "Screenshot failed: response 'result' has a missing or non-primitive 'success' field." + } + val successPrimitive = successElement.asJsonPrimitive + if (!successPrimitive.isBoolean) { + return "Screenshot failed: response 'result.success' is not a boolean (got: $successElement)." 
+ } + if (!successPrimitive.asBoolean) { + val errorElement = result.get("error") + val error = if (errorElement != null && !errorElement.isJsonNull && errorElement.isJsonPrimitive && errorElement.asJsonPrimitive.isString) { + errorElement.asString + } else { + "unknown error" + } + return "Screenshot failed on the Android automation server: $error" + } + + val pngBase64Element = result.get("pngBase64") + if (pngBase64Element == null || pngBase64Element.isJsonNull) { + return "Screenshot failed: response missing 'pngBase64'. This may indicate an outdated Android automation server APK — rebuild from source or update the installed APK." + } + if (!pngBase64Element.isJsonPrimitive || !pngBase64Element.asJsonPrimitive.isString) { + return "Screenshot failed: response 'result.pngBase64' is not a string (got: $pngBase64Element)." + } + val pngBase64 = pngBase64Element.asString + if (pngBase64.isEmpty()) { + return "Screenshot failed: response missing 'pngBase64'. This may indicate an outdated Android automation server APK — rebuild from source or update the installed APK." + } + + val targetFile = resolveScreenshotPath(outputPath) + return writeScreenshot(targetFile, pngBase64) + } + + internal fun resolveScreenshotPath(outputPath: String?): File { + if (outputPath != null && outputPath.isNotBlank()) { + return File(outputPath).absoluteFile + } + val timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")) + return File("screenshots/android_screenshot_$timestamp.png").absoluteFile + } + + internal suspend fun writeScreenshot(target: File, pngBase64: String): String = withContext(Dispatchers.IO) { + val bytes = try { + Base64.getDecoder().decode(pngBase64) + } catch (e: IllegalArgumentException) { + return@withContext "Screenshot failed: Android automation server returned invalid base64 PNG data (${e.message})." 
+ } + + val targetPath = target.toPath() + val parentDir = target.parentFile + ?: return@withContext "Screenshot failed: cannot determine parent directory for ${target.absolutePath}." + + try { + Files.createDirectories(parentDir.toPath()) + } catch (e: IOException) { + return@withContext "Screenshot failed: unable to create parent directory ${parentDir.absolutePath} (${e.message})." + } + + val tempFile = try { + Files.createTempFile(parentDir.toPath(), ".android_screenshot_", ".png.tmp") + } catch (e: IOException) { + return@withContext "Screenshot failed: unable to create temp file in ${parentDir.absolutePath} (${e.message})." + } + + try { + Files.write(tempFile, bytes) + try { + Files.move(tempFile, targetPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE) + } catch (_: java.nio.file.AtomicMoveNotSupportedException) { + Files.move(tempFile, targetPath, StandardCopyOption.REPLACE_EXISTING) + } + "Screenshot saved to ${target.absolutePath}" + } catch (e: IOException) { + runCatching { Files.deleteIfExists(tempFile) } + "Screenshot failed: unable to write PNG to ${target.absolutePath} (${e.message})." + } catch (e: Exception) { + runCatching { Files.deleteIfExists(tempFile) } + throw e + } + } + + companion object { + /** Standard JSON-RPC 2.0 error code for an unknown method. 
*/ + private const val JSON_RPC_METHOD_NOT_FOUND = -32601 + } } diff --git a/app/src/main/kotlin/com/example/visiontest/tools/IOSAutomationToolRegistrar.kt b/app/src/main/kotlin/com/example/visiontest/tools/IOSAutomationToolRegistrar.kt index 55fb5e8..d5c7764 100644 --- a/app/src/main/kotlin/com/example/visiontest/tools/IOSAutomationToolRegistrar.kt +++ b/app/src/main/kotlin/com/example/visiontest/tools/IOSAutomationToolRegistrar.kt @@ -4,11 +4,19 @@ import com.example.visiontest.common.DeviceConfig import com.example.visiontest.config.IOSAutomationConfig import com.example.visiontest.discovery.ToolDiscovery import com.example.visiontest.ios.IOSAutomationClient +import com.google.gson.JsonParser import io.modelcontextprotocol.kotlin.sdk.Tool import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.delay import kotlinx.coroutines.withContext import org.slf4j.Logger +import java.io.File +import java.io.IOException +import java.nio.file.Files +import java.nio.file.StandardCopyOption +import java.time.LocalDateTime +import java.time.format.DateTimeFormatter +import java.util.Base64 class IOSAutomationToolRegistrar( private val ios: DeviceConfig, @@ -32,6 +40,7 @@ class IOSAutomationToolRegistrar( registerGetDeviceInfo(scope) registerPressHome(scope) registerInputText(scope) + registerScreenshot(scope) registerStopAutomationServer(scope) } @@ -467,6 +476,179 @@ class IOSAutomationToolRegistrar( } } + private fun registerScreenshot(scope: ToolScope) { + scope.tool( + name = "ios_screenshot", + description = """ + Captures a screenshot of the current iOS simulator display and saves it as a PNG file on the host. + The iOS automation server must be running first (use ios_start_automation_server). + + OPTIONAL PARAMETERS: + - outputPath: Absolute or relative path where the PNG will be written. + Relative paths resolve against the MCP server's working directory (typically the + user's current project). If the file already exists it will be overwritten. 
+ Missing parent directories are created automatically. + If omitted, saves to ./screenshots/ios_screenshot_.png relative to + the server's working directory (i.e. the current project, not the visiontest install dir). + + Returns the absolute path of the saved PNG. + """.trimIndent(), + timeoutMs = 30000 + ) { request -> + captureScreenshot(request.optionalString("outputPath")) + } + } + + internal suspend fun captureScreenshot(outputPath: String?): String { + if (!iosAutomationClient.isServerRunning()) { + return "iOS automation server is not running. Use 'ios_start_automation_server' first." + } + + val response = iosAutomationClient.screenshot() + val root = try { + JsonParser.parseString(response).asJsonObject + } catch (e: Exception) { + return "Screenshot failed: unable to parse response from iOS automation server (${e.message})." + } + + // JSON-RPC 2.0 envelope: either `result` OR `error` is present at the top level. + // Check `error` first so we can surface the server's message and map `methodNotFound` + // to the outdated-bundle guidance (older bundles won't know about `ui.screenshot`). + val errorElement = root.get("error") + if (errorElement != null && !errorElement.isJsonNull) { + if (errorElement.isJsonObject) { + val errorObj = errorElement.asJsonObject + val codeElement = errorObj.get("code") + val code = if (codeElement?.isJsonPrimitive == true && codeElement.asJsonPrimitive.isNumber) { + codeElement.asInt + } else null + val messageElement = errorObj.get("message") + val message = if (messageElement?.isJsonPrimitive == true) { + messageElement.asString + } else "unknown error" + if (code == JSON_RPC_METHOD_NOT_FOUND) { + return "Screenshot failed: the iOS automation server does not recognize 'ui.screenshot' " + + "(JSON-RPC methodNotFound). This indicates an outdated iOS automation server bundle " + + "— rebuild from source or update the installed bundle." 
+ } + return if (code != null) { + "Screenshot failed: iOS automation server returned error ($code): $message" + } else { + "Screenshot failed: iOS automation server returned an error: $message" + } + } + return "Screenshot failed: iOS automation server returned a malformed error envelope." + } + + val resultElement = root.get("result") + if (resultElement == null || resultElement.isJsonNull) { + return "Screenshot failed: response missing 'result' object." + } + if (!resultElement.isJsonObject) { + return "Screenshot failed: response 'result' is not a JSON object." + } + val result = resultElement.asJsonObject + + val successElement = result.get("success") + if (successElement == null || successElement.isJsonNull || !successElement.isJsonPrimitive) { + return "Screenshot failed: response 'result' has a missing or non-primitive 'success' field." + } + val successPrimitive = successElement.asJsonPrimitive + if (!successPrimitive.isBoolean) { + return "Screenshot failed: response 'result.success' is not a boolean (got: $successElement)." + } + if (!successPrimitive.asBoolean) { + val errorElement = result.get("error") + val error = if (errorElement != null && !errorElement.isJsonNull && errorElement.isJsonPrimitive && errorElement.asJsonPrimitive.isString) { + errorElement.asString + } else { + "unknown error" + } + return "Screenshot failed on the iOS automation server: $error" + } + + val pngBase64Element = result.get("pngBase64") + if (pngBase64Element == null || pngBase64Element.isJsonNull) { + return "Screenshot failed: response missing 'pngBase64'. This may indicate an outdated iOS automation server bundle — rebuild from source or update the installed bundle." + } + if (!pngBase64Element.isJsonPrimitive || !pngBase64Element.asJsonPrimitive.isString) { + return "Screenshot failed: response 'result.pngBase64' is not a string (got: $pngBase64Element)." 
+ } + val pngBase64 = pngBase64Element.asString + if (pngBase64.isEmpty()) { + return "Screenshot failed: response missing 'pngBase64'. This may indicate an outdated iOS automation server bundle — rebuild from source or update the installed bundle." + } + + val targetFile = resolveScreenshotPath(outputPath) + return writeScreenshot(targetFile, pngBase64) + } + + internal fun resolveScreenshotPath(outputPath: String?): File { + if (outputPath != null && outputPath.isNotBlank()) { + return File(outputPath).absoluteFile + } + val timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")) + // Default to the MCP server's working directory so screenshots land in the + // user's current project (not the visiontest install dir). Coding agents like + // Claude Code launch the server with CWD set to the project they're working on. + return File("screenshots/ios_screenshot_$timestamp.png").absoluteFile + } + + /** + * Decodes the base64 PNG and writes it atomically to [target]. + * Runs on Dispatchers.IO so we don't block the tool handler's coroutine context. + * Writes to a sibling temp file first, then moves into place so a failure or cancellation + * mid-write cannot leave a partial PNG at [target]. + * + * Returns a user-facing result string (success or a specific error message). + */ + internal suspend fun writeScreenshot(target: File, pngBase64: String): String = withContext(Dispatchers.IO) { + val bytes = try { + Base64.getDecoder().decode(pngBase64) + } catch (e: IllegalArgumentException) { + return@withContext "Screenshot failed: iOS automation server returned invalid base64 PNG data (${e.message})." + } + + val targetPath = target.toPath() + val parentDir = target.parentFile + ?: return@withContext "Screenshot failed: cannot determine parent directory for ${target.absolutePath}." 
+ + try { + Files.createDirectories(parentDir.toPath()) + } catch (e: IOException) { + return@withContext "Screenshot failed: unable to create parent directory ${parentDir.absolutePath} (${e.message})." + } + + val tempFile = try { + Files.createTempFile(parentDir.toPath(), ".ios_screenshot_", ".png.tmp") + } catch (e: IOException) { + return@withContext "Screenshot failed: unable to create temp file in ${parentDir.absolutePath} (${e.message})." + } + + try { + Files.write(tempFile, bytes) + // ATOMIC_MOVE isn't guaranteed across filesystems, but tempFile is a sibling of + // target so they're on the same FS. Fall back to plain replace on rare failures. + try { + Files.move(tempFile, targetPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE) + } catch (_: java.nio.file.AtomicMoveNotSupportedException) { + Files.move(tempFile, targetPath, StandardCopyOption.REPLACE_EXISTING) + } + "Screenshot saved to ${target.absolutePath}" + } catch (e: IOException) { + runCatching { Files.deleteIfExists(tempFile) } + "Screenshot failed: unable to write PNG to ${target.absolutePath} (${e.message})." + } catch (e: Exception) { + runCatching { Files.deleteIfExists(tempFile) } + throw e + } + } + + companion object { + /** Standard JSON-RPC 2.0 error code for an unknown method. 
*/ + private const val JSON_RPC_METHOD_NOT_FOUND = -32601 + } + private fun registerStopAutomationServer(scope: ToolScope) { scope.tool( name = "ios_stop_automation_server", diff --git a/app/src/test/kotlin/com/example/visiontest/tools/AndroidScreenshotToolTest.kt b/app/src/test/kotlin/com/example/visiontest/tools/AndroidScreenshotToolTest.kt new file mode 100644 index 0000000..2402eb9 --- /dev/null +++ b/app/src/test/kotlin/com/example/visiontest/tools/AndroidScreenshotToolTest.kt @@ -0,0 +1,265 @@ +package com.example.visiontest.tools + +import com.example.visiontest.android.AutomationClient +import com.example.visiontest.common.DeviceConfig +import com.example.visiontest.discovery.ToolDiscovery +import kotlinx.coroutines.runBlocking +import okhttp3.mockwebserver.MockResponse +import okhttp3.mockwebserver.MockWebServer +import org.slf4j.LoggerFactory +import java.io.File +import java.nio.file.Files +import java.util.Base64 +import kotlin.test.* + +class AndroidScreenshotToolTest { + + // Smallest valid PNG (1x1 transparent pixel) used as fixture. + // Bytes: 89 50 4E 47 0D 0A 1A 0A ... (PNG magic header) + private val fixturePngBase64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=" + private val fixturePngBytes = Base64.getDecoder().decode(fixturePngBase64) + + private lateinit var mockServer: MockWebServer + private lateinit var registrar: AndroidAutomationToolRegistrar + private lateinit var tempDir: File + + private val logger = LoggerFactory.getLogger(AndroidScreenshotToolTest::class.java) + private val fakeDeviceConfig = object : DeviceConfig { + override suspend fun listDevices() = emptyList() + override suspend fun getFirstAvailableDevice(): com.example.visiontest.common.MobileDevice = + throw UnsupportedOperationException() + override suspend fun listApps(deviceId: String?) = emptyList() + override suspend fun getAppInfo(packageName: String, deviceId: String?) 
= "" + override suspend fun launchApp(packageName: String, activityName: String?, deviceId: String?) = false + override suspend fun executeShell(command: String, deviceId: String?) = "" + } + + @BeforeTest + fun setUp() { + mockServer = MockWebServer() + mockServer.start() + val client = AutomationClient(host = mockServer.hostName, port = mockServer.port) + registrar = AndroidAutomationToolRegistrar(fakeDeviceConfig, client, ToolDiscovery(logger)) + tempDir = Files.createTempDirectory("android-screenshot-test").toFile() + } + + @AfterTest + fun tearDown() { + mockServer.shutdown() + tempDir.deleteRecursively() + } + + // --- resolveScreenshotPath --- + + @Test + fun `default path places timestamped filename under screenshots in CWD`() { + val resolved = registrar.resolveScreenshotPath(null) + assertEquals("screenshots", resolved.parentFile.name) + assertTrue( + resolved.name.matches(Regex("""android_screenshot_\d{8}_\d{6}\.png""")), + "Expected default filename to match android_screenshot_.png, got ${resolved.name}" + ) + assertTrue(resolved.isAbsolute, "Default path should be absolute") + // Default must be relative to the JVM's working directory (the user's project when + // launched by a coding agent), NOT the visiontest install dir. 
+ val expectedRoot = File(System.getProperty("user.dir")).absoluteFile + assertEquals(expectedRoot, resolved.parentFile.parentFile) + } + + @Test + fun `explicit outputPath is used verbatim`() { + val explicit = File(tempDir, "custom/shot.png") + val resolved = registrar.resolveScreenshotPath(explicit.absolutePath) + assertEquals(explicit.absolutePath, resolved.absolutePath) + } + + @Test + fun `blank outputPath falls back to default`() { + val resolved = registrar.resolveScreenshotPath(" ") + assertTrue(resolved.name.startsWith("android_screenshot_")) + } + + // --- captureScreenshot end-to-end via MockWebServer --- + + @Test + fun `successful capture writes decoded PNG bytes to target path`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult(successBody(fixturePngBase64)) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("Screenshot saved to ${target.absolutePath}"), "Got: $message") + assertTrue(target.exists(), "PNG file should be written") + assertContentEquals(fixturePngBytes, target.readBytes()) + } + + @Test + fun `missing parent directories are created automatically`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult(successBody(fixturePngBase64)) + + val nested = File(tempDir, "a/b/c/out.png") + assertFalse(nested.parentFile.exists(), "precondition: parent should not exist") + + registrar.captureScreenshot(nested.absolutePath) + + assertTrue(nested.exists()) + assertContentEquals(fixturePngBytes, nested.readBytes()) + } + + @Test + fun `server not running short-circuits with no file write`() = runBlocking { + // Health check fails with 500; no second request should be made. 
+ mockServer.enqueue(MockResponse().setResponseCode(500)) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("Automation server is not running")) + assertFalse(target.exists(), "No file should be written when server is not running") + assertEquals(1, mockServer.requestCount, "Only the health check should have been attempted") + } + + @Test + fun `success false in response surfaces as error and writes no file`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult("""{"success":false,"error":"capture failed"}""") + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("Screenshot failed on the Android automation server: capture failed"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `missing pngBase64 surfaces outdated-APK hint and writes no file`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult("""{"success":true}""") + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("response missing 'pngBase64'"), "Got: $message") + assertTrue(message.contains("outdated Android automation server APK"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `missing result object surfaces error and writes no file`() = runBlocking { + enqueueHealthOk() + // Response with no "result" wrapper at all. 
+ mockServer.enqueue(MockResponse().setBody("""{"jsonrpc":"2.0","id":1}""")) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("response missing 'result' object"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `malformed JSON response surfaces parse error and writes no file`() = runBlocking { + enqueueHealthOk() + mockServer.enqueue(MockResponse().setBody("not json")) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue( + message.contains("unable to parse response from Android automation server"), + "Got: $message" + ) + assertFalse(target.exists()) + } + + @Test + fun `JSON-RPC methodNotFound maps to outdated-APK guidance`() = runBlocking { + enqueueHealthOk() + // Standard JSON-RPC 2.0 error envelope for methodNotFound (code -32601). + // This is what an older pre-built APK would return for ui.screenshot. + mockServer.enqueue( + MockResponse().setBody( + """{"jsonrpc":"2.0","error":{"code":-32601,"message":"Method not found: ui.screenshot"},"id":1}""" + ) + ) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("methodNotFound"), "Got: $message") + assertTrue(message.contains("outdated Android automation server APK"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `other JSON-RPC errors surface code and message`() = runBlocking { + enqueueHealthOk() + mockServer.enqueue( + MockResponse().setBody( + """{"jsonrpc":"2.0","error":{"code":-32000,"message":"UIAutomator crashed"},"id":1}""" + ) + ) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("-32000"), "Got: $message") + assertTrue(message.contains("UIAutomator crashed"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun 
`result present but not a JSON object is rejected gracefully`() = runBlocking { + enqueueHealthOk() + mockServer.enqueue(MockResponse().setBody("""{"jsonrpc":"2.0","result":"oops","id":1}""")) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("'result' is not a JSON object"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `invalid base64 in pngBase64 surfaces a decode error and writes no file`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult(successBody("!!!not-base64!!!")) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("invalid base64 PNG data"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `atomic write leaves no temp file on success`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult(successBody(fixturePngBase64)) + + val target = File(tempDir, "out.png") + registrar.captureScreenshot(target.absolutePath) + + // Confirm the final file exists and no .png.tmp sidecar was left behind + assertTrue(target.exists()) + val leftovers = tempDir.listFiles { f -> f.name.endsWith(".png.tmp") }.orEmpty() + assertEquals(0, leftovers.size, "Temp files left behind: ${leftovers.map { it.name }}") + } + + // --- helpers --- + + private fun enqueueHealthOk() { + mockServer.enqueue(MockResponse().setResponseCode(200).setBody("OK")) + } + + private fun enqueueScreenshotResult(innerResultJson: String) { + val body = """{"jsonrpc":"2.0","result":$innerResultJson,"id":1}""" + mockServer.enqueue(MockResponse().setBody(body)) + } + + private fun successBody(pngBase64: String): String = + """{"success":true,"pngBase64":"$pngBase64"}""" +} diff --git a/app/src/test/kotlin/com/example/visiontest/tools/IOSScreenshotToolTest.kt b/app/src/test/kotlin/com/example/visiontest/tools/IOSScreenshotToolTest.kt new file mode 100644 index 
0000000..9f8765d --- /dev/null +++ b/app/src/test/kotlin/com/example/visiontest/tools/IOSScreenshotToolTest.kt @@ -0,0 +1,265 @@ +package com.example.visiontest.tools + +import com.example.visiontest.common.DeviceConfig +import com.example.visiontest.discovery.ToolDiscovery +import com.example.visiontest.ios.IOSAutomationClient +import kotlinx.coroutines.runBlocking +import okhttp3.mockwebserver.MockResponse +import okhttp3.mockwebserver.MockWebServer +import org.slf4j.LoggerFactory +import java.io.File +import java.nio.file.Files +import java.util.Base64 +import kotlin.test.* + +class IOSScreenshotToolTest { + + // Smallest valid PNG (1x1 transparent pixel) used as fixture. + // Bytes: 89 50 4E 47 0D 0A 1A 0A ... (PNG magic header) + private val fixturePngBase64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII=" + private val fixturePngBytes = Base64.getDecoder().decode(fixturePngBase64) + + private lateinit var mockServer: MockWebServer + private lateinit var registrar: IOSAutomationToolRegistrar + private lateinit var tempDir: File + + private val logger = LoggerFactory.getLogger(IOSScreenshotToolTest::class.java) + private val fakeDeviceConfig = object : DeviceConfig { + override suspend fun listDevices() = emptyList() + override suspend fun getFirstAvailableDevice(): com.example.visiontest.common.MobileDevice = + throw UnsupportedOperationException() + override suspend fun listApps(deviceId: String?) = emptyList() + override suspend fun getAppInfo(packageName: String, deviceId: String?) = "" + override suspend fun launchApp(packageName: String, activityName: String?, deviceId: String?) = false + override suspend fun executeShell(command: String, deviceId: String?) 
= "" + } + + @BeforeTest + fun setUp() { + mockServer = MockWebServer() + mockServer.start() + val client = IOSAutomationClient(host = mockServer.hostName, port = mockServer.port) + registrar = IOSAutomationToolRegistrar(fakeDeviceConfig, client, ToolDiscovery(logger), logger) + tempDir = Files.createTempDirectory("ios-screenshot-test").toFile() + } + + @AfterTest + fun tearDown() { + mockServer.shutdown() + tempDir.deleteRecursively() + } + + // --- resolveScreenshotPath --- + + @Test + fun `default path places timestamped filename under screenshots in CWD`() { + val resolved = registrar.resolveScreenshotPath(null) + assertEquals("screenshots", resolved.parentFile.name) + assertTrue( + resolved.name.matches(Regex("""ios_screenshot_\d{8}_\d{6}\.png""")), + "Expected default filename to match ios_screenshot_.png, got ${resolved.name}" + ) + assertTrue(resolved.isAbsolute, "Default path should be absolute") + // Default must be relative to the JVM's working directory (the user's project when + // launched by a coding agent), NOT the visiontest install dir. 
+ val expectedRoot = File(System.getProperty("user.dir")).absoluteFile + assertEquals(expectedRoot, resolved.parentFile.parentFile) + } + + @Test + fun `explicit outputPath is used verbatim`() { + val explicit = File(tempDir, "custom/shot.png") + val resolved = registrar.resolveScreenshotPath(explicit.absolutePath) + assertEquals(explicit.absolutePath, resolved.absolutePath) + } + + @Test + fun `blank outputPath falls back to default`() { + val resolved = registrar.resolveScreenshotPath(" ") + assertTrue(resolved.name.startsWith("ios_screenshot_")) + } + + // --- captureScreenshot end-to-end via MockWebServer --- + + @Test + fun `successful capture writes decoded PNG bytes to target path`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult(successBody(fixturePngBase64)) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("Screenshot saved to ${target.absolutePath}"), "Got: $message") + assertTrue(target.exists(), "PNG file should be written") + assertContentEquals(fixturePngBytes, target.readBytes()) + } + + @Test + fun `missing parent directories are created automatically`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult(successBody(fixturePngBase64)) + + val nested = File(tempDir, "a/b/c/out.png") + assertFalse(nested.parentFile.exists(), "precondition: parent should not exist") + + registrar.captureScreenshot(nested.absolutePath) + + assertTrue(nested.exists()) + assertContentEquals(fixturePngBytes, nested.readBytes()) + } + + @Test + fun `server not running short-circuits with no file write`() = runBlocking { + // Health check fails with 500; no second request should be made. 
+ mockServer.enqueue(MockResponse().setResponseCode(500)) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("iOS automation server is not running")) + assertFalse(target.exists(), "No file should be written when server is not running") + assertEquals(1, mockServer.requestCount, "Only the health check should have been attempted") + } + + @Test + fun `success false in response surfaces as error and writes no file`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult("""{"success":false,"error":"capture failed"}""") + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("Screenshot failed on the iOS automation server: capture failed"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `missing pngBase64 surfaces outdated-bundle hint and writes no file`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult("""{"success":true}""") + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("response missing 'pngBase64'"), "Got: $message") + assertTrue(message.contains("outdated iOS automation server bundle"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `missing result object surfaces error and writes no file`() = runBlocking { + enqueueHealthOk() + // Response with no "result" wrapper at all. 
+ mockServer.enqueue(MockResponse().setBody("""{"jsonrpc":"2.0","id":1}""")) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("response missing 'result' object"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `malformed JSON response surfaces parse error and writes no file`() = runBlocking { + enqueueHealthOk() + mockServer.enqueue(MockResponse().setBody("not json")) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue( + message.contains("unable to parse response from iOS automation server"), + "Got: $message" + ) + assertFalse(target.exists()) + } + + @Test + fun `JSON-RPC methodNotFound maps to outdated-bundle guidance`() = runBlocking { + enqueueHealthOk() + // Standard JSON-RPC 2.0 error envelope for methodNotFound (code -32601). + // This is what an older pre-built bundle would return for ui.screenshot. 
+ mockServer.enqueue( + MockResponse().setBody( + """{"jsonrpc":"2.0","error":{"code":-32601,"message":"Method not found: ui.screenshot"},"id":1}""" + ) + ) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("methodNotFound"), "Got: $message") + assertTrue(message.contains("outdated iOS automation server bundle"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `other JSON-RPC errors surface code and message`() = runBlocking { + enqueueHealthOk() + mockServer.enqueue( + MockResponse().setBody( + """{"jsonrpc":"2.0","error":{"code":-32000,"message":"XCUITest crashed"},"id":1}""" + ) + ) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("-32000"), "Got: $message") + assertTrue(message.contains("XCUITest crashed"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `result present but not a JSON object is rejected gracefully`() = runBlocking { + enqueueHealthOk() + mockServer.enqueue(MockResponse().setBody("""{"jsonrpc":"2.0","result":"oops","id":1}""")) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("'result' is not a JSON object"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `invalid base64 in pngBase64 surfaces a decode error and writes no file`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult(successBody("!!!not-base64!!!")) + + val target = File(tempDir, "out.png") + val message = registrar.captureScreenshot(target.absolutePath) + + assertTrue(message.contains("invalid base64 PNG data"), "Got: $message") + assertFalse(target.exists()) + } + + @Test + fun `atomic write leaves no temp file on success`() = runBlocking { + enqueueHealthOk() + enqueueScreenshotResult(successBody(fixturePngBase64)) + + val target = File(tempDir, 
"out.png") + registrar.captureScreenshot(target.absolutePath) + + // Confirm the final file exists and no .png.tmp sidecar was left behind + assertTrue(target.exists()) + val leftovers = tempDir.listFiles { f -> f.name.endsWith(".png.tmp") }.orEmpty() + assertEquals(0, leftovers.size, "Temp files left behind: ${leftovers.map { it.name }}") + } + + // --- helpers --- + + private fun enqueueHealthOk() { + mockServer.enqueue(MockResponse().setResponseCode(200).setBody("OK")) + } + + private fun enqueueScreenshotResult(innerResultJson: String) { + val body = """{"jsonrpc":"2.0","result":$innerResultJson,"id":1}""" + mockServer.enqueue(MockResponse().setBody(body)) + } + + private fun successBody(pngBase64: String): String = + """{"success":true,"pngBase64":"$pngBase64"}""" +} diff --git a/automation-server/src/androidTest/java/com/example/automationserver/JsonRpcServerInstrumented.kt b/automation-server/src/androidTest/java/com/example/automationserver/JsonRpcServerInstrumented.kt index d02eae9..cbc4269 100644 --- a/automation-server/src/androidTest/java/com/example/automationserver/JsonRpcServerInstrumented.kt +++ b/automation-server/src/androidTest/java/com/example/automationserver/JsonRpcServerInstrumented.kt @@ -160,6 +160,9 @@ class JsonRpcServerInstrumented( "device.pressBack" -> uiAutomator.pressBack() "device.pressHome" -> uiAutomator.pressHome() + // Screenshot + "ui.screenshot" -> uiAutomator.screenshot() + // Click methods "ui.tapByCoordinates" -> { val x = params?.get("x")?.asInt diff --git a/automation-server/src/main/java/com/example/automationserver/uiautomator/BaseUiAutomatorBridge.kt b/automation-server/src/main/java/com/example/automationserver/uiautomator/BaseUiAutomatorBridge.kt index 7fa3817..b9d344d 100644 --- a/automation-server/src/main/java/com/example/automationserver/uiautomator/BaseUiAutomatorBridge.kt +++ b/automation-server/src/main/java/com/example/automationserver/uiautomator/BaseUiAutomatorBridge.kt @@ -376,6 +376,52 @@ abstract class 
BaseUiAutomatorBridge { } } + /** + * Captures the current device display as a PNG and returns it base64-encoded. + * + * Uses [android.app.UiAutomation.takeScreenshot] which returns a [android.graphics.Bitmap] + * in the instrumentation test process, then compresses to PNG in-memory. The base64 + * payload crosses the JSON-RPC/ADB-forward boundary to the host, where the MCP tool + * decodes and writes it to disk. + */ + fun screenshot(): ScreenshotResult { + var bitmap: android.graphics.Bitmap? = null + return try { + getUiDevice().waitForIdle(1000) + bitmap = getUiAutomation().takeScreenshot() + if (bitmap == null) { + return ScreenshotResult( + success = false, + error = "Screenshot capture returned no bitmap (display unavailable or content is FLAG_SECURE)" + ) + } + val outputStream = ByteArrayOutputStream() + val compressed = outputStream.use { stream -> + bitmap.compress(android.graphics.Bitmap.CompressFormat.PNG, 100, stream) + } + if (!compressed) { + return ScreenshotResult( + success = false, + error = "PNG compression failed (Bitmap.compress returned false)" + ) + } + val bytes = outputStream.toByteArray() + if (bytes.isEmpty()) { + return ScreenshotResult( + success = false, + error = "PNG compression produced empty output" + ) + } + val base64 = android.util.Base64.encodeToString(bytes, android.util.Base64.NO_WRAP) + ScreenshotResult(success = true, pngBase64 = base64) + } catch (e: Exception) { + Log.e(TAG, "Error capturing screenshot", e) + ScreenshotResult(success = false, error = e.message ?: "Unknown error") + } finally { + bitmap?.recycle() + } + } + /** * Finds a UI element using various selectors. 
* diff --git a/automation-server/src/main/java/com/example/automationserver/uiautomator/UiAutomatorModels.kt b/automation-server/src/main/java/com/example/automationserver/uiautomator/UiAutomatorModels.kt index 8a9c9d2..5dfabdb 100644 --- a/automation-server/src/main/java/com/example/automationserver/uiautomator/UiAutomatorModels.kt +++ b/automation-server/src/main/java/com/example/automationserver/uiautomator/UiAutomatorModels.kt @@ -9,6 +9,15 @@ data class UiHierarchyResult( val error: String? = null ) +/** + * Result of a screenshot capture operation. + */ +data class ScreenshotResult( + val success: Boolean, + val pngBase64: String? = null, + val error: String? = null +) + /** * Device display and system information. */ diff --git a/automation-server/src/test/java/com/example/automationserver/uiautomator/UiAutomatorModelsTest.kt b/automation-server/src/test/java/com/example/automationserver/uiautomator/UiAutomatorModelsTest.kt index 0a2f151..2f56675 100644 --- a/automation-server/src/test/java/com/example/automationserver/uiautomator/UiAutomatorModelsTest.kt +++ b/automation-server/src/test/java/com/example/automationserver/uiautomator/UiAutomatorModelsTest.kt @@ -33,6 +33,31 @@ class UiAutomatorModelsTest { assertNull(result.error) } + // --- ScreenshotResult --- + + @Test + fun `ScreenshotResult success with pngBase64`() { + val result = ScreenshotResult(success = true, pngBase64 = "iVBORw0KGgo=") + assertTrue(result.success) + assertEquals("iVBORw0KGgo=", result.pngBase64) + assertNull(result.error) + } + + @Test + fun `ScreenshotResult failure with error`() { + val result = ScreenshotResult(success = false, error = "capture failed") + assertFalse(result.success) + assertNull(result.pngBase64) + assertEquals("capture failed", result.error) + } + + @Test + fun `ScreenshotResult defaults pngBase64 and error to null`() { + val result = ScreenshotResult(success = true) + assertNull(result.pngBase64) + assertNull(result.error) + } + // --- DeviceInfo --- @Test 
diff --git a/ios-automation-server/IOSAutomationServerTests/AutomationModelsTests.swift b/ios-automation-server/IOSAutomationServerTests/AutomationModelsTests.swift index e1edbb7..12a60b1 100644 --- a/ios-automation-server/IOSAutomationServerTests/AutomationModelsTests.swift +++ b/ios-automation-server/IOSAutomationServerTests/AutomationModelsTests.swift @@ -20,6 +20,24 @@ final class AutomationModelsTests: XCTestCase { XCTAssertEqual(dict["error"] as? String, "timeout") } + // MARK: - ScreenshotResult + + func testScreenshotResultSuccess() { + let result = ScreenshotResult(success: true, pngBase64: "iVBORw0KGgo=", error: nil) + let dict = result.toDictionary() + XCTAssertEqual(dict["success"] as? Bool, true) + XCTAssertEqual(dict["pngBase64"] as? String, "iVBORw0KGgo=") + XCTAssertNil(dict["error"]) + } + + func testScreenshotResultFailure() { + let result = ScreenshotResult(success: false, pngBase64: nil, error: "capture failed") + let dict = result.toDictionary() + XCTAssertEqual(dict["success"] as? Bool, false) + XCTAssertNil(dict["pngBase64"]) + XCTAssertEqual(dict["error"] as? String, "capture failed") + } + // MARK: - DeviceInfoResult func testDeviceInfoResultAllFields() { diff --git a/ios-automation-server/IOSAutomationServerUITests/Bridge/XCUITestBridge.swift b/ios-automation-server/IOSAutomationServerUITests/Bridge/XCUITestBridge.swift index 79cae90..4647152 100644 --- a/ios-automation-server/IOSAutomationServerUITests/Bridge/XCUITestBridge.swift +++ b/ios-automation-server/IOSAutomationServerUITests/Bridge/XCUITestBridge.swift @@ -110,6 +110,18 @@ class XCUITestBridge { ) } + // MARK: - Screenshot + + /// Captures the current simulator display as a PNG and returns it base64-encoded. 
+ func screenshot() -> ScreenshotResult { + let pngData = XCUIScreen.main.screenshot().pngRepresentation + if pngData.isEmpty { + return ScreenshotResult(success: false, pngBase64: nil, error: "Screenshot returned empty PNG data") + } + let base64 = pngData.base64EncodedString() + return ScreenshotResult(success: true, pngBase64: base64, error: nil) + } + // MARK: - Tap /// Taps at absolute screen coordinates using the springboard coordinate space. diff --git a/ios-automation-server/IOSAutomationServerUITests/Models/AutomationModels.swift b/ios-automation-server/IOSAutomationServerUITests/Models/AutomationModels.swift index 4464427..67f5f99 100644 --- a/ios-automation-server/IOSAutomationServerUITests/Models/AutomationModels.swift +++ b/ios-automation-server/IOSAutomationServerUITests/Models/AutomationModels.swift @@ -38,6 +38,19 @@ struct DeviceInfoResult { } } +struct ScreenshotResult { + let success: Bool + let pngBase64: String? + let error: String? + + func toDictionary() -> [String: Any] { + var dict: [String: Any] = ["success": success] + if let pngBase64 = pngBase64 { dict["pngBase64"] = pngBase64 } + if let error = error { dict["error"] = error } + return dict + } +} + struct OperationResult { let success: Bool let error: String? diff --git a/ios-automation-server/IOSAutomationServerUITests/Server/JsonRpcServer.swift b/ios-automation-server/IOSAutomationServerUITests/Server/JsonRpcServer.swift index 53ef541..386c3c7 100644 --- a/ios-automation-server/IOSAutomationServerUITests/Server/JsonRpcServer.swift +++ b/ios-automation-server/IOSAutomationServerUITests/Server/JsonRpcServer.swift @@ -96,6 +96,10 @@ class JsonRpcServer { let bundleId = params?["bundleId"] as? 
String return bridge.getInteractiveElements(includeDisabled: includeDisabled, bundleId: bundleId).toDictionary() + // Screenshot + case "ui.screenshot": + return bridge.screenshot().toDictionary() + // Device methods case "device.getInfo": return bridge.getDeviceInfo().toDictionary() diff --git a/openspec/changes/add-android-screenshot/.openspec.yaml b/openspec/changes/add-android-screenshot/.openspec.yaml new file mode 100644 index 0000000..3a54a17 --- /dev/null +++ b/openspec/changes/add-android-screenshot/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-16 diff --git a/openspec/changes/add-android-screenshot/design.md b/openspec/changes/add-android-screenshot/design.md new file mode 100644 index 0000000..762a730 --- /dev/null +++ b/openspec/changes/add-android-screenshot/design.md @@ -0,0 +1,117 @@ +## Context + +The Android automation server (`automation-server/`) is a UIAutomator-based JSON-RPC 2.0 server that runs as an Android instrumentation test. The MCP server (`app/`) talks to it via `AutomationClient`, which wraps `HttpURLConnection` calls to `/jsonrpc` on `localhost:9008` (ADB-forwarded). All existing JSON-RPC methods exchange text-based JSON payloads (XML hierarchies as strings, element data as objects). + +Screenshots are binary PNG data — tens of KB to a few MB per image — and must cross three boundaries: + +1. Android UiAutomation capture → instrumentation test process (on device). +2. Device (via ADB TCP forward) → Mac host (MCP server) over HTTP/JSON-RPC. +3. MCP server → caller's working directory as a PNG file. + +Android exposes two relevant APIs: +- `UiDevice.takeScreenshot(File)` — writes a PNG directly to a file path on the device (must then be `adb pull`-ed to host). +- `UiAutomation.takeScreenshot()` — returns a `Bitmap` in the test process memory. 
+ +The recently-merged `ios_screenshot` tool (archived change `2026-04-16-add-ios-screenshot`) established the shape: base64-over-JSON-RPC, host-side decode/write, optional `outputPath` defaulting to `./screenshots/<prefix>_<timestamp>.png`. This change adopts the same pattern for consistency. + +## Goals / Non-Goals + +**Goals:** +- Capture the current Android screen as a PNG using the existing UIAutomator bridge. +- Return the file path of the saved PNG to the MCP caller, so the agent can reference it in follow-up operations. +- Allow the caller to specify an output path; default to a sensible location (user's project CWD) when omitted. +- Keep the transport mechanism consistent with existing JSON-RPC methods (no new endpoints, no `adb pull`). +- Fail clearly when the server is not running, when the output path is unwritable, when the screenshot call itself fails, or when the installed test APK is too old to know `ui.screenshot`. +- Behave identically to `ios_screenshot` from the agent's perspective (same default path shape, same error phrasing) so cross-platform automation scripts stay symmetric. + +**Non-Goals:** +- Capturing a specific view/subregion — only the full display. +- Image format options (JPEG, compression level, resizing) — PNG at native resolution only. +- Returning the image inline as an MCP image content block — file-on-disk only (matches iOS). +- Streaming or chunked transport for very large screenshots. +- Sharing a common `ScreenshotToolSupport` helper with the iOS registrar. The two registrars each get their own implementation now; a refactor to share the host-side write logic can happen later as a non-breaking cleanup. +- Screenshotting a specific Android device when multiple are connected — the automation server targets the single device that `adb` is forwarded to, same as every other tool.
+ +## Decisions + +### Decision 1: Encode PNG as base64 over the existing JSON-RPC transport + +**Choice:** The new `ui.screenshot` JSON-RPC method returns `{ "success": true, "pngBase64": "<base64-encoded PNG>" }`. The Kotlin client decodes and writes to disk. + +**Alternatives considered:** +- **`UiDevice.takeScreenshot(File)` on device + `adb pull` on host**: Avoids base64 overhead (~33%) but requires knowing a writable path on the device, a second ADB call, and splits the transport across two mechanisms. It also bypasses the JSON-RPC "server is the single source of truth" contract every other tool relies on. +- **Separate binary HTTP endpoint (`GET /screenshot`)**: Avoids base64 overhead but doubles the surface area of the Ktor server, requires new routing/content-type handling, and diverges from the single-method JSON-RPC dispatch pattern established by every other operation. + +**Rationale:** Base64-over-JSON-RPC preserves architectural consistency across both automation servers (iOS and Android), and the size overhead is acceptable at typical phone-screen resolutions (~2–4 MB base64 for a 1080×2400 screen). It also matches exactly what `ios_screenshot` does — the Kotlin tool code on the host is ~95% identical, which is a strong signal that the transport choice is right. + +### Decision 2: Use `UiAutomation.takeScreenshot()` + in-memory `Bitmap.compress(PNG)`, not `UiDevice.takeScreenshot(File)` + +**Choice:** The Swift-side equivalent was `XCUIScreen.main.screenshot().pngRepresentation`. On Android the equivalent is `uiAutomation.takeScreenshot()` (returns a `Bitmap`) followed by `bitmap.compress(Bitmap.CompressFormat.PNG, 100, outputStream)` into a `ByteArrayOutputStream`, then base64-encode the bytes. + +**Alternatives considered:** +- **`UiDevice.takeScreenshot(File)`**: Writes PNG directly to a device path, which we'd then have to read back into memory (or `adb pull` to host). Extra filesystem I/O on the device for no benefit.
Also: the test process's filesystem sandbox can be finicky, and we'd need to pick/cleanup a temp path on the device. + +**Rationale:** Keeping the entire capture in-memory on the device matches the iOS flow (`.pngRepresentation` is also in-memory) and avoids any device-side filesystem concerns. `UiAutomation.takeScreenshot()` is also the API `UiDevice.takeScreenshot` delegates to internally. + +### Decision 3: File is written by the Kotlin MCP tool, not by the device side + +**Choice:** The UiAutomator bridge returns only the base64-encoded PNG bytes. The Kotlin tool handler on the host decodes and writes the file to the caller's specified (or default) path. + +**Rationale:** Same reasoning as iOS — writing on the host keeps path semantics unambiguous (caller provides a host path, host writes it). Paths like `./screenshots/` resolve against the MCP server's CWD (the user's project), which would make no sense on the Android device's sandboxed filesystem. + +### Decision 4: Output path parameter — optional, default is CWD-relative with `android_screenshot_` prefix + +**Choice:** `outputPath` is optional. When omitted, default to `./screenshots/android_screenshot_<timestamp>.png` resolved against the MCP server's current working directory. When provided, the path is used as-is (both absolute and CWD-relative paths are supported). Missing parent directories are created. + +**Rationale:** Matches iOS behavior exactly, except the filename prefix changes from `ios_screenshot_` to `android_screenshot_` so the two platforms' outputs don't collide when an agent runs both. Coding agents (Claude Code, Codex, etc.) launch MCP servers with CWD set to the project they're working on, so the CWD-relative default lands the PNG in that project, where the agent and user can easily reference it.
+ +### Decision 5: New `ScreenshotResult` data class mirroring existing result types + +**Choice:** Add `ScreenshotResult(success: Boolean, pngBase64: String?, error: String?)` to `automation-server/src/main/java/com/example/automationserver/uiautomator/UiAutomatorModels.kt`. It follows the same pattern as `UiHierarchyResult`. + +**Rationale:** Consistency with existing models. These data classes are already covered by unit tests in `UiAutomatorModelsTest.kt`, so the new one plugs into the existing test pattern. + +### Decision 6: Tool name is `android_screenshot` + +**Choice:** The MCP tool name is `android_screenshot`, matching the `android_*` prefix pattern used by `android_tap_by_coordinates`, `android_swipe`, `android_swipe_direction`, `android_input_text`, etc. + +**Alternatives considered:** +- `screenshot_android` — out of order with the existing `android_*` convention. +- Platform-less `screenshot` — ambiguous when both iOS and Android tools are registered. + +**Rationale:** `android_screenshot` matches the prefix convention that's already established in `AndroidAutomationToolRegistrar` and mirrors `ios_screenshot`. + +### Decision 7: Timeout and registration + +**Choice:** Register in `AndroidAutomationToolRegistrar` alongside the other Android tools. Use `timeoutMs = 30000` (same as `get_ui_hierarchy` and matching the iOS screenshot timeout). + +**Rationale:** `UiAutomation.takeScreenshot()` typically returns in <1s but PNG compression of a 1080×2400 bitmap plus base64 encode + HTTP transport warrants the same headroom we gave to UI hierarchy dumps. 
+ +### Decision 8: Duplicate host-side write/decode logic instead of extracting a helper + +**Choice:** Copy the ~40 lines of base64 decode + atomic-write + path-resolution logic from `IOSAutomationToolRegistrar` into `AndroidAutomationToolRegistrar`, renaming `ios_screenshot_` prefix to `android_screenshot_` and keeping error messages platform-specific ("Android automation server" vs "iOS automation server"). + +**Alternatives considered:** +- **Extract a `ScreenshotWriter` helper now**: Cleaner but balloons the scope of this change, and the right shape of the helper is clearer after two concrete callers exist. Deferring also lets us land the Android tool without touching the stable iOS code path. + +**Rationale:** "Three similar lines is better than a premature abstraction." Two concrete implementations will make the eventual shared helper obvious; a shared helper designed from one caller's perspective tends to leak its platform assumptions. The duplicated code is ~40 lines and purely mechanical. + +## Risks / Trade-offs + +- **Risk:** Base64 payloads for very large Android screenshots (foldable devices in unfolded mode, tablet screenshots at 2560×1600) may stress `HttpURLConnection` buffer behavior or the MCP stdio transport. + - **Mitigation:** Initial scope is phone-sized devices (<4 MB base64). Document tablet/foldable as a known edge; address with chunked or binary endpoint only if it surfaces in practice. + +- **Risk:** The pre-built test APK (`automation-server-debug-androidTest.apk`) downloaded by `install.sh` will not contain the new `ui.screenshot` method until a new release is cut. Users running an older installed APK will get a `methodNotFound` error. + - **Mitigation:** The JSON-RPC server already returns `methodNotFound` for unknown methods. The Kotlin tool maps this to an "outdated bundle — rebuild from source or re-run install.sh for the next release" hint, identical to the iOS flow. A new tagged release refreshes the bundle. 
+ +- **Risk:** `UiAutomation.takeScreenshot()` can return `null` (e.g., in very rare cases where the display is unavailable or the security flags of the current window block capture, such as apps using `FLAG_SECURE` for DRM content). + - **Mitigation:** The bridge checks for a `null`/empty `Bitmap` and returns a `ScreenshotResult(success = false, error = "...")` so the tool surfaces a clear message instead of a confusing base64-decode failure. + +- **Risk:** Writing to an arbitrary caller-supplied path could clobber existing files. + - **Mitigation:** Document overwrite behavior in the tool description (same as iOS). The default path uses a timestamped filename to avoid accidental overwrites when the caller doesn't pass `outputPath`. + +- **Trade-off:** Code duplication between `IOSAutomationToolRegistrar.captureScreenshot` and `AndroidAutomationToolRegistrar.captureScreenshot` (~40 lines). + - **Justification:** See Decision 8. A follow-up refactor can extract a shared helper once both callers exist and the natural seam is visible. + +- **Trade-off:** Not returning the image as an MCP image content block means the agent cannot "see" the screenshot directly in the tool response — it must issue a separate read/view. + - **Justification:** Keeps parity with `ios_screenshot` and the Kotlin → MCP string-result contract every other tool follows. Can be added later as a non-breaking enhancement on both platforms simultaneously. diff --git a/openspec/changes/add-android-screenshot/proposal.md b/openspec/changes/add-android-screenshot/proposal.md new file mode 100644 index 0000000..3f2aa04 --- /dev/null +++ b/openspec/changes/add-android-screenshot/proposal.md @@ -0,0 +1,27 @@ +## Why + +VisionTest recently gained an `ios_screenshot` MCP tool for capturing the iOS simulator display, but the equivalent capability does not yet exist for Android devices. 
Agents automating Android apps still have no way to capture a pixel-accurate image of the current screen for visual verification (layout regressions, rendering bugs, image assets, or UI that the accessibility tree does not describe). This change closes that gap by adding the Android counterpart, following the same shape as the iOS tool so agents can operate on both platforms with a consistent mental model. + +## What Changes + +- Add a new MCP tool `android_screenshot` that captures the connected Android device's current screen and saves it as a PNG to a caller-specified (or defaulted) path on the host filesystem. +- Extend the Android automation server (UIAutomator-based) with a new JSON-RPC method `ui.screenshot` that returns the PNG bytes as a base64-encoded string. Implementation uses `UiAutomation.takeScreenshot()` plus `Bitmap.compress(PNG, ...)` rather than `UiDevice.takeScreenshot(File)` so the capture never has to materialize a file on the device before crossing the ADB port forward. +- Extend `AutomationClient` (Kotlin MCP side) with a `screenshot()` suspend function that calls the new JSON-RPC method. +- The tool decodes the base64 payload and writes the PNG bytes atomically to the resolved output path, returning the absolute path in the tool result. Default path is `./screenshots/android_screenshot_<timestamp>.png` resolved against the MCP server's working directory (the user's project). +- Reuse the exact error-handling, path-resolution, and atomic-write logic already proven on the iOS side so both tools behave identically. + +## Capabilities + +### New Capabilities +- `android-screenshot`: Captures a screenshot of the connected Android device (physical or emulator) via the UIAutomator-based automation server and saves it as a PNG file on the host.
+ +### Modified Capabilities + + +## Impact + +- **Android automation server (`automation-server/`)** — New `screenshot()` method on `BaseUiAutomatorBridge` (shared code), new `ScreenshotResult` data class in `UiAutomatorModels.kt`, new `ui.screenshot` branch in `JsonRpcServerInstrumented.executeMethod`. Requires a new release of the test APK (`automation-server-debug-androidTest.apk`) for users who installed via `install.sh`. +- **MCP server (`app/`)** — New `screenshot()` method on `AutomationClient`, new tool registration in `AndroidAutomationToolRegistrar`. No changes to shared infrastructure (`ToolFactory`, `ToolScope`, `ErrorHandler`). The base64 decode / path resolution / atomic write logic is near-identical to `IOSAutomationToolRegistrar.captureScreenshot`; we duplicate it for now (shared helper extraction is out of scope) but keep behavior identical. +- **Tests** — New pure-JVM unit test `AndroidScreenshotToolTest.kt` covering base64 decode, default path, atomic write, server-not-running, and JSON-RPC error paths (parallels `IOSScreenshotToolTest`). New unit test for `ScreenshotResult` in `automation-server` test module. +- **Docs** — `CLAUDE.md` tool table needs a new `android_screenshot` row under "UI Automation (Android)". +- **External surface** — New JSON-RPC method on the Android automation server. No breaking changes to existing tools or endpoints. Agents using an older pre-built test APK will get a `methodNotFound` error that the Kotlin tool translates into an "outdated bundle, rebuild/update" hint. 
diff --git a/openspec/changes/add-android-screenshot/specs/android-screenshot/spec.md b/openspec/changes/add-android-screenshot/specs/android-screenshot/spec.md new file mode 100644 index 0000000..fd98443 --- /dev/null +++ b/openspec/changes/add-android-screenshot/specs/android-screenshot/spec.md @@ -0,0 +1,73 @@ +## ADDED Requirements + +### Requirement: MCP tool captures Android device screenshot as PNG +The MCP server SHALL expose a tool named `android_screenshot` that captures the current display of the connected Android device (physical or emulator) and saves the image as a PNG file on the host filesystem. + +#### Scenario: Screenshot saved to caller-supplied path +- **WHEN** an agent invokes `android_screenshot` with parameter `outputPath` set to an absolute path ending in `.png` +- **THEN** the tool writes the PNG bytes of the current Android display to that exact path and returns a success message containing the absolute path + +#### Scenario: Screenshot saved to default path when outputPath is omitted +- **WHEN** an agent invokes `android_screenshot` with no `outputPath` parameter +- **THEN** the tool writes the PNG to `./screenshots/android_screenshot_<timestamp>.png` resolved against the MCP server's current working directory (the user's project when launched by a coding agent, NOT the visiontest install directory) and returns the absolute path of the saved file + +#### Scenario: Blank outputPath falls back to the default +- **WHEN** `android_screenshot` is invoked with an `outputPath` consisting only of whitespace +- **THEN** the tool treats it as omitted and uses the default path described above + +#### Scenario: Parent directories are created automatically +- **WHEN** `android_screenshot` is invoked with an `outputPath` whose parent directory does not yet exist +- **THEN** the tool creates all missing parent directories before writing the PNG + +#### Scenario: Existing file at target path is overwritten +- **WHEN** `android_screenshot` is invoked with an
`outputPath` that already contains a file +- **THEN** the tool overwrites the existing file with the new PNG bytes and returns a success message + +#### Scenario: Atomic write leaves no temp file on success +- **WHEN** `android_screenshot` successfully writes a PNG to the target path +- **THEN** no temporary `.png.tmp` sidecar file remains in the target's parent directory after the tool returns + +#### Scenario: Automation server not running +- **WHEN** `android_screenshot` is invoked and the Android automation server is not reachable on its configured port +- **THEN** the tool returns an error message instructing the caller to run `start_automation_server` first and does NOT write any file + +#### Scenario: Server reports screenshot failure +- **WHEN** the automation server returns `{ "success": false, "error": "<message>" }` for the `ui.screenshot` method +- **THEN** the tool returns a failure message containing the server's error text and writes no file + +#### Scenario: Server returns invalid base64 PNG data +- **WHEN** the automation server returns `success: true` but `pngBase64` cannot be decoded as base64 +- **THEN** the tool returns an error message referencing invalid base64 PNG data and writes no file + +#### Scenario: Outdated installed APK does not know ui.screenshot +- **WHEN** `android_screenshot` is invoked and the automation server responds with a JSON-RPC `methodNotFound` error (code `-32601`) for `ui.screenshot` +- **THEN** the tool returns an error message identifying this as an outdated Android automation server APK and instructing the caller to rebuild from source or update the installed APK, and writes no file + +### Requirement: Android automation server exposes `ui.screenshot` JSON-RPC method +The Android automation server (instrumentation test process) SHALL accept a JSON-RPC request with method `ui.screenshot` that captures the current screen via UIAutomator and returns the image bytes as a base64-encoded PNG string. 
+ +#### Scenario: Successful screenshot capture +- **WHEN** a JSON-RPC client POSTs `{"jsonrpc":"2.0","method":"ui.screenshot","id":1}` to `/jsonrpc` while the device is displaying normal content +- **THEN** the response `result` object contains `success: true` and a non-empty `pngBase64` string whose decoded bytes start with the PNG magic header (`89 50 4E 47 0D 0A 1A 0A`) + +#### Scenario: Screenshot capture failure is reported +- **WHEN** `UiAutomation.takeScreenshot()` returns a null or empty bitmap (e.g. unavailable display or FLAG_SECURE content blocks capture) +- **THEN** the response `result` object contains `success: false`, omits `pngBase64`, and includes an `error` field describing the failure + +#### Scenario: Unknown method rejected +- **WHEN** a client calls the JSON-RPC endpoint with a misspelled method name resembling `ui.screenshot` +- **THEN** the server returns a JSON-RPC error with code `-32601` (methodNotFound) and the Kotlin tool surfaces the outdated-APK guidance described above + +### Requirement: AutomationClient provides a screenshot API +The Kotlin `AutomationClient` SHALL expose a suspend function `screenshot()` that calls the `ui.screenshot` JSON-RPC method and returns the server's raw JSON response string, consistent with other client methods. + +#### Scenario: screenshot() delegates to sendRequest +- **WHEN** `AutomationClient.screenshot()` is called +- **THEN** it invokes `sendRequest("ui.screenshot", null)` and returns the resulting response string unchanged + +### Requirement: Tool description documents outputPath semantics and prerequisite +The `android_screenshot` tool description SHALL document the optional `outputPath` parameter, the default output location, the overwrite behavior when the path already exists, and the requirement that the automation server be running (via `start_automation_server`) before the tool is invoked. 
+ +#### Scenario: Tool description includes required workflow +- **WHEN** the MCP client lists the `android_screenshot` tool +- **THEN** the returned description mentions `outputPath` (optional), the default path `./screenshots/` relative to the server's working directory (user's current project), that existing files at the target path will be overwritten, and the prerequisite that `start_automation_server` must have been called first diff --git a/openspec/changes/add-android-screenshot/tasks.md b/openspec/changes/add-android-screenshot/tasks.md new file mode 100644 index 0000000..dd9cb15 --- /dev/null +++ b/openspec/changes/add-android-screenshot/tasks.md @@ -0,0 +1,48 @@ +## 1. Android Automation Server (on-device) + +- [x] 1.1 Add a `ScreenshotResult` data class to `automation-server/src/main/java/com/example/automationserver/uiautomator/UiAutomatorModels.kt` with fields `success: Boolean`, `pngBase64: String? = null`, `error: String? = null`, following the same pattern as `UiHierarchyResult` +- [x] 1.2 Add a `screenshot()` method to `BaseUiAutomatorBridge` (in `automation-server/src/main/java/com/example/automationserver/uiautomator/BaseUiAutomatorBridge.kt`) that calls `getUiAutomation().takeScreenshot()`, compresses the returned `Bitmap` to PNG via `Bitmap.compress(Bitmap.CompressFormat.PNG, 100, ByteArrayOutputStream())`, base64-encodes the bytes using `android.util.Base64.encodeToString(bytes, Base64.NO_WRAP)`, and returns a populated `ScreenshotResult`. Handle a null/empty bitmap by returning `ScreenshotResult(success = false, error = "Screenshot capture returned no bitmap (display unavailable or content is FLAG_SECURE)")`. Wrap the call in try/catch and return `success = false, error = e.message` on exception, same shape as `dumpHierarchy()`. 
+- [x] 1.3 Add a `case "ui.screenshot":` branch to `JsonRpcServerInstrumented.executeMethod` in `automation-server/src/androidTest/java/com/example/automationserver/JsonRpcServerInstrumented.kt` that calls `uiAutomator.screenshot()` and returns the `ScreenshotResult` (Gson will serialize it into the JSON-RPC `result` field, identical to how other bridge calls are returned) +- [x] 1.4 Add unit tests for `ScreenshotResult` to `automation-server/src/test/java/com/example/automationserver/uiautomator/UiAutomatorModelsTest.kt` covering: (a) success state with `pngBase64` populated and `error` null; (b) failure state with `error` populated and `pngBase64` null; (c) default values (`pngBase64` defaults to null, `error` defaults to null) + +## 2. Kotlin MCP Client + +- [x] 2.1 Add a `suspend fun screenshot(): String` method to `app/src/main/kotlin/com/example/visiontest/android/AutomationClient.kt` that calls `sendRequest("ui.screenshot", null)` and returns the raw response, mirroring the existing `getUiHierarchy()` shape + +## 3. MCP Tool Registration + +- [x] 3.1 In `app/src/main/kotlin/com/example/visiontest/tools/AndroidAutomationToolRegistrar.kt`, add a `registerScreenshot(scope)` call to `registerTools()` and implement a private `registerScreenshot` method that: + - Registers tool name `android_screenshot` with `timeoutMs = 30000` + - Documents the optional `outputPath` parameter (absolute or CWD-relative, overwrites existing files, auto-creates parent dirs), the default path `./screenshots/android_screenshot_<timestamp>.png` relative to the MCP server's working directory (i.e. the user's project, NOT the visiontest install dir), and the `start_automation_server` prerequisite in the description + - Checks `automationClient.isServerRunning()` first and returns the standard "Automation server is not running. Use 'start_automation_server' first." 
short-circuit if false + - Calls `automationClient.screenshot()`, parses the JSON-RPC response with Gson's `JsonParser`, and surfaces the standard error cases (JSON-RPC `error` envelope with `-32601` → outdated-APK hint, other `error` envelopes → "Android automation server returned error (code): message", missing/non-object `result`, `success: false` → bridge-reported error, missing `pngBase64` → outdated-APK hint, invalid base64 → decode error, parse failure → "unable to parse response from Android automation server") +- [x] 3.2 Implement `internal fun resolveScreenshotPath(outputPath: String?): File` on the registrar (same shape as `IOSAutomationToolRegistrar.resolveScreenshotPath`): if the caller provided `outputPath` and it is non-blank, return `File(outputPath).absoluteFile`; otherwise build `File("screenshots/android_screenshot_$timestamp.png").absoluteFile` using `java.time.LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"))`. Do NOT resolve against `VISIONTEST_DIR` — the default MUST land in the MCP server's CWD (the user's project). +- [x] 3.3 Implement `internal suspend fun writeScreenshot(target: File, pngBase64: String): String` on `Dispatchers.IO` that: base64-decodes (catching `IllegalArgumentException` → "invalid base64 PNG data" message), ensures the parent directory exists (`Files.createDirectories`), writes to a sibling temp file (`Files.createTempFile(parentDir, ".android_screenshot_", ".png.tmp")`), then `Files.move(..., REPLACE_EXISTING, ATOMIC_MOVE)` onto the target path (falling back to plain `REPLACE_EXISTING` on `AtomicMoveNotSupportedException`). On any `IOException` during write, delete the temp file and return a user-facing error. On success return `"Screenshot saved to ${target.absolutePath}"`. Mirror the iOS implementation exactly, swapping "iOS" → "Android" in error messages and the temp-file prefix. 
+- [x] 3.4 Add unit test `app/src/test/kotlin/com/example/visiontest/tools/AndroidScreenshotToolTest.kt` parallel to `IOSScreenshotToolTest`, using `MockWebServer` and `AutomationClient`. Cover: + - `resolveScreenshotPath(null)` returns a file under `./screenshots/` with filename matching `android_screenshot_\d{8}_\d{6}\.png`, absolute, parent-parent equals `user.dir` + - `resolveScreenshotPath(explicitPath)` returns the explicit path verbatim + - `resolveScreenshotPath(" ")` (blank string) falls back to the default + - `captureScreenshot(target)` writes the decoded PNG bytes to the target path when the server returns `success: true, pngBase64: <base64>` + - Missing parent directories are created + - Server-not-running short-circuits, no file is written, and only the `/health` probe is made + - `success: false` in the response surfaces as an error and writes no file + - Missing `pngBase64` surfaces the outdated-APK hint + - Missing/non-object `result` surfaces parse errors + - Malformed JSON surfaces "unable to parse response from Android automation server" + - JSON-RPC error envelope with code `-32601` maps to the outdated-APK hint mentioning `android_*` + - Other JSON-RPC error envelopes surface code and message + - Invalid base64 surfaces a decode error + - Atomic write leaves no `.png.tmp` sidecar on success + +## 4. Documentation + +- [x] 4.1 Add an `android_screenshot` row to the "UI Automation (Android)" tool table in `CLAUDE.md` with a one-line description matching the tool registration (reference: the `ios_screenshot` row added in the previous change) +- [x] 4.2 Verify the new tool does not need to appear in the "Typical Automation Workflow" section (screenshots are orthogonal to the install→start→inspect→interact flow — should match the decision made for iOS); if any other docs reference the iOS tool side-by-side with Android tools, add the Android counterpart there + +## 5. 
Verification + +- [x] 5.1 Run `./gradlew :automation-server:test` and ensure the new `ScreenshotResult` tests in `UiAutomatorModelsTest` pass +- [x] 5.2 Run `./gradlew :app:test` and ensure the new `AndroidScreenshotToolTest` passes along with all existing Kotlin tests +- [x] 5.3 Run `./gradlew :automation-server:assembleDebug :automation-server:assembleDebugAndroidTest` and `./gradlew :automation-server:lint` to ensure the instrumentation module still compiles cleanly with the new JSON-RPC branch and the `BaseUiAutomatorBridge.screenshot()` method +- [ ] 5.4 Manual end-to-end check: build and install both APKs on a connected device/emulator, run `start_automation_server`, invoke `android_screenshot` via the MCP server (or a direct JSON-RPC curl to `localhost:9008/jsonrpc`), confirm the PNG file opens and renders the device's current display at native resolution +- [ ] 5.5 Manual cross-platform symmetry check: with both iOS and Android automation servers running, confirm `ios_screenshot` and `android_screenshot` produce files under the same `./screenshots/` directory with filenames differing only in the platform prefix diff --git a/openspec/changes/archive/2026-04-16-add-ios-screenshot/.openspec.yaml b/openspec/changes/archive/2026-04-16-add-ios-screenshot/.openspec.yaml new file mode 100644 index 0000000..3a54a17 --- /dev/null +++ b/openspec/changes/archive/2026-04-16-add-ios-screenshot/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-04-16 diff --git a/openspec/changes/archive/2026-04-16-add-ios-screenshot/design.md b/openspec/changes/archive/2026-04-16-add-ios-screenshot/design.md new file mode 100644 index 0000000..d1e571b --- /dev/null +++ b/openspec/changes/archive/2026-04-16-add-ios-screenshot/design.md @@ -0,0 +1,92 @@ +## Context + +VisionTest's iOS automation server runs as an XCUITest UI test process (see `AutomationServerUITest.swift`) exposing a JSON-RPC 2.0 endpoint over HTTP. 
The MCP server (`app/`) talks to it via `IOSAutomationClient`, which wraps `HttpURLConnection` calls to `/jsonrpc`. All JSON-RPC methods today exchange text-based JSON payloads: UI hierarchies as XML strings, element data as nested dictionaries. + +Screenshots are binary PNG data — tens of KB to a few MB per image. They must cross two boundaries: +1. Simulator (XCUITest process) → Mac host (MCP server) over HTTP/JSON-RPC. +2. MCP server → caller's working directory as a PNG file. + +XCUITest provides `XCUIScreen.main.screenshot()` which returns an `XCUIScreenshot` with a `.pngRepresentation: Data` property. This is the canonical simulator capture API; `simctl io screenshot` is an alternative but requires invoking the CLI and knowing the simulator UDID. + +## Goals / Non-Goals + +**Goals:** +- Capture the current iOS simulator display as a PNG using the existing XCUITest bridge. +- Return the file path of the saved PNG to the MCP caller, so the agent can reference it in follow-up operations. +- Allow the caller to specify an output path; default to a sensible location when omitted. +- Keep the transport mechanism consistent with existing JSON-RPC methods (no new endpoints). +- Fail clearly when the server is not running, when the output path is unwritable, or when the screenshot call itself throws. + +**Non-Goals:** +- Android screenshot support (follow-up change). +- Returning the image bytes inline to the MCP caller or as an MCP image content block — initial scope is file-on-disk only. +- Capturing a specific app/window subregion — only the full simulator display. +- Image format options (JPEG, compression level, resizing) — PNG at native resolution only. +- Streaming or chunked transport for very large screenshots. + +## Decisions + +### Decision 1: Encode PNG as base64 over the existing JSON-RPC transport + +**Choice:** The new `ui.screenshot` JSON-RPC method returns `{ "success": true, "pngBase64": "<base64>" }`. The Kotlin client decodes and writes to disk. 
+ +**Alternatives considered:** +- **Separate binary HTTP endpoint (`GET /screenshot`)**: Avoids base64 overhead (~33%) but doubles the surface area of the automation server, requires new routing/content-type handling in Swifter, and diverges from the single-method JSON-RPC dispatch pattern established by every other operation. Not worth it at typical screenshot sizes (a 1290×2796 iPhone 15 Pro Max screenshot is ~1–2 MB raw, ~1.3–2.7 MB base64). +- **`simctl io <udid> screenshot <path>` from the Kotlin side**: Bypasses the automation server entirely and writes directly on the host. Simpler transport, but requires knowing the booted simulator's UDID, doesn't go through the "server must be running" check, and splits the iOS automation API across two mechanisms. Rejected to keep a single coherent surface. + +**Rationale:** Base64-over-JSON-RPC preserves architectural consistency, works with the existing Swifter JSON response helper, and the size overhead is acceptable for screenshots at this scale. + +### Decision 2: File is written by the Kotlin MCP tool, not by the Swift side + +**Choice:** The Swift bridge returns only the encoded bytes. The Kotlin tool handler decodes base64 and writes the file. + +**Alternatives considered:** +- **Write the file inside the XCUITest process**: The simulator and host share the filesystem in many cases but not reliably — `XCUIScreenshot` runs in the simulator's sandbox, so paths like `~/.local/share/visiontest/screenshots/foo.png` would resolve relative to the simulator's home, not the Mac's. Cross-process path translation is error-prone. + +**Rationale:** Writing on the host keeps path semantics unambiguous (caller provides a host path, host writes it). + +### Decision 3: Output path parameter — optional, default is CWD-relative + +**Choice:** `outputPath` is optional. When omitted, default to `./screenshots/ios_screenshot_<timestamp>.png` resolved against the MCP server's current working directory. 
When provided, the path is used as-is (both absolute and CWD-relative paths are supported). + +**Alternatives considered:** +- **Default under `VISIONTEST_DIR`** (initial design): placed screenshots in the visiontest install directory (`~/.local/share/visiontest/screenshots/`). Rejected because a coding agent taking a screenshot of the iOS simulator is almost always working on a *user project*, not on visiontest itself. Screenshots belong with the project that prompted them. +- **Require outputPath explicitly (no default)**: safer but forces every caller to think about path layout. Too much ceremony for the common case. +- **A fixed absolute path like `/tmp/`**: loses the "screenshots live with the project" property and is ephemeral across reboots on some platforms. + +**Rationale:** Coding agents (Claude Code, Codex, etc.) launch MCP servers with CWD set to the project they're working on. A CWD-relative default lands the PNG in the user's project directory, where the agent and user can easily reference it. The timestamped filename prevents overwrites when the agent takes multiple screenshots without specifying a path. + +### Decision 4: Path safety — create parent directories, trust caller-supplied paths + +**Choice:** If the caller supplies an explicit `outputPath`, trust it (the MCP server already has filesystem access under its user). Create any missing parent directories. + +**Rationale:** The MCP server runs as the user; restricting caller-supplied paths would be surprising and add little defense-in-depth. The CWD-relative default lands inside the project the agent is working on. + +### Decision 5: New `ScreenshotResult` model mirroring existing result types + +**Choice:** Add `ScreenshotResult { success: Bool, pngBase64: String?, error: String? }` to `AutomationModels.swift` with a `toDictionary()` method following the same pattern as `UiHierarchyResult`. 
+ +**Rationale:** Consistency with the existing models; `toDictionary()` pattern is already covered by unit tests in `AutomationModelsTests.swift`. + +### Decision 6: Timeout and MCP tool registration + +**Choice:** Register in `IOSAutomationToolRegistrar` alongside other iOS tools. Use `timeoutMs = 30000` (same as `ios_get_ui_hierarchy`) because `XCUIScreen.main.screenshot()` typically returns in <1 second but base64 encoding + HTTP transport of a multi-MB payload warrants headroom. + +**Rationale:** Matches the pattern for other potentially large-payload tools. + +## Risks / Trade-offs + +- **Risk:** Base64 payloads for very large screenshots (iPad 13" in 2x mode approaches 8–10 MB raw → ~13 MB base64) may stress `HttpURLConnection` default buffer behavior or the MCP stdio transport. + - **Mitigation:** Initial scope is iPhone-sized simulators (<3 MB base64). Document the iPad limitation as a known issue; address with chunked or binary endpoint only if it surfaces in practice. + +- **Risk:** The pre-built iOS automation bundle (`ios-automation-server.tar.gz`) downloaded by `install.sh` will not contain the new `ui.screenshot` method until a new release is cut. Users running an older pre-built bundle will get a `methodNotFound` error. + - **Mitigation:** The JSON-RPC server already returns `methodNotFound` for unknown methods (see `JsonRpcServer.executeMethod` default case). The Kotlin tool should surface this cleanly with a message telling the user to upgrade or rebuild from source. A new tagged release will refresh the bundle. + +- **Risk:** Writing to an arbitrary caller-supplied path could clobber existing files. + - **Mitigation:** Document the overwrite behavior in the tool description; do not add a "confirm overwrite" flow (would break the stateless MCP tool contract). Defaults use timestamped filenames to avoid accidental overwrites. + +- **Risk:** The simulator must be unlocked/booted for `XCUIScreen.main.screenshot()` to return valid data. 
If the simulator is in a weird state (e.g., booting, locked), the call may return a black image rather than error. + - **Mitigation:** Accept this as a property of XCUITest; the agent can verify via `ios_get_device_info` or by inspecting the returned PNG. No extra validation in this change. + +- **Trade-off:** Not returning the image as an MCP image content block means the agent cannot "see" the screenshot directly in the tool response — it must issue a separate read/view to process the PNG. + - **Justification:** Keeps the Kotlin → MCP surface simple (string results only, like every other tool). An MCP image content response can be added later as a non-breaking enhancement if needed. diff --git a/openspec/changes/archive/2026-04-16-add-ios-screenshot/proposal.md b/openspec/changes/archive/2026-04-16-add-ios-screenshot/proposal.md new file mode 100644 index 0000000..b473af8 --- /dev/null +++ b/openspec/changes/archive/2026-04-16-add-ios-screenshot/proposal.md @@ -0,0 +1,27 @@ +## Why + +AI agents using VisionTest can inspect the iOS simulator's UI hierarchy and interactive elements, but have no way to capture a pixel-accurate image of the current screen. A screenshot capability is essential for visual verification (layout regressions, rendering bugs, image assets) and for situations where the accessibility tree does not fully describe what the user sees (custom-drawn views, web content, media). This change adds a first-class `ios_screenshot` MCP tool that captures the simulator display and saves it as a PNG file on the host machine. + +## What Changes + +- Add a new MCP tool `ios_screenshot` that captures the iOS simulator's current screen and saves it as a PNG to a caller-specified (or defaulted) path on the host filesystem. +- Extend the iOS automation server (XCUITest) with a new JSON-RPC method `ui.screenshot` that returns the PNG bytes as a base64-encoded string using `XCUIScreen.main.screenshot().pngRepresentation`. 
+- Extend `IOSAutomationClient` with a `screenshot()` suspend function that calls the new JSON-RPC method and returns the base64 payload. +- The tool decodes the base64 payload and writes the PNG bytes to the resolved output path, returning the absolute path in the tool result. +- Scope is iOS only — Android support is deferred to a follow-up change. + +## Capabilities + +### New Capabilities +- `ios-screenshot`: Captures a screenshot of the booted iOS simulator display via the XCUITest automation server and saves it as a PNG file on the host. + +### Modified Capabilities + + +## Impact + +- **iOS automation server (`ios-automation-server/`)** — New `screenshot()` method on `XCUITestBridge`, new `ScreenshotResult` model in `AutomationModels.swift`, new `ui.screenshot` case in `JsonRpcServer.executeMethod`. Pre-built bundle consumers (installed via `install.sh`) will need to be rebuilt/re-released for the new method to be available. +- **MCP server (`app/`)** — New `screenshot()` method on `IOSAutomationClient`, new tool registration in `IOSAutomationToolRegistrar`. No changes to shared infrastructure (`ToolFactory`, `ToolScope`, `ErrorHandler`). +- **Tests** — Unit tests for base64 decoding and file write in the Kotlin tool path; Swift unit tests for the new result model's `toDictionary()`. +- **Docs** — `CLAUDE.md` tool table needs a new row; `LEARNING.md` optionally documents the base64-over-JSON-RPC transport decision. +- **External surface** — New JSON-RPC method on the iOS automation server; no breaking changes to existing tools or endpoints. 
diff --git a/openspec/changes/archive/2026-04-16-add-ios-screenshot/specs/ios-screenshot/spec.md b/openspec/changes/archive/2026-04-16-add-ios-screenshot/specs/ios-screenshot/spec.md new file mode 100644 index 0000000..8e5e87e --- /dev/null +++ b/openspec/changes/archive/2026-04-16-add-ios-screenshot/specs/ios-screenshot/spec.md @@ -0,0 +1,49 @@ +## ADDED Requirements + +### Requirement: MCP tool captures iOS simulator screenshot as PNG +The MCP server SHALL expose a tool named `ios_screenshot` that captures the current booted iOS simulator display and saves the image as a PNG file on the host filesystem. + +#### Scenario: Screenshot saved to caller-supplied path +- **WHEN** an agent invokes `ios_screenshot` with parameter `outputPath` set to an absolute path ending in `.png` +- **THEN** the tool writes the PNG bytes of the current simulator display to that exact path and returns a success message containing the absolute path + +#### Scenario: Screenshot saved to default path when outputPath is omitted +- **WHEN** an agent invokes `ios_screenshot` with no `outputPath` parameter +- **THEN** the tool writes the PNG to `./screenshots/ios_screenshot_<timestamp>.png` resolved against the MCP server's current working directory (the user's project when launched by a coding agent, NOT the visiontest install directory) and returns the absolute path of the saved file + +#### Scenario: Parent directories are created automatically +- **WHEN** `ios_screenshot` is invoked with an `outputPath` whose parent directory does not yet exist +- **THEN** the tool creates all missing parent directories before writing the PNG + +#### Scenario: Server not running +- **WHEN** `ios_screenshot` is invoked and the iOS automation server is not reachable on its configured port +- **THEN** the tool returns an error message instructing the caller to run `ios_start_automation_server` and does NOT write any file + +### Requirement: iOS automation server exposes `ui.screenshot` JSON-RPC method +The iOS 
automation server SHALL accept a JSON-RPC request with method `ui.screenshot` that captures the current screen via XCUITest and returns the image bytes as a base64-encoded PNG string. + +#### Scenario: Successful screenshot capture +- **WHEN** a JSON-RPC client POSTs `{"jsonrpc":"2.0","method":"ui.screenshot","id":1}` to `/jsonrpc` while the simulator is displaying content +- **THEN** the response `result` object contains `success: true` and a non-empty `pngBase64` string whose decoded bytes start with the PNG magic header (`89 50 4E 47 0D 0A 1A 0A`) + +#### Scenario: Screenshot capture failure is reported +- **WHEN** the XCUITest screenshot API throws an error during capture +- **THEN** the response `result` object contains `success: false`, omits `pngBase64`, and includes an `error` field describing the failure + +#### Scenario: Unknown method rejected +- **WHEN** a client calls the JSON-RPC endpoint with a misspelled method name resembling `ui.screenshot` +- **THEN** the server returns a JSON-RPC error with code `methodNotFound` + +### Requirement: IOSAutomationClient provides a screenshot API +The Kotlin `IOSAutomationClient` SHALL expose a suspend function `screenshot()` that calls the `ui.screenshot` JSON-RPC method and returns the server's raw JSON response string, consistent with other client methods. + +#### Scenario: screenshot() delegates to sendRequest +- **WHEN** `IOSAutomationClient.screenshot()` is called +- **THEN** it invokes `sendRequest("ui.screenshot", null)` and returns the resulting response string unchanged + +### Requirement: Tool description documents outputPath semantics +The `ios_screenshot` tool description SHALL document the optional `outputPath` parameter, the default output location, the overwrite behavior when the path already exists, and the requirement that `ios_start_automation_server` be called first. 
+ +#### Scenario: Tool description includes required workflow +- **WHEN** the MCP client lists the `ios_screenshot` tool +- **THEN** the returned description mentions `outputPath` (optional), the default path `./screenshots/` relative to the server's working directory (user's current project), that existing files at the target path will be overwritten, and the prerequisite of a running iOS automation server diff --git a/openspec/changes/archive/2026-04-16-add-ios-screenshot/tasks.md b/openspec/changes/archive/2026-04-16-add-ios-screenshot/tasks.md new file mode 100644 index 0000000..c2bc233 --- /dev/null +++ b/openspec/changes/archive/2026-04-16-add-ios-screenshot/tasks.md @@ -0,0 +1,32 @@ +## 1. iOS Automation Server (Swift) + +- [x] 1.1 Add `ScreenshotResult` struct to `ios-automation-server/IOSAutomationServerUITests/Models/AutomationModels.swift` with fields `success: Bool`, `pngBase64: String?`, `error: String?` and a `toDictionary()` method following the pattern of `UiHierarchyResult` +- [x] 1.2 Add a `screenshot()` method to `XCUITestBridge` that calls `XCUIScreen.main.screenshot().pngRepresentation`, base64-encodes the bytes, and returns a populated `ScreenshotResult` (checks for empty data since the XCUITest calls don't throw) +- [x] 1.3 Add a `case "ui.screenshot":` branch to `JsonRpcServer.executeMethod` that calls `bridge.screenshot().toDictionary()` +- [x] 1.4 Add unit tests for `ScreenshotResult.toDictionary()` in `ios-automation-server/IOSAutomationServerTests/AutomationModelsTests.swift` covering success (pngBase64 present, no error) and failure (error present, no pngBase64) paths + +## 2. Kotlin MCP Client + +- [x] 2.1 Add a `suspend fun screenshot(): String` method to `app/src/main/kotlin/com/example/visiontest/ios/IOSAutomationClient.kt` that calls `sendRequest("ui.screenshot", null)` and returns the raw response + +## 3. 
MCP Tool Registration + +- [x] 3.1 In `app/src/main/kotlin/com/example/visiontest/tools/IOSAutomationToolRegistrar.kt`, add a `registerScreenshot(scope)` call to `registerTools()` and implement a private `registerScreenshot` method that: + - Registers tool name `ios_screenshot` with `timeoutMs = 30000` + - Documents the optional `outputPath` parameter, the default path under `./screenshots/` relative to the MCP server's working directory, overwrite behavior, and the `ios_start_automation_server` prerequisite in the description + - Checks `iosAutomationClient.isServerRunning()` and returns the standard "server not running" message if false + - Calls `iosAutomationClient.screenshot()`, parses the JSON response to extract `result.pngBase64`, and returns an informative error if `success` is false or `pngBase64` is missing +- [x] 3.2 Resolve the output path: if the caller provided `outputPath`, use it as-is; otherwise build `./screenshots/ios_screenshot_<timestamp>.png` resolved against the MCP server's current working directory (the user's project), using `java.time.LocalDateTime` with a matching formatter — NOT under `VISIONTEST_DIR` which would incorrectly point at the visiontest install dir +- [x] 3.3 Create parent directories (`Files.createDirectories`), base64-decode the `pngBase64` string, and write the bytes atomically to the resolved path; return a success message containing the absolute path +- [x] 3.4 Add unit test `app/src/test/.../IOSScreenshotToolTest.kt` covering: (a) default path contains timestamped filename under screenshots/, (b) base64 is decoded correctly and written as bytes (compare against a known small PNG fixture), (c) parent directories are created when missing, (d) server-not-running short-circuits with no file write, (e) `success: false` in the response surfaces as an error and writes no file + +## 4. 
Documentation + +- [x] 4.1 Add an `ios_screenshot` row to the "UI Automation (iOS)" tool table in `CLAUDE.md` with a one-line description matching the tool registration +- [x] 4.2 Verify the new tool appears in the typical automation workflow section if needed (probably not, since screenshots are orthogonal to the tap/swipe flow) — confirmed: screenshots are orthogonal to the tap/swipe workflow, so no update needed there + +## 5. Verification + +- [x] 5.1 Run `./gradlew :app:test` and ensure all Kotlin tests (including the new `IOSScreenshotToolTest`) pass +- [x] 5.2 Run `xcodebuild test -project ios-automation-server/IOSAutomationServer.xcodeproj -scheme IOSAutomationServer -destination 'platform=iOS Simulator,name=iPhone 17' -only-testing:IOSAutomationServerTests` and ensure the new model test passes (used iPhone 17 — iPhone 16 not installed on this machine; all 69 tests pass including the 2 new `ScreenshotResult` tests) +- [x] 5.3 Manual end-to-end check: start the iOS automation server on a booted simulator, invoke `ios_screenshot` via the MCP server (or a direct JSON-RPC curl), and confirm the PNG file opens and renders the simulator's current display at native resolution — verified by user diff --git a/openspec/specs/ios-screenshot/spec.md b/openspec/specs/ios-screenshot/spec.md new file mode 100644 index 0000000..8e5e87e --- /dev/null +++ b/openspec/specs/ios-screenshot/spec.md @@ -0,0 +1,49 @@ +## ADDED Requirements + +### Requirement: MCP tool captures iOS simulator screenshot as PNG +The MCP server SHALL expose a tool named `ios_screenshot` that captures the current booted iOS simulator display and saves the image as a PNG file on the host filesystem. 
+ +#### Scenario: Screenshot saved to caller-supplied path +- **WHEN** an agent invokes `ios_screenshot` with parameter `outputPath` set to an absolute path ending in `.png` +- **THEN** the tool writes the PNG bytes of the current simulator display to that exact path and returns a success message containing the absolute path + +#### Scenario: Screenshot saved to default path when outputPath is omitted +- **WHEN** an agent invokes `ios_screenshot` with no `outputPath` parameter +- **THEN** the tool writes the PNG to `./screenshots/ios_screenshot_<timestamp>.png` resolved against the MCP server's current working directory (the user's project when launched by a coding agent, NOT the visiontest install directory) and returns the absolute path of the saved file + +#### Scenario: Parent directories are created automatically +- **WHEN** `ios_screenshot` is invoked with an `outputPath` whose parent directory does not yet exist +- **THEN** the tool creates all missing parent directories before writing the PNG + +#### Scenario: Server not running +- **WHEN** `ios_screenshot` is invoked and the iOS automation server is not reachable on its configured port +- **THEN** the tool returns an error message instructing the caller to run `ios_start_automation_server` and does NOT write any file + +### Requirement: iOS automation server exposes `ui.screenshot` JSON-RPC method +The iOS automation server SHALL accept a JSON-RPC request with method `ui.screenshot` that captures the current screen via XCUITest and returns the image bytes as a base64-encoded PNG string. 
+ +#### Scenario: Successful screenshot capture +- **WHEN** a JSON-RPC client POSTs `{"jsonrpc":"2.0","method":"ui.screenshot","id":1}` to `/jsonrpc` while the simulator is displaying content +- **THEN** the response `result` object contains `success: true` and a non-empty `pngBase64` string whose decoded bytes start with the PNG magic header (`89 50 4E 47 0D 0A 1A 0A`) + +#### Scenario: Screenshot capture failure is reported +- **WHEN** the XCUITest screenshot API fails during capture (for example, it returns empty image data, since the XCUITest screenshot calls do not throw) +- **THEN** the response `result` object contains `success: false`, omits `pngBase64`, and includes an `error` field describing the failure + +#### Scenario: Unknown method rejected +- **WHEN** a client calls the JSON-RPC endpoint with a misspelled method name resembling `ui.screenshot` +- **THEN** the server returns a JSON-RPC error with code `methodNotFound` + +### Requirement: IOSAutomationClient provides a screenshot API +The Kotlin `IOSAutomationClient` SHALL expose a suspend function `screenshot()` that calls the `ui.screenshot` JSON-RPC method and returns the server's raw JSON response string, consistent with other client methods. + +#### Scenario: screenshot() delegates to sendRequest +- **WHEN** `IOSAutomationClient.screenshot()` is called +- **THEN** it invokes `sendRequest("ui.screenshot", null)` and returns the resulting response string unchanged + +### Requirement: Tool description documents outputPath semantics +The `ios_screenshot` tool description SHALL document the optional `outputPath` parameter, the default output location, the overwrite behavior when the path already exists, and the requirement that `ios_start_automation_server` be called first. 
+ +#### Scenario: Tool description includes required workflow +- **WHEN** the MCP client lists the `ios_screenshot` tool +- **THEN** the returned description mentions `outputPath` (optional), the default path `./screenshots/` relative to the server's working directory (user's current project), that existing files at the target path will be overwritten, and the prerequisite of a running iOS automation server