Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ Both automation servers expose `GET /health` and `POST /jsonrpc` (JSON-RPC 2.0)
| `ios_get_device_info` | Get display size, rotation, and iOS version |
| `ios_input_text` | Type text into the currently focused element |
| `ios_press_home` | Press home button |
| `ios_screenshot` | Capture the simulator display and save as a PNG file (optional `outputPath`; defaults to `./screenshots/` in the project's CWD) |
| `ios_stop_automation_server` | Stop the running XCUITest server |

### Typical Automation Workflow
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,14 @@ class IOSAutomationClient(
return sendRequest("device.pressHome")
}

/**
 * Requests a screenshot of the simulator display through the `ui.screenshot` JSON-RPC method.
 *
 * @return the unparsed JSON-RPC response; on success the PNG is carried base64-encoded
 * in `result.pngBase64`.
 */
suspend fun screenshot(): String = sendRequest("ui.screenshot")

/**
* Finds an element by selector. Returns element info if found.
* @param text Exact text match
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,19 @@ import com.example.visiontest.common.DeviceConfig
import com.example.visiontest.config.IOSAutomationConfig
import com.example.visiontest.discovery.ToolDiscovery
import com.example.visiontest.ios.IOSAutomationClient
import com.google.gson.JsonParser
import io.modelcontextprotocol.kotlin.sdk.Tool
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay
import kotlinx.coroutines.withContext
import org.slf4j.Logger
import java.io.File
import java.io.IOException
import java.nio.file.Files
import java.nio.file.StandardCopyOption
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import java.util.Base64

class IOSAutomationToolRegistrar(
private val ios: DeviceConfig,
Expand All @@ -32,6 +40,7 @@ class IOSAutomationToolRegistrar(
registerGetDeviceInfo(scope)
registerPressHome(scope)
registerInputText(scope)
registerScreenshot(scope)
registerStopAutomationServer(scope)
}

Expand Down Expand Up @@ -467,6 +476,179 @@ class IOSAutomationToolRegistrar(
}
}

/**
 * Registers the `ios_screenshot` tool on [scope].
 *
 * The handler forwards the optional `outputPath` request argument to [captureScreenshot]
 * and returns whatever message that call produces.
 */
private fun registerScreenshot(scope: ToolScope) {
    val toolDescription = """
        Captures a screenshot of the current iOS simulator display and saves it as a PNG file on the host.
        The iOS automation server must be running first (use ios_start_automation_server).

        OPTIONAL PARAMETERS:
        - outputPath: Absolute or relative path where the PNG will be written.
        Relative paths resolve against the MCP server's working directory (typically the
        user's current project). If the file already exists it will be overwritten.
        Missing parent directories are created automatically.
        If omitted, saves to ./screenshots/ios_screenshot_<yyyyMMdd_HHmmss>.png relative to
        the server's working directory (i.e. the current project, not the visiontest install dir).

        Returns the absolute path of the saved PNG.
        """.trimIndent()

    scope.tool(
        name = "ios_screenshot",
        description = toolDescription,
        timeoutMs = 30000
    ) { request ->
        val requestedPath = request.optionalString("outputPath")
        captureScreenshot(requestedPath)
    }
}

/**
 * Asks the iOS automation server for a screenshot and saves it as a PNG on the host.
 *
 * Steps: verify the server is running, call `ui.screenshot`, validate the JSON-RPC
 * response (top-level `error`, then `result.success`, then `result.pngBase64`), resolve the
 * destination with [resolveScreenshotPath] and hand the payload to [writeScreenshot].
 *
 * @param outputPath optional destination path; `null` or blank selects the default location.
 * @return a user-facing message: the [writeScreenshot] result on the happy path, otherwise a
 * description of the first validation failure encountered.
 */
internal suspend fun captureScreenshot(outputPath: String?): String {
    if (!iosAutomationClient.isServerRunning()) {
        return "iOS automation server is not running. Use 'ios_start_automation_server' first."
    }

    val response = iosAutomationClient.screenshot()
    val root = try {
        JsonParser.parseString(response).asJsonObject
    } catch (e: Exception) {
        return "Screenshot failed: unable to parse response from iOS automation server (${e.message})."
    }

    // JSON-RPC 2.0 envelope: either `result` OR `error` is present at the top level.
    // Check `error` first so the server's own message is surfaced.
    val rpcFailure = screenshotRpcErrorMessage(root)
    if (rpcFailure != null) {
        return rpcFailure
    }

    val resultElement = root.get("result")
    if (resultElement == null || resultElement.isJsonNull) {
        return "Screenshot failed: response missing 'result' object."
    }
    if (!resultElement.isJsonObject) {
        return "Screenshot failed: response 'result' is not a JSON object."
    }
    val result = resultElement.asJsonObject

    val resultFailure = screenshotResultFailureMessage(result)
    if (resultFailure != null) {
        return resultFailure
    }

    // Reported both when the field is absent/null and when it is an empty string.
    val missingPngMessage = "Screenshot failed: response missing 'pngBase64'. This may indicate an outdated iOS automation server bundle — rebuild from source or update the installed bundle."

    val pngBase64Element = result.get("pngBase64")
    if (pngBase64Element == null || pngBase64Element.isJsonNull) {
        return missingPngMessage
    }
    if (!pngBase64Element.isJsonPrimitive || !pngBase64Element.asJsonPrimitive.isString) {
        return "Screenshot failed: response 'result.pngBase64' is not a string (got: $pngBase64Element)."
    }
    val pngBase64 = pngBase64Element.asString
    if (pngBase64.isEmpty()) {
        return missingPngMessage
    }

    val targetFile = resolveScreenshotPath(outputPath)
    return writeScreenshot(targetFile, pngBase64)
}

/**
 * Inspects the top-level JSON-RPC `error` member of [root].
 *
 * @return `null` when `error` is absent or JSON null; otherwise the failure message to show.
 * A `methodNotFound` code is mapped to outdated-bundle guidance, because older bundles
 * won't know about `ui.screenshot`.
 */
private fun screenshotRpcErrorMessage(root: com.google.gson.JsonObject): String? {
    val envelope = root.get("error")
    if (envelope == null || envelope.isJsonNull) {
        return null
    }
    if (!envelope.isJsonObject) {
        return "Screenshot failed: iOS automation server returned a malformed error envelope."
    }

    val errorObj = envelope.asJsonObject
    // Only a numeric primitive counts as a code; anything else is treated as "no code".
    val code: Int? = errorObj.get("code")
        ?.takeIf { it.isJsonPrimitive && it.asJsonPrimitive.isNumber }
        ?.asInt
    val message: String = errorObj.get("message")
        ?.takeIf { it.isJsonPrimitive }
        ?.asString
        ?: "unknown error"

    return when (code) {
        JSON_RPC_METHOD_NOT_FOUND ->
            "Screenshot failed: the iOS automation server does not recognize 'ui.screenshot' " +
                "(JSON-RPC methodNotFound). This indicates an outdated iOS automation server bundle " +
                "— rebuild from source or update the installed bundle."
        null -> "Screenshot failed: iOS automation server returned an error: $message"
        else -> "Screenshot failed: iOS automation server returned error ($code): $message"
    }
}

/**
 * Validates the `success` flag inside the JSON-RPC `result` object.
 *
 * @return `null` when `success` is boolean `true`; otherwise the failure message, which includes
 * the server-supplied `result.error` string when `success` is `false` and that field is a string.
 */
private fun screenshotResultFailureMessage(result: com.google.gson.JsonObject): String? {
    val successElement = result.get("success")
    // JSON null is not a primitive, so this single check also covers the null case.
    if (successElement == null || !successElement.isJsonPrimitive) {
        return "Screenshot failed: response 'result' has a missing or non-primitive 'success' field."
    }
    val successFlag = successElement.asJsonPrimitive
    if (!successFlag.isBoolean) {
        return "Screenshot failed: response 'result.success' is not a boolean (got: $successElement)."
    }
    if (successFlag.asBoolean) {
        return null
    }

    val serverError: String = result.get("error")
        ?.takeIf { it.isJsonPrimitive && it.asJsonPrimitive.isString }
        ?.asString
        ?: "unknown error"
    return "Screenshot failed on the iOS automation server: $serverError"
}

/**
 * Chooses the file a screenshot should be written to.
 *
 * @param outputPath caller-supplied path; when `null` or blank a timestamped default
 * (`screenshots/ios_screenshot_<yyyyMMdd_HHmmss>.png`) is used instead.
 * @return the chosen path as an absolute [File]; relative paths resolve against the
 * process working directory.
 */
internal fun resolveScreenshotPath(outputPath: String?): File {
    val destination = if (outputPath.isNullOrBlank()) {
        // Default to the MCP server's working directory so screenshots land in the
        // user's current project (not the visiontest install dir). Coding agents like
        // Claude Code launch the server with CWD set to the project they're working on.
        val stamp = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss").format(LocalDateTime.now())
        "screenshots/ios_screenshot_$stamp.png"
    } else {
        outputPath
    }
    return File(destination).absoluteFile
}

/**
 * Decodes the base64 PNG and writes it to [target], replacing any existing file.
 *
 * Runs on Dispatchers.IO so we don't block the tool handler's coroutine context.
 * Writes to a sibling temp file first, then moves into place so a failure or cancellation
 * mid-write cannot leave a partial PNG at [target]. The temp file is always cleaned up
 * (best-effort) when it was not moved into place, and a cleanup failure never replaces the
 * primary outcome.
 *
 * @param target destination file; missing parent directories are created.
 * @param pngBase64 PNG bytes encoded for the basic `Base64.getDecoder()` alphabet.
 * @return a user-facing result string: `Screenshot saved to <absolute path>` on success,
 * or a specific `Screenshot failed: ...` message for invalid base64 and I/O errors.
 * Non-I/O exceptions raised while writing or moving still propagate to the caller.
 */
internal suspend fun writeScreenshot(target: File, pngBase64: String): String = withContext(Dispatchers.IO) {
    val bytes = try {
        Base64.getDecoder().decode(pngBase64)
    } catch (e: IllegalArgumentException) {
        return@withContext "Screenshot failed: iOS automation server returned invalid base64 PNG data (${e.message})."
    }

    val targetPath = target.toPath()
    val parentDir = target.parentFile
        ?: return@withContext "Screenshot failed: cannot determine parent directory for ${target.absolutePath}."

    try {
        Files.createDirectories(parentDir.toPath())
    } catch (e: IOException) {
        return@withContext "Screenshot failed: unable to create parent directory ${parentDir.absolutePath} (${e.message})."
    }

    // NOTE(review): Files.createTempFile may apply more restrictive permissions than a plain
    // file create, and the move keeps them — confirm that is acceptable for saved screenshots.
    val tempFile = try {
        Files.createTempFile(parentDir.toPath(), ".ios_screenshot_", ".png.tmp")
    } catch (e: IOException) {
        return@withContext "Screenshot failed: unable to create temp file in ${parentDir.absolutePath} (${e.message})."
    }

    try {
        Files.write(tempFile, bytes)
        // ATOMIC_MOVE isn't guaranteed across filesystems, but tempFile is a sibling of
        // target so they're on the same FS. Fall back to plain replace on rare failures.
        try {
            Files.move(tempFile, targetPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE)
        } catch (_: java.nio.file.AtomicMoveNotSupportedException) {
            Files.move(tempFile, targetPath, StandardCopyOption.REPLACE_EXISTING)
        }
        "Screenshot saved to ${target.absolutePath}"
    } catch (e: IOException) {
        "Screenshot failed: unable to write PNG to ${target.absolutePath} (${e.message})."
    } finally {
        // After a successful move the temp path no longer exists, so this is a no-op then.
        // Cleanup is best-effort: swallowing its failure keeps the result string (or the
        // exception already in flight) from being replaced by a secondary delete error.
        try {
            Files.deleteIfExists(tempFile)
        } catch (_: Exception) {
            // Intentionally ignored; a stray temp file is preferable to masking the real outcome.
        }
    }
}

companion object {
/**
 * Standard JSON-RPC 2.0 error code for an unknown method ("Method not found").
 * The screenshot tool compares the server's `error.code` against this value to report
 * that the automation server does not recognize `ui.screenshot`.
 */
private const val JSON_RPC_METHOD_NOT_FOUND = -32601
}

private fun registerStopAutomationServer(scope: ToolScope) {
scope.tool(
name = "ios_stop_automation_server",
Expand Down
Loading
Loading