Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ Both automation servers expose `GET /health` and `POST /jsonrpc` (JSON-RPC 2.0)
| `android_input_text` | Type text into the currently focused element |
| `android_press_back` | Press the back button |
| `android_press_home` | Press the home button |
| `android_screenshot` | Capture the device display and save as a PNG file (optional `outputPath`; defaults to `./screenshots/` in the project's CWD) |

### UI Automation (iOS)
| Tool | Description |
Expand All @@ -121,6 +122,7 @@ Both automation servers expose `GET /health` and `POST /jsonrpc` (JSON-RPC 2.0)
| `ios_get_device_info` | Get display size, rotation, and iOS version |
| `ios_input_text` | Type text into the currently focused element |
| `ios_press_home` | Press home button |
| `ios_screenshot` | Capture the simulator display and save as a PNG file (optional `outputPath`; defaults to `./screenshots/` in the project's CWD) |
| `ios_stop_automation_server` | Stop the running XCUITest server |

### Typical Automation Workflow
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ class AutomationClient(
return sendRequest("ui.dumpHierarchy")
}

/**
* Captures the current device display as a PNG and returns the raw JSON-RPC response.
* The response `result` contains `success: Boolean`, `pngBase64: String?`, and `error: String?`.
*/
suspend fun screenshot(): String {
return sendRequest("ui.screenshot")
}

/**
* Gets device information.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,14 @@ class IOSAutomationClient(
return sendRequest("device.pressHome")
}

/**
* Captures a screenshot of the current simulator display.
* Returns the raw JSON-RPC response containing a base64-encoded PNG in `result.pngBase64`.
*/
suspend fun screenshot(): String {
return sendRequest("ui.screenshot")
}

/**
* Finds an element by selector. Returns element info if found.
* @param text Exact text match
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,18 @@ import com.example.visiontest.android.AutomationClient
import com.example.visiontest.common.DeviceConfig
import com.example.visiontest.config.AutomationConfig
import com.example.visiontest.discovery.ToolDiscovery
import com.google.gson.JsonParser
import io.modelcontextprotocol.kotlin.sdk.Tool
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay
import kotlinx.coroutines.withContext
import java.io.File
import java.io.IOException
import java.nio.file.Files
import java.nio.file.StandardCopyOption
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import java.util.Base64

class AndroidAutomationToolRegistrar(
private val android: DeviceConfig,
Expand All @@ -31,6 +39,7 @@ class AndroidAutomationToolRegistrar(
registerInputText(scope)
registerGetDeviceInfo(scope)
registerGetInteractiveElements(scope)
registerScreenshot(scope)
}

private fun registerInstallAutomationServer(scope: ToolScope) {
Expand Down Expand Up @@ -531,4 +540,161 @@ class AndroidAutomationToolRegistrar(
automationClient.getInteractiveElements(includeDisabled)
}
}

private fun registerScreenshot(scope: ToolScope) {
scope.tool(
name = "android_screenshot",
description = """
Captures a screenshot of the current Android device display and saves it as a PNG file on the host.
The automation server must be running first (use start_automation_server).

OPTIONAL PARAMETERS:
- outputPath: Absolute or relative path where the PNG will be written.
Relative paths resolve against the MCP server's working directory (typically the
user's current project). If the file already exists it will be overwritten.
Missing parent directories are created automatically.
If omitted, saves to ./screenshots/android_screenshot_<yyyyMMdd_HHmmss>.png relative to
the server's working directory (i.e. the current project, not the visiontest install dir).

Returns the absolute path of the saved PNG.
""".trimIndent(),
timeoutMs = 30000
) { request ->
captureScreenshot(request.optionalString("outputPath"))
}
}

internal suspend fun captureScreenshot(outputPath: String?): String {
if (!automationClient.isServerRunning()) {
return "Automation server is not running. Use 'start_automation_server' first."
}

val response = automationClient.screenshot()
val root = try {
JsonParser.parseString(response).asJsonObject
} catch (e: Exception) {
return "Screenshot failed: unable to parse response from Android automation server (${e.message})."
}

val errorElement = root.get("error")
if (errorElement != null && !errorElement.isJsonNull) {
if (errorElement.isJsonObject) {
val errorObj = errorElement.asJsonObject
val codeElement = errorObj.get("code")
val code = if (codeElement?.isJsonPrimitive == true && codeElement.asJsonPrimitive.isNumber) {
codeElement.asInt
} else null
val messageElement = errorObj.get("message")
val message = if (messageElement?.isJsonPrimitive == true && messageElement.asJsonPrimitive.isString) {
messageElement.asString
} else "unknown error"
if (code == JSON_RPC_METHOD_NOT_FOUND) {
return "Screenshot failed: the Android automation server does not recognize 'ui.screenshot' " +
"(JSON-RPC methodNotFound). This indicates an outdated Android automation server APK " +
"— rebuild from source or update the installed APK."
}
return if (code != null) {
"Screenshot failed: Android automation server returned error ($code): $message"
} else {
"Screenshot failed: Android automation server returned an error: $message"
}
}
return "Screenshot failed: Android automation server returned a malformed error envelope."
}

val resultElement = root.get("result")
if (resultElement == null || resultElement.isJsonNull) {
return "Screenshot failed: response missing 'result' object."
}
if (!resultElement.isJsonObject) {
return "Screenshot failed: response 'result' is not a JSON object."
}
val result = resultElement.asJsonObject

val successElement = result.get("success")
if (successElement == null || successElement.isJsonNull || !successElement.isJsonPrimitive) {
return "Screenshot failed: response 'result' has a missing or non-primitive 'success' field."
}
val successPrimitive = successElement.asJsonPrimitive
if (!successPrimitive.isBoolean) {
return "Screenshot failed: response 'result.success' is not a boolean (got: $successElement)."
}
if (!successPrimitive.asBoolean) {
val errorElement = result.get("error")
val error = if (errorElement != null && !errorElement.isJsonNull && errorElement.isJsonPrimitive && errorElement.asJsonPrimitive.isString) {
errorElement.asString
} else {
"unknown error"
}
return "Screenshot failed on the Android automation server: $error"
}

val pngBase64Element = result.get("pngBase64")
if (pngBase64Element == null || pngBase64Element.isJsonNull) {
return "Screenshot failed: response missing 'pngBase64'. This may indicate an outdated Android automation server APK — rebuild from source or update the installed APK."
}
if (!pngBase64Element.isJsonPrimitive || !pngBase64Element.asJsonPrimitive.isString) {
return "Screenshot failed: response 'result.pngBase64' is not a string (got: $pngBase64Element)."
}
val pngBase64 = pngBase64Element.asString
if (pngBase64.isEmpty()) {
return "Screenshot failed: response missing 'pngBase64'. This may indicate an outdated Android automation server APK — rebuild from source or update the installed APK."
}

val targetFile = resolveScreenshotPath(outputPath)
return writeScreenshot(targetFile, pngBase64)
}

internal fun resolveScreenshotPath(outputPath: String?): File {
if (outputPath != null && outputPath.isNotBlank()) {
return File(outputPath).absoluteFile
}
val timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"))
return File("screenshots/android_screenshot_$timestamp.png").absoluteFile
}

internal suspend fun writeScreenshot(target: File, pngBase64: String): String = withContext(Dispatchers.IO) {
val bytes = try {
Base64.getDecoder().decode(pngBase64)
} catch (e: IllegalArgumentException) {
return@withContext "Screenshot failed: Android automation server returned invalid base64 PNG data (${e.message})."
}

val targetPath = target.toPath()
val parentDir = target.parentFile
?: return@withContext "Screenshot failed: cannot determine parent directory for ${target.absolutePath}."

try {
Files.createDirectories(parentDir.toPath())
} catch (e: IOException) {
return@withContext "Screenshot failed: unable to create parent directory ${parentDir.absolutePath} (${e.message})."
}

val tempFile = try {
Files.createTempFile(parentDir.toPath(), ".android_screenshot_", ".png.tmp")
} catch (e: IOException) {
return@withContext "Screenshot failed: unable to create temp file in ${parentDir.absolutePath} (${e.message})."
}

try {
Files.write(tempFile, bytes)
try {
Files.move(tempFile, targetPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE)
} catch (_: java.nio.file.AtomicMoveNotSupportedException) {
Files.move(tempFile, targetPath, StandardCopyOption.REPLACE_EXISTING)
}
"Screenshot saved to ${target.absolutePath}"
} catch (e: IOException) {
runCatching { Files.deleteIfExists(tempFile) }
"Screenshot failed: unable to write PNG to ${target.absolutePath} (${e.message})."
} catch (e: Exception) {
runCatching { Files.deleteIfExists(tempFile) }
throw e
Comment thread
docer1990 marked this conversation as resolved.
}
}

companion object {
/** Standard JSON-RPC 2.0 error code for an unknown method. */
private const val JSON_RPC_METHOD_NOT_FOUND = -32601
}
}
Loading
Loading