From 73a08cd90ac9c4877a4c0d2645782d6474d9e8c6 Mon Sep 17 00:00:00 2001 From: Alejandro Maggi Date: Tue, 3 Mar 2026 18:11:43 -0300 Subject: [PATCH] Fix scroll and scrollUntilVisible on Android Use ADB input swipe for reliable scrolling instead of Appium gestures, which are unreliable on many Android devices/emulators. Log a warning when falling back to the Appium scroll path due to missing ADB. Also distinguish "element not found" errors from infrastructure failures in scrollUntilVisible so connection errors are propagated immediately instead of being silently swallowed until all scrolls are exhausted. Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 1 + pkg/driver/uiautomator2/commands.go | 106 +++++++++++++--- pkg/driver/uiautomator2/commands_test.go | 29 +++++ pkg/driver/uiautomator2/driver_test.go | 152 +++++++++++++++++++++++ 4 files changed, 273 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a6605a..d11380d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - `runFlow: when` conditions with variable expressions (e.g., `${output.element.id}`) were never expanded, causing conditions to always evaluate as false and silently skip conditional blocks - iOS real device: `acceptAlertButtonSelector` matched "Don't Allow" instead of "Allow" — `CONTAINS[c] 'Allow'` matched both buttons, causing WDA to reject permission dialogs. Changed to `BEGINSWITH[c] 'Allow'` with `OK` fallback for older iOS versions +- Android: `scroll` and `scrollUntilVisible` did not scroll — Appium `/appium/gestures/scroll` endpoint is unreliable on many devices. Replaced with ADB `input swipe` for direct OS-level input injection (falls back to Appium if ADB is unavailable). Also added on-screen bounds verification to prevent false positives from off-screen elements in the Android view hierarchy ## [1.0.7] - 2026-02-20 diff --git a/pkg/driver/uiautomator2/commands.go b/pkg/driver/uiautomator2/commands.go index 73153be..3804672 100644 --- a/pkg/driver/uiautomator2/commands.go +++ b/pkg/driver/uiautomator2/commands.go @@ -2,6 +2,7 @@ package uiautomator2 import ( "context" + "errors" "fmt" "strconv" "strings" @@ -395,23 +396,37 @@ func (d *Driver) scroll(step *flow.ScrollStep) *core.CommandResult { direction = "down" } - // Get screen size for dynamic scroll area + // Get screen size for scroll coordinates width, height, err := d.screenSize() if err != nil { return errorResult(err, "Failed to get screen size") } - // Use most of screen for scroll area (leave margins) - area := uiautomator2.NewRect(0, height/8, width, height*3/4) - - // /appium/gestures/scroll already uses scroll semantics — no inversion needed - if err := d.client.ScrollInArea(area, direction, 0.5, 0); err != nil { + // Use ADB input swipe for reliable scrolling + if err := d.scrollBySwipe(direction, width, height); err != nil { return errorResult(err, fmt.Sprintf("Failed to scroll: %v", err)) } return successResult(fmt.Sprintf("Scrolled %s", direction), nil) } +// isElementNotFoundError returns true if the error indicates the element was simply +// not found (expected during scrolling). Returns false for infrastructure errors +// (connection refused, request failures, etc.) which should be propagated immediately. +func isElementNotFoundError(err error) bool { + if errors.Is(err, context.DeadlineExceeded) { + return true + } + msg := strings.ToLower(err.Error()) + notFoundPhrases := []string{"not found", "no elements match", "no such element", "could not be located", "context deadline exceeded"} + for _, phrase := range notFoundPhrases { + if strings.Contains(msg, phrase) { + return true + } + } + return false +} + func (d *Driver) scrollUntilVisible(step *flow.ScrollUntilVisibleStep) *core.CommandResult { direction := strings.ToLower(step.Direction) if direction == "" { @@ -428,26 +443,30 @@ func (d *Driver) scrollUntilVisible(step *flow.ScrollUntilVisibleStep) *core.Com } deadline := time.Now().Add(timeout) - // Get screen size for dynamic scroll area + // Get screen size for scroll coordinates width, height, err := d.screenSize() if err != nil { return errorResult(err, "Failed to get screen size") } - // Use most of screen for scroll area (leave margins) - area := uiautomator2.NewRect(0, height/8, width, height*3/4) - for i := 0; i < maxScrolls && time.Now().Before(deadline); i++ { // Try to find element (short timeout - includes page source fallback) _, info, err := d.findElement(step.Element, true, 1000) if err == nil && info != nil { - // Element found - return success - return successResult(fmt.Sprintf("Element found after %d scrolls", i), info) + // On Android, UIAutomator can find elements that exist in the view hierarchy + // but are off-screen (e.g., in ScrollView). Verify the element is actually + // visible on screen by checking its bounds overlap with the viewport. + if isElementOnScreen(info, width, height) { + return successResult(fmt.Sprintf("Element found after %d scrolls", i), info) + } + // Element exists in hierarchy but is off-screen - continue scrolling + } else if err != nil && info == nil && !isElementNotFoundError(err) { + return errorResult(err, "Failed to find element") } - // /appium/gestures/scroll already uses scroll semantics — no inversion needed - if err := d.client.ScrollInArea(area, direction, 0.3, 0); err != nil { - return errorResult(err, fmt.Sprintf("Failed to scroll: %v", err)) + // Use ADB input swipe for reliable scrolling (Appium gestures/scroll is unreliable) + if err := d.scrollBySwipe(direction, width, height); err != nil { + return errorResult(err, "Failed to scroll") } time.Sleep(300 * time.Millisecond) @@ -456,6 +475,63 @@ func (d *Driver) scrollUntilVisible(step *flow.ScrollUntilVisibleStep) *core.Com return errorResult(fmt.Errorf("element not found"), fmt.Sprintf("Element not found after %d scrolls", maxScrolls)) } +// scrollBySwipe performs a scroll gesture using ADB input swipe for reliability. +// Falls back to Appium gestures/scroll if ADB is not available. +func (d *Driver) scrollBySwipe(direction string, screenWidth, screenHeight int) error { + centerX := screenWidth / 2 + startY := screenHeight * 3 / 5 + endY := screenHeight * 2 / 5 + durationMs := 300 + + // Calculate swipe coordinates based on direction + // Swipe direction is opposite of scroll direction: + // scroll DOWN (see content below) = swipe finger UP + // scroll UP (see content above) = swipe finger DOWN + var fromX, fromY, toX, toY int + switch direction { + case "up": + fromX, fromY = centerX, endY + toX, toY = centerX, startY + case "down": + fromX, fromY = centerX, startY + toX, toY = centerX, endY + case "left": + centerY := screenHeight / 2 + fromX, fromY = screenWidth*2/5, centerY + toX, toY = screenWidth*3/5, centerY + case "right": + centerY := screenHeight / 2 + fromX, fromY = screenWidth*3/5, centerY + toX, toY = screenWidth*2/5, centerY + default: + fromX, fromY = centerX, startY + toX, toY = centerX, endY + } + + // Prefer ADB shell for reliable input injection + if d.device != nil { + cmd := fmt.Sprintf("input swipe %d %d %d %d %d", fromX, fromY, toX, toY, durationMs) + _, err := d.device.Shell(cmd) + return err + } + + // Fallback to Appium gestures if no ADB access — this path is unreliable + // on many Android devices/emulators, so log a warning to aid debugging. + logger.Warn("ADB not available, falling back to Appium scroll (may be unreliable)") + area := uiautomator2.NewRect(0, screenHeight/8, screenWidth, screenHeight*3/4) + return d.client.ScrollInArea(area, direction, 0.5, 0) +} + +// isElementOnScreen checks if an element's bounds overlap with the visible screen area. +// Returns false if bounds have no area (zero width or height) or are entirely off-screen. +func isElementOnScreen(info *core.ElementInfo, screenWidth, screenHeight int) bool { + b := info.Bounds + if b.Width == 0 || b.Height == 0 { + return false + } + return b.X+b.Width > 0 && b.X < screenWidth && b.Y+b.Height > 0 && b.Y < screenHeight +} + func (d *Driver) swipe(step *flow.SwipeStep) *core.CommandResult { // Check if coordinate-based swipe (percentage or absolute) if step.Start != "" && step.End != "" { diff --git a/pkg/driver/uiautomator2/commands_test.go b/pkg/driver/uiautomator2/commands_test.go index e3f6ed0..eefb363 100644 --- a/pkg/driver/uiautomator2/commands_test.go +++ b/pkg/driver/uiautomator2/commands_test.go @@ -1,6 +1,7 @@ package uiautomator2 import ( + "context" "errors" "fmt" "net/http" @@ -4175,6 +4176,34 @@ func TestScrollUntilVisibleDefaultMaxScrolls(t *testing.T) { // Verify MockUIA2Client satisfies UIA2Client at compile time. var _ UIA2Client = (*MockUIA2Client)(nil) +func TestIsElementNotFoundError(t *testing.T) { + tests := []struct { + name string + err error + expected bool + }{ + {"context deadline exceeded", context.DeadlineExceeded, true}, + {"wrapped deadline exceeded", fmt.Errorf("element 'x' not found: %w", context.DeadlineExceeded), true}, + {"element not found", fmt.Errorf("element not found"), true}, + {"no elements match", fmt.Errorf("no elements match selector"), true}, + {"no such element", fmt.Errorf("no such element: An element could not be located"), true}, + {"could not be located", fmt.Errorf("An element could not be located on the page"), true}, + {"appium deadline with no such element", fmt.Errorf("context deadline exceeded: no such element: An element could not be located on the page using the given search parameters"), true}, + {"connection refused", fmt.Errorf("connection refused"), false}, + {"send request failed", fmt.Errorf("send request failed"), false}, + {"EOF", fmt.Errorf("unexpected EOF"), false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isElementNotFoundError(tt.err) + if got != tt.expected { + t.Errorf("isElementNotFoundError(%q) = %v, want %v", tt.err, got, tt.expected) + } + }) + } +} + // Verify uiautomator2.DeviceInfo is used correctly. var _ = &uiautomator2.DeviceInfo{} diff --git a/pkg/driver/uiautomator2/driver_test.go b/pkg/driver/uiautomator2/driver_test.go index 5220b2e..d33143e 100644 --- a/pkg/driver/uiautomator2/driver_test.go +++ b/pkg/driver/uiautomator2/driver_test.go @@ -1977,6 +1977,158 @@ func TestScrollUntilVisibleScrollError(t *testing.T) { } } +func TestScrollUntilVisibleOffScreenElement(t *testing.T) { + // Element is found by UiAutomator but is off-screen (y=3000 on 2400px screen). + // scrollUntilVisible should scroll via ADB until the element moves on-screen. + findCount := 0 + server := setupMockServer(t, map[string]func(w http.ResponseWriter, r *http.Request){ + "POST /element": func(w http.ResponseWriter, r *http.Request) { + writeJSON(w, map[string]interface{}{ + "value": map[string]string{"ELEMENT": "elem-offscreen"}, + }) + }, + "GET /element/elem-offscreen/text": func(w http.ResponseWriter, r *http.Request) { + writeJSON(w, map[string]interface{}{"value": "Target"}) + }, + "GET /element/elem-offscreen/rect": func(w http.ResponseWriter, r *http.Request) { + findCount++ + // First 2 finds: element is off-screen (below viewport) + // Third find: element has scrolled into view + y := 3000 + if findCount >= 3 { + y = 500 + } + writeJSON(w, map[string]interface{}{ + "value": map[string]int{"x": 100, "y": y, "width": 200, "height": 50}, + }) + }, + "GET /source": func(w http.ResponseWriter, r *http.Request) { + writeJSON(w, map[string]interface{}{ + "value": ``, + }) + }, + }) + defer server.Close() + + shell := &MockShellExecutor{} + client := newMockHTTPClient(server.URL) + info := &core.PlatformInfo{ScreenWidth: 1080, ScreenHeight: 2400} + driver := New(client.Client, info, shell) + + step := &flow.ScrollUntilVisibleStep{ + Element: flow.Selector{ID: "target-button"}, + Direction: "UP", + } + result := driver.Execute(step) + + if !result.Success { + t.Errorf("expected success, got error: %v", result.Error) + } + // Should have issued at least 2 ADB swipe commands before finding on-screen + swipeCount := 0 + for _, cmd := range shell.commands { + if strings.HasPrefix(cmd, "input swipe") { + swipeCount++ + // Direction UP = finger moves down = fromY < toY + var fromX, fromY, toX, toY, dur int + fmt.Sscanf(cmd, "input swipe %d %d %d %d %d", &fromX, &fromY, &toX, &toY, &dur) + if fromY >= toY { + t.Errorf("scroll UP should produce finger-down swipe (fromY < toY), got fromY=%d toY=%d", fromY, toY) + } + } + } + if swipeCount < 2 { + t.Errorf("expected at least 2 ADB swipes for off-screen element, got %d", swipeCount) + } +} + +func TestScrollUntilVisibleUsesADBSwipe(t *testing.T) { + server := setupMockServer(t, map[string]func(w http.ResponseWriter, r *http.Request){ + "POST /element": func(w http.ResponseWriter, r *http.Request) { + writeJSON(w, map[string]interface{}{ + "value": map[string]string{"ELEMENT": ""}, + }) + }, + "GET /source": func(w http.ResponseWriter, r *http.Request) { + writeJSON(w, map[string]interface{}{ + "value": ``, + }) + }, + }) + defer server.Close() + + shell := &MockShellExecutor{} + client := newMockHTTPClient(server.URL) + info := &core.PlatformInfo{ScreenWidth: 1080, ScreenHeight: 2400} + driver := New(client.Client, info, shell) + + step := &flow.ScrollUntilVisibleStep{ + Element: flow.Selector{Text: "Target"}, + Direction: "down", + MaxScrolls: 1, + } + driver.Execute(step) + + if len(shell.commands) == 0 { + t.Fatal("expected ADB shell commands for scroll") + } + cmd := shell.commands[0] + if !strings.HasPrefix(cmd, "input swipe") { + t.Errorf("expected 'input swipe' command, got: %s", cmd) + } + // Direction DOWN = finger moves up = fromY > toY + var fromX, fromY, toX, toY, dur int + fmt.Sscanf(cmd, "input swipe %d %d %d %d %d", &fromX, &fromY, &toX, &toY, &dur) + if fromY <= toY { + t.Errorf("scroll DOWN should produce finger-up swipe (fromY > toY), got fromY=%d toY=%d", fromY, toY) + } +} + +func TestScrollUntilVisibleConnectionError(t *testing.T) { + // Create a server that we'll shut down mid-scroll to simulate connection failure + callCount := 0 + server := setupMockServer(t, map[string]func(w http.ResponseWriter, r *http.Request){ + "POST /element": func(w http.ResponseWriter, r *http.Request) { + callCount++ + writeJSON(w, map[string]interface{}{ + "value": map[string]string{"ELEMENT": ""}, + }) + }, + "GET /source": func(w http.ResponseWriter, r *http.Request) { + writeJSON(w, map[string]interface{}{ + "value": ``, + }) + }, + "GET /appium/device/info": func(w http.ResponseWriter, r *http.Request) { + writeJSON(w, map[string]interface{}{ + "value": map[string]interface{}{"realDisplaySize": "1080x2400"}, + }) + }, + }) + + client := newMockHTTPClient(server.URL) + info := &core.PlatformInfo{ScreenWidth: 1080, ScreenHeight: 2400} + driver := New(client.Client, info, nil) + + // Shut down the server to simulate connection failure + server.Close() + + step := &flow.ScrollUntilVisibleStep{ + Element: flow.Selector{Text: "Target"}, + Direction: "down", + MaxScrolls: 10, + } + result := driver.Execute(step) + + if result.Success { + t.Fatal("expected failure on connection error") + } + // Should get a connection error, not "element not found after N scrolls" + if strings.Contains(result.Error.Error(), "not found after") { + t.Errorf("expected connection error to be propagated, got: %s", result.Error.Error()) + } +} + // ============================================================================ // Relative Selector Tests (uses HTTP mock for anchor + page source for target) // ============================================================================