wireapp · emmaoke-w · Feb 2, 2026 · Feb 3, 2026 · Feb 6, 2026 · Feb 19, 2026
diff --git a/.github/workflows/qa-android-ui-tests.yml b/.github/workflows/qa-android-ui-tests.yml
@@ -69,6 +69,18 @@ on:
         default: ""
         type: string
 
+      rerunFailedEnabled:
+        description: "Automatically rerun only failed tests in this run."
+        required: true
+        default: true
+        type: boolean
+
+      rerunFailedCount:
+        description: "How many failed-test rerun attempts (0-3). Default is 1."
+        required: true
+        default: "1"
+        type: string
+
 permissions:
   contents: read
 
@@ -94,6 +106,14 @@ jobs:
           OLD_BUILD_NUMBER: ${{ inputs.oldBuildNumber }}
         run: bash scripts/qa_android_ui_tests/validation.sh validate-upgrade-inputs
 
+      # Validate retry toggle/count before any runner work starts.
+      - name: Validate rerun inputs
+        shell: bash
+        env:
+          RERUN_FAILED_ENABLED: ${{ inputs.rerunFailedEnabled }}
+          RERUN_FAILED_COUNT: ${{ inputs.rerunFailedCount }}
+        run: bash scripts/qa_android_ui_tests/validation.sh validate-rerun-inputs
+
       # Resolve TAGS into CI selectors and expose them as job outputs.
       - name: Resolve selector from TAGS
         id: resolve_selector
@@ -186,6 +206,10 @@ jobs:
           RESOLVED_TESTCASE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTestCaseId }}
         run: bash scripts/qa_android_ui_tests/execution_setup.sh detect-target-devices
 
+      # Clear stale device-side Allure files early so setup failures cannot publish old reports.
+      - name: Clear stale Allure results on device(s)
+        run: bash scripts/qa_android_ui_tests/execution_setup.sh clear-allure-results-on-devices
+
       # Install app/test prerequisites on each selected device.
       - name: Install APK(s) on device(s)
         env:
@@ -214,27 +238,30 @@ jobs:
       - name: Resolve AndroidX Test Services APKs (for Allure TestStorage)
         run: bash scripts/qa_android_ui_tests/execution_setup.sh resolve-test-services-apks
 
-      # Run instrumentation on selected devices and stream per-device logs.
-      - name: Run UI tests (one shard per device, adb instrumentation)
+      # Run attempt 0, pull results immediately, then rerun only the still-failing tests.
+      - name: Run UI tests (auto-rerun failed tests)
         env:
           RESOLVED_TESTCASE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTestCaseId }}
           RESOLVED_CATEGORY: ${{ needs.validate-and-resolve-inputs.outputs.resolvedCategory }}
           IS_UPGRADE: ${{ inputs.isUpgrade }}
+          RERUN_FAILED_ENABLED: ${{ inputs.rerunFailedEnabled }}
+          RERUN_FAILED_COUNT: ${{ inputs.rerunFailedCount }}
+          ALLURE_RESULTS_ROOT: ${{ runner.temp }}/allure-results
         run: bash scripts/qa_android_ui_tests/run_ui_tests.sh
 
       # Remove runtime secrets before report generation and publish steps.
       - name: Remove runtime secrets (before Allure/Pages)
         if: always()
         run: bash scripts/qa_android_ui_tests/reporting.sh remove-runtime-secrets
 
-      # Pull raw allure-results from each device even when tests fail.
-      - name: Pull Allure results from device(s)
+      # Fallback pull: keep this as a safety net in case per-attempt pull was interrupted.
+      - name: Pull Allure results from device(s) (fallback)
         if: always()
         env:
           OUT_DIR: ${{ runner.temp }}/allure-results
         run: bash scripts/qa_android_ui_tests/reporting.sh pull-allure-results
 
-      # Merge per-device results and attach run metadata labels.
+      # Merge all attempts into one final dataset and stamp passed_on_rerun=true where needed.
       - name: Merge Allure results (add device label)
         if: always()
         env:

diff --git a/scripts/qa_android_ui_tests/README.md b/scripts/qa_android_ui_tests/README.md
@@ -18,7 +18,7 @@ Flavor resolution is runner-driven, not hardcoded in the repo.
 
 - `validation.sh`: input validation, TAG selector parsing, and resolved value logging.
 - `execution_setup.sh`: runner prep, flavor/APK resolution, device prep, secrets fetch, and test artifact setup.
-- `run_ui_tests.sh`: instrumentation execution/sharding across connected devices.
+- `run_ui_tests.sh`: instrumentation execution/sharding, plus automatic reruns of failed tests (explicit per-device retry lists, balanced evenly by test count).
 - `reporting.sh`: Allure pull/merge/generate/publish plus cleanup subcommands.
 
 ## Python Helpers
@@ -27,3 +27,4 @@ Flavor resolution is runner-driven, not hardcoded in the repo.
 - `select_apks.py`: resolve NEW/OLD APK keys based on input/build selection rules.
 - `fetch_secrets_json.py`: build runtime `secrets.json` from 1Password vault items.
 - `merge_allure_results.py`: merge per-device Allure outputs and attach metadata.
+- `extract_failed_tests.py`: extract failed test IDs (`Class#method`) from one attempt's Allure result files.
diff --git a/scripts/qa_android_ui_tests/__pycache__/extract_failed_tests.cpython-313.pyc b/scripts/qa_android_ui_tests/__pycache__/extract_failed_tests.cpython-313.pyc
diff --git a/scripts/qa_android_ui_tests/__pycache__/merge_allure_results.cpython-313.pyc b/scripts/qa_android_ui_tests/__pycache__/merge_allure_results.cpython-313.pyc
diff --git a/scripts/qa_android_ui_tests/execution_setup.sh b/scripts/qa_android_ui_tests/execution_setup.sh
@@ -4,7 +4,7 @@ set -euo pipefail
 # Set up runner, device, and app prerequisites for qa-android-ui-tests workflow.
 
 usage() {
-  echo "Usage: $0 {ensure-required-tools|resolve-flavor|download-apks|detect-target-devices|install-apks-on-devices|fetch-runtime-secrets|build-test-apk|resolve-test-apk-path|resolve-test-services-apks}" >&2
+  echo "Usage: $0 {ensure-required-tools|resolve-flavor|download-apks|detect-target-devices|clear-allure-results-on-devices|install-apks-on-devices|fetch-runtime-secrets|build-test-apk|resolve-test-apk-path|resolve-test-services-apks}" >&2
   exit 2
 }
 
@@ -169,6 +169,17 @@ detect_target_devices() {
   echo "Using ${device_count} device(s)"
 }
 
+# Remove and recreate the device-side Allure results directory on every serial
+# listed in DEVICE_LIST (whitespace-separated).
+#
+# Cleanup stays best-effort: a failing device never aborts the step, but the
+# failure is reported on stderr, because a directory that could not be cleared
+# may still contain results from an earlier run.
+clear_allure_results_on_devices() {
+  : "${DEVICE_LIST:?DEVICE_LIST missing}"
+
+  local remote_dir='/sdcard/googletest/test_outputfiles/allure-results'
+  local -a devices
+  local serial
+
+  read -ra devices <<< "${DEVICE_LIST}"
+  for serial in "${devices[@]}"; do
+    adb -s "${serial}" wait-for-device
+    # Clear stale device-side Allure files before the workflow reaches any later setup step that might fail.
+    if ! adb -s "${serial}" shell "rm -rf '${remote_dir}' && mkdir -p '${remote_dir}'" >/dev/null 2>&1; then
+      echo "WARNING: Could not clear ${remote_dir} on device ${serial}; stale Allure results may remain." >&2
+    fi
+  done
+}
+
 install_apks_on_devices() {
   : "${DEVICE_LIST:?DEVICE_LIST missing}"
   : "${APP_ID:?APP_ID missing}"
@@ -301,9 +312,48 @@ resolve_test_services_apks() {
   test_services_apk="$(find_newest "*test-services*.apk" "${roots[@]}")"
   orchestrator_apk="$(find_newest "*orchestrator*.apk" "${roots[@]}")"
 
+  # Print the quoted value assigned to key $1 in gradle/libs.versions.toml.
+  # Only the first line of the form `<key> = "<value>"` is used; prints nothing
+  # when no line matches.
+  read_version_from_catalog() {
+    local catalog_key="$1"
+    local catalog_file="gradle/libs.versions.toml"
+
+    # Fields are split on double quotes, so $1 is the text before the opening
+    # quote (`<key> = `) and $2 is the value between the quotes.
+    awk -F'"' -v wanted="${catalog_key}" \
+      '$1 ~ ("^" wanted " *= *$") { print $2; exit }' \
+      "${catalog_file}"
+  }
+
+  # Download <artifact>-<version>.apk from the Google Maven repository.
+  #
+  # Arguments:
+  #   $1  group path inside the repository (e.g. "androidx/test/services")
+  #   $2  artifact name
+  #   $3  artifact version
+  # Output:
+  #   On success, prints the local APK path on stdout.
+  #   On failure, prints nothing on stdout and a warning on stderr.
+  #
+  # The function always returns 0: it is called through a plain
+  # `var="$(...)"` assignment under `set -e`, and a non-zero status would abort
+  # the script before the caller's own "could not locate or download" check
+  # can report the problem.
+  download_from_google_maven() {
+    local group_path="$1"
+    local artifact="$2"
+    local version="$3"
+    local out_dir="${RUNNER_TEMP:-/tmp}/androidx-test-apks"
+    local out_path="${out_dir}/${artifact}-${version}.apk"
+    local partial_path="${out_path}.part"
+    local url="https://dl.google.com/dl/android/maven2/${group_path}/${artifact}/${version}/${artifact}-${version}.apk"
+
+    mkdir -p "${out_dir}"
+    # Download to a temporary name first so an interrupted transfer can never
+    # leave a truncated file at the final path, where it would pass a `-f` check.
+    if ! curl -fsSL -o "${partial_path}" "${url}"; then
+      rm -f "${partial_path}"
+      echo "WARNING: Failed to download ${url}" >&2
+      return 0
+    fi
+    mv -f "${partial_path}" "${out_path}"
+    echo "${out_path}"
+  }
+
+  # On a clean/self-hosted runner, these APK artifacts may not exist in cache yet.
+  # If cache lookup misses, download them directly from the official Google Maven repository.
   if [[ -z "${test_services_apk}" || ! -f "${test_services_apk}" ]]; then
-    echo "ERROR: Could not locate AndroidX Test Services APK in Gradle cache."
+    local test_services_version
+    test_services_version="$(read_version_from_catalog "androidx-test-services")"
+    if [[ -n "${test_services_version}" ]]; then
+      test_services_apk="$(download_from_google_maven "androidx/test/services" "test-services" "${test_services_version}")"
+    fi
+  fi
+
+  if [[ -z "${orchestrator_apk}" || ! -f "${orchestrator_apk}" ]]; then
+    local orchestrator_version
+    orchestrator_version="$(read_version_from_catalog "androidx-test-orchestrator")"
+    if [[ -n "${orchestrator_version}" ]]; then
+      orchestrator_apk="$(download_from_google_maven "androidx/test" "orchestrator" "${orchestrator_version}")"
+    fi
+  fi
+
+  if [[ -z "${test_services_apk}" || ! -f "${test_services_apk}" ]]; then
+    echo "ERROR: Could not locate or download AndroidX Test Services APK."
     echo "This APK is required for Allure TestStorage (content://androidx.test.services.storage...)."
+    printf 'Searched cache roots:\n' >&2
+    printf '  - %s\n' "${roots[@]}" >&2
     exit 1
   fi
 
@@ -326,6 +376,9 @@ case "${1:-}" in
   detect-target-devices)
     detect_target_devices
     ;;
+  clear-allure-results-on-devices)
+    clear_allure_results_on_devices
+    ;;
   install-apks-on-devices)
     install_apks_on_devices
     ;;

diff --git a/scripts/qa_android_ui_tests/extract_failed_tests.py b/scripts/qa_android_ui_tests/extract_failed_tests.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+"""Extract failed test IDs (Class#method) from one attempt of Allure results."""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+from pathlib import Path
+
+def _require_env_path(name: str) -> Path:
+    """Return the path stored in environment variable ``name``.
+
+    Prints an ``ERROR:`` line to stderr and exits with status 1 when the
+    variable is unset or empty, rather than surfacing a bare ``KeyError``
+    traceback or silently treating an empty value as the current directory.
+    """
+    value = os.environ.get(name, "")
+    if not value:
+        print(f"ERROR: {name} is not set", file=sys.stderr)
+        sys.exit(1)
+    return Path(value)
+
+
+# Root directory of one attempt's Allure results, and the file that receives the failed IDs.
+attempt_dir = _require_env_path("ATTEMPT_RESULTS_DIR")
+failed_output = _require_env_path("FAILED_TESTS_FILE")
+
+if not attempt_dir.is_dir():
+    print(f"ERROR: ATTEMPT_RESULTS_DIR does not exist: {attempt_dir}", file=sys.stderr)
+    sys.exit(1)
+
+# Allure statuses that are counted as failures.
+FAILED_STATUSES = {"failed", "broken", "unknown"}
+
+
+def test_id_from_labels(data: dict) -> str:
+    """Build ``Class#method`` from the ``testClass``/``testMethod`` labels.
+
+    For each of the two label names, the first entry whose stripped string
+    value is non-empty wins. Returns ``""`` when ``labels`` is not a list or
+    when either part cannot be found.
+    """
+    raw_labels = data.get("labels", [])
+    if not isinstance(raw_labels, list):
+        return ""
+
+    found = {"testClass": "", "testMethod": ""}
+    for entry in raw_labels:
+        if not isinstance(entry, dict):
+            continue
+        key = entry.get("name")
+        text = entry.get("value")
+        if not isinstance(text, str):
+            continue
+        # Guard with isinstance so an unhashable label name cannot raise on lookup.
+        if isinstance(key, str) and key in found and not found[key]:
+            found[key] = text.strip()
+
+    owner = found["testClass"]
+    member = found["testMethod"]
+    if not (owner and member):
+        return ""
+    return f"{owner}#{member}"
+
+
+def test_id_from_full_name(data: dict) -> str:
+    """Derive ``Class#method`` from the ``fullName`` field.
+
+    A value that already contains ``#`` is returned as-is (stripped).
+    Otherwise the text is split at its last ``.`` into class and method.
+    Returns ``""`` when ``fullName`` is missing, not a string, blank, has no
+    ``.``, or yields an empty class or method part.
+    """
+    candidate = data.get("fullName")
+    text = candidate.strip() if isinstance(candidate, str) else ""
+    if not text:
+        return ""
+    if "#" in text:
+        return text
+
+    owner, dot, member = text.rpartition(".")
+    if not dot:
+        return ""
+    owner = owner.strip()
+    member = member.strip()
+    return f"{owner}#{member}" if owner and member else ""
+
+
+def resolve_test_id(data: dict) -> str:
+    """Return the test ID from labels, falling back to ``fullName``; ``""`` if neither works."""
+    from_labels = test_id_from_labels(data)
+    if from_labels:
+        return from_labels
+    return test_id_from_full_name(data)
+
+
+def result_dirs(base: Path) -> list[Path]:
+    """List the directory to scan for each immediate sub-directory of ``base``.
+
+    Sub-directories are visited in sorted order. For each one, its nested
+    ``allure-results`` directory is used when present, otherwise the
+    sub-directory itself. Plain files directly under ``base`` are ignored.
+    """
+    device_dirs = [entry for entry in base.iterdir() if entry.is_dir()]
+    device_dirs.sort()
+
+    resolved = []
+    for device_dir in device_dirs:
+        nested = device_dir / "allure-results"
+        if nested.is_dir():
+            resolved.append(nested)
+        else:
+            resolved.append(device_dir)
+    return resolved
+
+
+# Test IDs seen in this attempt, and the subset whose status counts as a failure.
+failed = set()
+executed = set()
+
+for src_dir in result_dirs(attempt_dir):
+    for result_file in sorted(src_dir.glob("*-result.json")):
+        try:
+            data = json.loads(result_file.read_text(encoding="utf-8"))
+        except Exception as exc:
+            # Best-effort: one unreadable or malformed file must not abort the scan,
+            # but say so on stderr so missing results can be diagnosed.
+            print(f"WARNING: skipping unreadable result file {result_file}: {exc}", file=sys.stderr)
+            continue
+        # Valid JSON is not necessarily an object; the lookups below need a dict.
+        if not isinstance(data, dict):
+            print(f"WARNING: skipping non-object result file {result_file}", file=sys.stderr)
+            continue
+        test_id = resolve_test_id(data)
+        if not test_id:
+            continue
+        executed.add(test_id)
+        status = data.get("status")
+        if isinstance(status, str) and status in FAILED_STATUSES:
+            failed.add(test_id)
+
+# One ID per line, sorted; the file is left empty when nothing failed.
+failed_output.parent.mkdir(parents=True, exist_ok=True)
+failed_output.write_text("\n".join(sorted(failed)) + ("\n" if failed else ""), encoding="utf-8")
+
+# Summary counters on stdout; warnings above go to stderr so this output is unchanged.
+print(f"executed={len(executed)}")
+print(f"failed={len(failed)}")