From 817ccfce6fd5b100501c317080209db081697a54 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Mon, 15 Jun 2026 12:10:42 -0400 Subject: [PATCH] Upstream 2026.06.15 --- .trailblaze-sync | 1 + docs/generated/external-config.md | 2 +- .../functions/custom/assertVisible.md | 4 +- docs/generated/functions/custom/clearText.md | 35 ++ examples/playwright-electron/build.gradle.kts | 11 +- .../sample-app/provision-electron.sh | 36 ++ gradle/libs.versions.toml | 2 + .../agent/blaze/BlazeGoalPlanner.kt | 35 ++ .../trailblaze/agent/blaze/ReflectionNode.kt | 115 +++++ .../agent/blaze/ReflectionNodeTest.kt | 145 ++++++ .../mcp/handlers/RunYamlRequestHandler.kt | 13 +- .../mcp/handlers/RunYamlRequestHandlerTest.kt | 35 ++ .../AccessibilityDeviceManager.kt | 121 ++++- .../TrailblazeAccessibilityService.kt | 85 +++- .../trailblaze/tools/clearText.tool.yaml | 2 + .../AssertVisibleBySelectorTrailblazeTool.kt | 182 +++++++- .../commands/AssertVisibleTrailblazeTool.kt | 23 +- .../commands/ClearTextTrailblazeTool.kt | 84 ++++ .../commands/InputTextTrailblazeTool.kt | 12 +- .../toolcalls/commands/TapTrailblazeTool.kt | 18 + .../toolcalls/commands/TextMatchMode.kt | 15 + .../commands/VolatileTextDetector.kt | 56 +++ .../AssertVisibleTextMatchModeTest.kt | 434 ++++++++++++++++++ .../AssertVisibleTrailblazeToolTest.kt | 75 +++ .../commands/ClearTextTrailblazeToolTest.kt | 80 ++++ .../trailblaze/util/AndroidHostAdbUtils.kt | 21 +- .../util/PrecompiledApkInstaller.kt | 24 +- .../util/AndroidHostAdbUtilsTest.kt | 40 ++ .../util/PrecompiledApkInstallerTest.kt | 26 ++ .../java/xyz/block/trailblaze/cli/McpProxy.kt | 6 +- .../OnDeviceRpcDeviceScreenStream.kt | 12 +- .../scripting/DaemonScriptedToolBundler.kt | 18 +- .../LazyYamlScriptedToolRegistration.kt | 18 +- .../block/trailblaze/ui/MainTrailblazeApp.kt | 17 +- .../trailblaze/ui/TrailblazeDesktopUtil.kt | 3 + .../trailblaze/ui/TrailblazeDeviceManager.kt | 48 +- .../DaemonScriptedToolBundlerTest.kt | 29 ++ .../api/AndroidCompactElementList.kt | 83 +++- .../api/TrailblazeNodeSelectorMinimizer.kt | 21 + .../trailblaze/toolsets/core_interaction.yaml | 1 + .../api/AndroidCompactElementListTest.kt | 171 +++++++ ...lectorGeneratorAndroidAccessibilityTest.kt | 41 +- .../TrailblazeNodeSelectorMinimizerTest.kt | 136 ++++++ .../quickjs/tools/QuickJsToolSerializer.kt | 11 +- .../quickjs/tools/QuickJsTrailblazeTool.kt | 29 ++ .../report/GenerateTestResultsCliCommand.kt | 3 + .../trailblaze/report/models/SessionResult.kt | 3 + .../block/trailblaze/report/utils/LogsRepo.kt | 81 +++- .../block/trailblaze/logs/server/SslConfig.kt | 5 +- .../trailblaze/mcp/newtools/StepToolSet.kt | 12 +- .../ui/models/TrailblazeServerState.kt | 8 + 51 files changed, 2414 insertions(+), 74 deletions(-) create mode 100644 .trailblaze-sync create mode 100644 docs/generated/functions/custom/clearText.md create mode 100755 examples/playwright-electron/sample-app/provision-electron.sh create mode 100644 trailblaze-common/src/commonMain/resources/trails/config/trailmaps/trailblaze/tools/clearText.tool.yaml create mode 100644 trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/ClearTextTrailblazeTool.kt create mode 100644 trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/TextMatchMode.kt create mode 100644 trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/VolatileTextDetector.kt create mode 100644 trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTextMatchModeTest.kt create mode 100644 trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/ClearTextTrailblazeToolTest.kt create mode 100644 trailblaze-common/src/jvmTest/kotlin/xyz/block/trailblaze/util/PrecompiledApkInstallerTest.kt diff --git a/.trailblaze-sync b/.trailblaze-sync new file mode 100644 index 00000000..0b22831d --- /dev/null +++ b/.trailblaze-sync @@ -0,0 +1 @@ +593c43b070fc9636da56b078c7beb062d44bc242 diff --git a/docs/generated/external-config.md b/docs/generated/external-config.md index 527eceb5..821ad699 100644 --- a/docs/generated/external-config.md +++ b/docs/generated/external-config.md @@ -156,7 +156,7 @@ Toolsets are declared in `trailmaps//toolsets/*.yaml`. They are pure YAML gr | `android_primitives` | Yes | `android-ondevice-accessibility`, `android-ondevice-instrumentation` | 5 | | `compose_core` | No | `compose` | 6 | | `compose_verification` | No | `compose` | 2 | -| `core_interaction` | Yes | `android-ondevice-accessibility`, `android-ondevice-instrumentation`, `ios-host` | 16 | +| `core_interaction` | Yes | `android-ondevice-accessibility`, `android-ondevice-instrumentation`, `ios-host` | 17 | | `memory` | No | `all drivers` | 8 | | `meta` | Yes | `all drivers` | 2 | | `mobile_primitives` | Yes | `android-ondevice-accessibility`, `android-ondevice-instrumentation`, `ios-host` | 4 | diff --git a/docs/generated/functions/custom/assertVisible.md b/docs/generated/functions/custom/assertVisible.md index fee6090f..87aa89e7 100644 --- a/docs/generated/functions/custom/assertVisible.md +++ b/docs/generated/functions/custom/assertVisible.md @@ -4,7 +4,7 @@ # `assertVisible` -Assert an element is visible on screen by its ref ID from the snapshot. Use the short hash ref shown in square brackets (e.g., y778 from [y778] "Network & internet"). These refs are stable across captures of the same screen. +Assert an element is visible on screen by its ref ID from the snapshot. Use the short hash ref shown in square brackets (e.g., y778 from [y778] "Network & internet"). These refs are stable across captures of the same screen. Optionally pass `expectedText` to also verify the element's rendered text — use it whenever the case asks to verify a specific value (e.g. "verify the checkout button shows $5.00", "expect status to be Active") instead of just confirming the element exists. ## Source @@ -26,6 +26,8 @@ Assert an element is visible on screen by its ref ID from the snapshot. Use the ### Optional parameters +- `expectedText` — `String` + Optional. When set, asserts the resolved element's rendered text equals this value after whitespace trimming (case-sensitive). Pass the stable rendered text verbatim — e.g. "Charge $5.00", not "the checkout button". Exclude volatile state that changes run-to-run (live item counts like "3 items", timestamps, quantities); pin only the part that stays constant. Leave null when only the element's presence matters. - `reasoning` — `String` ## Output diff --git a/docs/generated/functions/custom/clearText.md b/docs/generated/functions/custom/clearText.md new file mode 100644 index 00000000..633c5fdd --- /dev/null +++ b/docs/generated/functions/custom/clearText.md @@ -0,0 +1,35 @@ + + + + +# `clearText` + +Clear all text from the currently focused text field. Use BEFORE `inputText` when you need +to replace whatever's already in a field (search bar, amount field, form input). Takes no +parameters — the tool reads the field's current length from the view hierarchy. + +Prefer this over `eraseText` whenever your intent is "wipe the field, then type fresh". Use +`eraseText` only when you genuinely need to remove a specific number of trailing characters +(e.g. backspacing one digit off an amount). + +## Source + +- Kind: class-backed +- Class: `xyz.block.trailblaze.toolcalls.commands.ClearTextTrailblazeTool` + +## Contract + +- Visible to LLM: yes (`surface_to_llm: true`) +- Recordable: yes (`is_recordable: true`) +- Host-only: no (`requires_host: false`) + +## Input schema + +_(no parameters)_ + +## Output + +Returns: `string` (opaque text content) + +Typed result schemas (`kind: query | action`, MCP `structuredContent`) are not yet carried by the resolved manifest — this section will gain detail when that lands. + diff --git a/examples/playwright-electron/build.gradle.kts b/examples/playwright-electron/build.gradle.kts index 262cf6fe..7de88613 100644 --- a/examples/playwright-electron/build.gradle.kts +++ b/examples/playwright-electron/build.gradle.kts @@ -33,9 +33,18 @@ val npmInstallElectron by tasks.registering(Exec::class) { onlyIf { gradle.taskGraph.hasTask(tasks.test.get()) } } +val downloadElectronBinary by tasks.registering(Exec::class) { + description = "Download the Electron platform binary" + workingDir = sampleAppDir + commandLine("sh", "provision-electron.sh") + dependsOn(npmInstallElectron) + outputs.upToDateWhen { false } + onlyIf { gradle.taskGraph.hasTask(tasks.test.get()) } +} + tasks.test { useJUnitPlatform() - dependsOn(npmInstallElectron) + dependsOn(downloadElectronBinary) workingDir = rootProject.projectDir.resolve("opensource") // Pass paths to the Electron binary and app directory diff --git a/examples/playwright-electron/sample-app/provision-electron.sh b/examples/playwright-electron/sample-app/provision-electron.sh new file mode 100755 index 00000000..387d73b6 --- /dev/null +++ b/examples/playwright-electron/sample-app/provision-electron.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Ensure Electron's prebuilt platform binary is present under node_modules/electron/dist. +# +# npm >=11.16 no longer runs Electron's postinstall (the step that downloads this binary), so we fetch +# it explicitly. If a download base URL has been written to /tmp/electron-download-base (some CI agents +# set this because their egress can't reach github.com release assets), pull and extract the archive +# from there; otherwise fall back to Electron's own installer, which downloads from github.com. +set -euo pipefail +cd "$(dirname "$0")" + +base_file=/tmp/electron-download-base +rm -rf node_modules/electron/dist node_modules/electron/path.txt + +if [ -f "$base_file" ]; then + base="$(cat "$base_file")" + ver="$(node -p "require('electron/package.json').version")" + plat="$(node -p "process.platform + '-' + process.arch")" + curl -fsSL -o /tmp/electron-archive.zip "${base}v${ver}/electron-v${ver}-${plat}.zip" + size="$(wc -c < /tmp/electron-archive.zip)" + if [ "$size" -lt 50000000 ]; then + printf 'electron archive too small (%s bytes) — the download base did not serve a binary\n' "$size" >&2 + head -c 200 /tmp/electron-archive.zip >&2 + exit 1 + fi + mkdir -p node_modules/electron/dist + unzip -q -o /tmp/electron-archive.zip -d node_modules/electron/dist + case "$(node -p process.platform)" in + darwin) printf 'Electron.app/Contents/MacOS/Electron' > node_modules/electron/path.txt ;; + win32) printf 'electron.exe' > node_modules/electron/path.txt ;; + *) printf 'electron' > node_modules/electron/path.txt ;; + esac +else + node node_modules/electron/install.js +fi + +test -f node_modules/electron/path.txt diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 183037a5..a4cfa4a4 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -44,6 +44,7 @@ kotlinx-serialization = "1.10.0" ktor = "3.3.3" logback = "1.5.21" maestro = "2.3.0" +micrometer = "1.15.12" mcp-sdk = "0.11.1" multiplatformMarkdownRenderer = "0.39.2" multiplatformSettings = "1.3.0" @@ -125,6 +126,7 @@ ktor-client-okhttp = { module = "io.ktor:ktor-client-okhttp", version.ref = "kto ktor-client-core-jvm = { module = "io.ktor:ktor-client-core-jvm", version.ref = "ktor" } ktor-client-core = { module = "io.ktor:ktor-client-core", version.ref = "ktor" } ktor-client-js = { module = "io.ktor:ktor-client-js", version.ref = "ktor" } +ktor-client-mock = { module = "io.ktor:ktor-client-mock", version.ref = "ktor" } kotlin-csv = { module = "com.github.doyaaaaaken:kotlin-csv-jvm", version.ref = "kotlin-csv" } kotlinpoet = { module = "com.squareup:kotlinpoet", version.ref = "kotlinpoet" } ktor-http = { module = "io.ktor:ktor-http", version.ref = "ktor" } diff --git a/trailblaze-agent/src/main/java/xyz/block/trailblaze/agent/blaze/BlazeGoalPlanner.kt b/trailblaze-agent/src/main/java/xyz/block/trailblaze/agent/blaze/BlazeGoalPlanner.kt index c7e758f5..790b8698 100644 --- a/trailblaze-agent/src/main/java/xyz/block/trailblaze/agent/blaze/BlazeGoalPlanner.kt +++ b/trailblaze-agent/src/main/java/xyz/block/trailblaze/agent/blaze/BlazeGoalPlanner.kt @@ -741,6 +741,41 @@ class BlazeGoalPlanner( ) } + // Step 5b: Target discipline — when the step's named target isn't among the tappable + // refs, don't let the LLM tap an unrelated clickable (the Hardware-Hub trap). Either the + // target is scrolled off (affordance present → scroll toward it) or it isn't on this + // screen at all (surface the wrong-screen signal and stop). + val screenText = screenState.viewHierarchyTextRepresentation + when ( + detectTargetMissingRecovery( + objective = currentObjective, + screenText = screenText, + recommendedTool = analysis.recommendedTool, + ) + ) { + // Skip the distractor tap and steer the NEXT analysis to scroll the named direction. + // buildProgressSummary surfaces the latest reflection note into the next iteration's + // prompt, so this directive reaches the analyzer before it chooses again. + TargetMissingRecovery.SCROLL_TO_REVEAL -> { + val direction = extractTargetPhrase(currentObjective) + ?.let { scrollDirectionFromAffordance(it, screenText) } + ?: "down" + return state.copy( + iteration = state.iteration + 1, + reflectionNotes = state.reflectionNotes + + "[Target discipline] The named target is off-screen. Do NOT tap an unrelated " + + "element — scroll $direction to reveal it, then act on it.", + ) + } + TargetMissingRecovery.WRONG_SCREEN -> return state.copy( + stuck = true, + stuckReason = WRONG_SCREEN_MESSAGE, + screenSummary = analysis.screenSummary, + iteration = state.iteration + 1, + ) + TargetMissingRecovery.PROCEED -> Unit + } + // Step 6: Add low-confidence reflection note before execution val stateForExecution = if (config.enableReflection && analysis.confidence == Confidence.LOW) { val reflectionNote = "Low confidence on iteration ${state.iteration + 1}: " + diff --git a/trailblaze-agent/src/main/java/xyz/block/trailblaze/agent/blaze/ReflectionNode.kt b/trailblaze-agent/src/main/java/xyz/block/trailblaze/agent/blaze/ReflectionNode.kt index c87c478a..461ed143 100644 --- a/trailblaze-agent/src/main/java/xyz/block/trailblaze/agent/blaze/ReflectionNode.kt +++ b/trailblaze-agent/src/main/java/xyz/block/trailblaze/agent/blaze/ReflectionNode.kt @@ -353,6 +353,121 @@ private data class ProgressInfo( val correction: String? = null, ) +/** + * Outcome of the target-discipline check: when the step's named target is absent from the + * tappable refs, the agent must recover rather than tap an unrelated distractor. + * + * @see detectTargetMissingRecovery + */ +enum class TargetMissingRecovery { + /** Target is present (or no named target) — proceed with the recommended action. */ + PROCEED, + + /** A scroll-to-reveal affordance for the target is on screen — scroll toward it, then retry. */ + SCROLL_TO_REVEAL, + + /** No target and no affordance anywhere — likely the wrong screen; surface and stop. */ + WRONG_SCREEN, +} + +/** Human-facing message surfaced when the target is not on this screen at all. */ +const val WRONG_SCREEN_MESSAGE: String = + "target not found on this screen — you may be on the wrong screen; this step may need to be revised" + +/** Tap-style tool name fragments. Matches `tap`, `tapOnElementByNodeId`, `compose_click`, etc. */ +private val TAP_TOOL_FRAGMENTS = listOf("tap", "click") + +/** + * Decides whether the agent should avoid tapping an unrelated clickable when the step's named + * target isn't present among the tappable refs (the Hardware-Hub trap). + * + * Only engages for tap-style actions — scrolls, inputs, waits, and status calls are left alone. + * The decision is driven entirely by the compact snapshot text the analyzer saw: + * - The target text appears in the snapshot → [TargetMissingRecovery.PROCEED] (the LLM can act). + * - The target is absent but a `(scroll … to reveal)` affordance is present → + * [TargetMissingRecovery.SCROLL_TO_REVEAL] (the offscreen-target flavor: scroll, don't tap). + * - The target is absent and no affordance exists → [TargetMissingRecovery.WRONG_SCREEN] + * (the no-target flavor: surface the wrong-screen signal and stop). + * + * @param objective The step's natural-language instruction (e.g. "Tap the search field"). + * @param screenText The compact view-hierarchy snapshot text the analyzer was shown. + * @param recommendedTool The tool the analyzer chose this iteration. + */ +fun detectTargetMissingRecovery( + objective: String, + screenText: String?, + recommendedTool: String, +): TargetMissingRecovery { + val tool = recommendedTool.lowercase() + if (TAP_TOOL_FRAGMENTS.none { it in tool }) return TargetMissingRecovery.PROCEED + + val target = extractTargetPhrase(objective) ?: return TargetMissingRecovery.PROCEED + + // Absent compact text is not evidence of a wrong screen: drivers like Android HOST mode + // intentionally leave viewHierarchyTextRepresentation null and feed the analyzer a JSON/tree + // fallback instead. Don't intercept when we can't see the snapshot. + val text = screenText ?: return TargetMissingRecovery.PROCEED + + // The target counts as "present and tappable" only when it appears on a line carrying a + // ref marker (e.g. `[c596]`, `[n12]`). Static labels, container headers, and the + // non-tappable `(scroll … to reveal)` affordance must NOT satisfy the present check. + val lines = text.lines() + if (lines.any { it.containsRefMarker() && it.contains(target, ignoreCase = true) }) { + return TargetMissingRecovery.PROCEED + } + + val affordanceLines = lines.filter { it.contains("to reveal)", ignoreCase = true) } + return if (affordanceLines.any { it.contains(target, ignoreCase = true) }) { + TargetMissingRecovery.SCROLL_TO_REVEAL + } else { + TargetMissingRecovery.WRONG_SCREEN + } +} + +/** + * Matches a compact element-list ref marker — one letter, 1-3 digits, optional collision + * suffix letter, e.g. `[a1]`, `[k42]`, `[z103]`, `[k42b]` (see [xyz.block.trailblaze.api.ElementRef]). + * Deliberately excludes state annotations like `[checked]`, `[disabled]`, `[id=…]`. + */ +private val REF_MARKER = Regex("\\[[a-z]\\d{1,3}[a-z]?]", RegexOption.IGNORE_CASE) + +private fun String.containsRefMarker(): Boolean = REF_MARKER.containsMatchIn(this) + +/** + * Reads the scroll direction ("up" / "down") from the target's `(scroll … to reveal)` + * affordance line, so the recovery note can name the concrete direction. Defaults to "down". + */ +fun scrollDirectionFromAffordance(target: String, screenText: String?): String { + val line = screenText?.lines()?.firstOrNull { + it.contains("to reveal)", ignoreCase = true) && it.contains(target, ignoreCase = true) + } ?: return "down" + return if (line.contains("scroll up", ignoreCase = true)) "up" else "down" +} + +/** + * Extracts the named target phrase from a step instruction so it can be checked against the + * snapshot. Prefers an explicitly quoted phrase; otherwise takes the noun phrase after a + * leading action verb (tap/select/find/search/open/choose). Returns null when no specific + * target can be identified (e.g. "go back", "scroll down") — those steps are left to proceed. + */ +internal fun extractTargetPhrase(objective: String): String? { + Regex("[\"“']([^\"”']{2,})[\"”']").find(objective)?.let { return it.groupValues[1].trim() } + + val verb = Regex( + "^\\s*(?:tap|click|select|find|search\\s+for|open|choose|press)\\s+(?:on\\s+|the\\s+)*(.+)$", + RegexOption.IGNORE_CASE, + ).find(objective) ?: return null + + return verb.groupValues[1] + .trim() + .removeSuffix(".") + .removeSuffix(" field") + .removeSuffix(" button") + .removeSuffix(" icon") + .trim() + .takeIf { it.length >= 2 } +} + /** * Determines whether reflection should be triggered based on state. * diff --git a/trailblaze-agent/src/test/kotlin/xyz/block/trailblaze/agent/blaze/ReflectionNodeTest.kt b/trailblaze-agent/src/test/kotlin/xyz/block/trailblaze/agent/blaze/ReflectionNodeTest.kt index aa38b051..8458f183 100644 --- a/trailblaze-agent/src/test/kotlin/xyz/block/trailblaze/agent/blaze/ReflectionNodeTest.kt +++ b/trailblaze-agent/src/test/kotlin/xyz/block/trailblaze/agent/blaze/ReflectionNodeTest.kt @@ -475,6 +475,151 @@ class ReflectionNodeTest { assertNotNull(result.progressAssessment) } + // ====== TARGET DISCIPLINE (Hardware-Hub trap) ====== + + @Test + fun `target absent with scroll affordance recommends scrolling not tapping`() { + val screen = """ + [c100] ImageView "Open Hardware Hub" + (scroll up to reveal) EditText "Search all items" + """.trimIndent() + + val result = detectTargetMissingRecovery( + objective = "Tap the \"Search all items\" field", + screenText = screen, + recommendedTool = "tapOnElementByNodeId", + ) + + assertEquals(TargetMissingRecovery.SCROLL_TO_REVEAL, result) + } + + @Test + fun `target absent with no affordance surfaces wrong-screen signal`() { + val screen = """ + [c100] ImageView "Open Hardware Hub" + [c101] Button "Charge" + """.trimIndent() + + val result = detectTargetMissingRecovery( + objective = "Tap the \"Search all items\" field", + screenText = screen, + recommendedTool = "tap", + ) + + assertEquals(TargetMissingRecovery.WRONG_SCREEN, result) + } + + @Test + fun `target present on screen proceeds with the action`() { + val screen = """ + [c100] ImageView "Open Hardware Hub" + [c101] EditText "Search all items" + """.trimIndent() + + val result = detectTargetMissingRecovery( + objective = "Tap the \"Search all items\" field", + screenText = screen, + recommendedTool = "tap", + ) + + assertEquals(TargetMissingRecovery.PROCEED, result) + } + + @Test + fun `non-tap actions are never intercepted`() { + val screen = "[c100] ImageView \"Open Hardware Hub\"" + + val result = detectTargetMissingRecovery( + objective = "Tap the \"Search all items\" field", + screenText = screen, + recommendedTool = "scroll", + ) + + assertEquals(TargetMissingRecovery.PROCEED, result) + } + + @Test + fun `objective with no identifiable target proceeds`() { + val screen = "[c100] ImageView \"Open Hardware Hub\"" + + val result = detectTargetMissingRecovery( + objective = "go back", + screenText = screen, + recommendedTool = "tap", + ) + + assertEquals(TargetMissingRecovery.PROCEED, result) + } + + @Test + fun `null screen text is not treated as wrong screen`() { + // Drivers like Android HOST mode leave viewHierarchyTextRepresentation null and feed + // the analyzer a tree/JSON fallback — absence of compact text must not stop the step. + val result = detectTargetMissingRecovery( + objective = "Tap the \"Save\" button", + screenText = null, + recommendedTool = "tap", + ) + + assertEquals(TargetMissingRecovery.PROCEED, result) + } + + @Test + fun `target on a non-interactive label line without a ref does not count as present`() { + // A static label "Search all items" with no ref marker is not tappable, so the guard + // must not let the distractor tap through — with no affordance it's a wrong screen. + val screen = """ + [c100] ImageView "Open Hardware Hub" + "Search all items" + """.trimIndent() + + val result = detectTargetMissingRecovery( + objective = "Tap the \"Search all items\" field", + screenText = screen, + recommendedTool = "tap", + ) + + assertEquals(TargetMissingRecovery.WRONG_SCREEN, result) + } + + @Test + fun `state annotation brackets are not mistaken for ref markers`() { + // `[disabled]` is a state annotation, not a ref marker; a target on an annotated but + // unreffed line still must not count as a tappable ref. + val screen = """ + [c100] ImageView "Open Hardware Hub" + TextView "Search all items" [disabled] + """.trimIndent() + + val result = detectTargetMissingRecovery( + objective = "Tap the \"Search all items\" field", + screenText = screen, + recommendedTool = "tap", + ) + + assertEquals(TargetMissingRecovery.WRONG_SCREEN, result) + } + + @Test + fun `scroll direction is read from the affordance line`() { + val screenUp = "(scroll up to reveal) EditText \"Search all items\"" + val screenDown = "(scroll down to reveal) EditText \"Search all items\"" + + assertEquals("up", scrollDirectionFromAffordance("Search all items", screenUp)) + assertEquals("down", scrollDirectionFromAffordance("Search all items", screenDown)) + assertEquals("down", scrollDirectionFromAffordance("Search all items", null)) + } + + @Test + fun `extractTargetPhrase prefers quoted phrase`() { + assertEquals("Search all items", extractTargetPhrase("Tap the \"Search all items\" field")) + } + + @Test + fun `extractTargetPhrase falls back to noun phrase after verb`() { + assertEquals("Library", extractTargetPhrase("Open the Library button")) + } + @Test fun `should calculate success rate correctly`() { val actions = listOf( diff --git a/trailblaze-android-ondevice-mcp/src/main/java/xyz/block/trailblaze/mcp/handlers/RunYamlRequestHandler.kt b/trailblaze-android-ondevice-mcp/src/main/java/xyz/block/trailblaze/mcp/handlers/RunYamlRequestHandler.kt index d4f0fe42..bb096650 100644 --- a/trailblaze-android-ondevice-mcp/src/main/java/xyz/block/trailblaze/mcp/handlers/RunYamlRequestHandler.kt +++ b/trailblaze-android-ondevice-mcp/src/main/java/xyz/block/trailblaze/mcp/handlers/RunYamlRequestHandler.kt @@ -370,7 +370,14 @@ class RunYamlRequestHandler( setCurrentJob(job) if (request.awaitCompletion) { - val resolved = withTimeoutOrNull(OnDeviceRpcTimeouts.HANDLER_AWAIT_CAP_MS) { outcome.await() } + // The launched job lives in the standalone backgroundScope; tie it to this await so an + // originating HTTP-call cancellation cancels it instead of stalling to the handler cap. + val resolved = try { + withTimeoutOrNull(OnDeviceRpcTimeouts.HANDLER_AWAIT_CAP_MS) { outcome.await() } + } catch (e: CancellationException) { + job.cancel(CancellationException("Originating HTTP call cancelled before completion")) + throw e + } if (resolved == null) { // Hard cap so a hung tool can't tie up an HTTP socket indefinitely. Cancel the job // and launch a background cleanup: the launched block's catch path exits via @@ -434,6 +441,10 @@ class RunYamlRequestHandler( // progress events. Enforced by RunYamlResponse's init-block require. Pass `emptyMap()` // explicitly (rather than relying on the default) for symmetry with the sync paths. RpcResult.Success(RunYamlResponse(sessionId = session.sessionId, memorySnapshot = emptyMap())) + } catch (e: CancellationException) { + // Caller disconnect (the HTTP call was cancelled) isn't a test failure — propagate the + // cancellation instead of recording a failed session. Mirrors the launched-job catch above. + throw e } catch (e: Exception) { sessionManager.endSession( session = session, diff --git a/trailblaze-android-ondevice-mcp/src/test/java/xyz/block/trailblaze/mcp/handlers/RunYamlRequestHandlerTest.kt b/trailblaze-android-ondevice-mcp/src/test/java/xyz/block/trailblaze/mcp/handlers/RunYamlRequestHandlerTest.kt index 2e77551f..c20e2d78 100644 --- a/trailblaze-android-ondevice-mcp/src/test/java/xyz/block/trailblaze/mcp/handlers/RunYamlRequestHandlerTest.kt +++ b/trailblaze-android-ondevice-mcp/src/test/java/xyz/block/trailblaze/mcp/handlers/RunYamlRequestHandlerTest.kt @@ -259,6 +259,41 @@ class RunYamlRequestHandlerTest { ) } + /** + * Defense-in-depth for the 15-min stall: when the originating HTTP call coroutine is + * cancelled (host crash / adb-forward break) while awaiting completion, the handler must + * cancel the launched job rather than letting it run to [HANDLER_AWAIT_CAP_MS]. The + * launched job lives in the standalone backgroundScope, so it only unwinds if `handle` + * actively cancels it on its own cancellation. + */ + @Test + fun `originating call cancellation cancels the launched job before the handler cap`() = runTest { + val hungCallbackReleased = CompletableDeferred() + val handler = createHandler( + runTrailblazeYaml = { _, _, _ -> + try { + awaitCancellation() + } finally { + hungCallbackReleased.complete(Unit) + } + }, + ) + + val dispatch = async { handler.handle(testRequest.copy(awaitCompletion = true)) } + + // Let the launched job start and the await begin, then simulate the originating HTTP + // call dropping — well before HANDLER_AWAIT_CAP_MS. + advanceUntilIdle() + dispatch.cancel() + advanceUntilIdle() + + assertTrue( + hungCallbackReleased.isCompleted, + "Launched job was not cancelled when the originating call was cancelled — it would " + + "have stalled to HANDLER_AWAIT_CAP_MS", + ) + } + /** * Fire-and-forget dispatch (`awaitCompletion = false`): handler must return immediately * with `success = null` (the fire-and-forget sentinel) and an empty `memorySnapshot` diff --git a/trailblaze-android/src/main/java/xyz/block/trailblaze/android/accessibility/AccessibilityDeviceManager.kt b/trailblaze-android/src/main/java/xyz/block/trailblaze/android/accessibility/AccessibilityDeviceManager.kt index a8c1a761..2b52279a 100644 --- a/trailblaze-android/src/main/java/xyz/block/trailblaze/android/accessibility/AccessibilityDeviceManager.kt +++ b/trailblaze-android/src/main/java/xyz/block/trailblaze/android/accessibility/AccessibilityDeviceManager.kt @@ -285,10 +285,15 @@ class AccessibilityDeviceManager( * `waitForIdle()` is not a blind sleep — it returns immediately once the event stream has * been quiet for the platform's idle window. No content events fire → no extra wait. */ - private inline fun dispatchAndAwaitSettleBlocking(action: () -> R): R = try { - action() - } finally { - awaitSettle() + private inline fun dispatchAndAwaitSettleBlocking(action: () -> R): R { + // If a prior hideKeyboard() left the soft IME in SHOW_MODE_HIDDEN, restore SHOW_MODE_AUTO + // before the next dispatch so subsequent inputText/tap actions see a normally-behaving IMF. + TrailblazeAccessibilityService.restoreSoftKeyboardIfPending() + return try { + action() + } finally { + awaitSettle() + } } /** Taps at the given coordinates and waits for the UI to settle. */ @@ -462,11 +467,18 @@ class AccessibilityDeviceManager( // --- Keyboard --- /** - * Dismisses the soft IME via [TrailblazeAccessibilityService.hideKeyboard], which gates - * GLOBAL_ACTION_BACK on [isKeyboardVisible] so it only fires when there's actually a - * keyboard to dismiss. Throws on failure rather than silently logging — earlier versions - * fell back to ACTION_CLEAR_FOCUS, which is a no-op against modern soft keyboards and - * caused trails to report success without dismissing anything. + * Dismisses the soft IME via [TrailblazeAccessibilityService.hideKeyboard]. The dispatch + * step (sending GLOBAL_ACTION_BACK through the accessibility service) is fatal on + * failure — that means the accessibility service rejected the action, which is real + * framework misuse. + * + * The post-check (waiting for the IME window to actually leave) is best-effort: a + * Compose `BackHandler` registered on a modal can consume BACK before the IME framework + * sees it, and the IME stays up. The framework asked for a hide and did everything it + * can. Log a warning and return — the caller decides what to do next. The downstream + * safety net for this is [executeTapOnElement]'s pre-tap IME-occlusion check, which + * catches the silent-mis-tap case at the actual point of impact (the tap), not at the + * housekeeping that came before it. */ fun hideKeyboard() { val dispatched = dispatchAndAwaitSettleBlocking { TrailblazeAccessibilityService.hideKeyboard() } @@ -480,7 +492,11 @@ class AccessibilityDeviceManager( // FLAG_DONT_SUPPRESS_ACCESSIBILITY_SERVICES would otherwise let a stuck keyboard // pass the post-check). See `waitForImeDismissed` for the full rationale. if (!TrailblazeAccessibilityService.waitForImeDismissed(timeoutMs = HIDE_KEYBOARD_POST_CHECK_TIMEOUT_MS)) { - error("hideKeyboard failed: IME window still present after dismissal attempt") + Console.log( + "[hideKeyboard] IME window still present after dismissal attempt — proceeding. " + + "If a subsequent tap target falls inside the IME's window bounds, the pre-tap " + + "occlusion check will fail there with a clear error.", + ) } } @@ -542,6 +558,81 @@ class AccessibilityDeviceManager( // --- Private helpers --- + /** + * Pre-tap safety net for the case where the soft IME refused to dismiss (e.g. a Compose + * modal that consumes BACK before the IME framework sees it). If the resolved tap + * coordinate falls inside the live IME window's screen bounds, dispatching the tap there + * would land the touch on the keyboard instead of the intended target — the silent + * mis-tap codex correctly worried about when we made [hideKeyboard]'s post-check + * non-fatal. + * + * Two layers of detection: + * 1. `imeWindowBoundsInScreen()` gives us the IME's bounds when accessibility window + * enumeration is healthy. We do a precise contains-point check. + * 2. When `imeWindowBoundsInScreen()` returns null because window enumeration is + * degraded (some accessibility-service flag combinations leak null windows lists), + * we fall back to `isImeShownAuthoritative()` (dumpsys) — if the IME is up but we + * can't measure it, treat the tap as conservatively occluded. + * + * On detection, try one more dismissal, then re-check. If still occluded (or still + * indeterminate-but-shown), fail loudly with a message that names the target and the + * coordinate. That's the actual product failure ("the framework can't reach this + * target because the keyboard won't get out of the way") surfaced at the point where + * it actually matters. + * + * When no IME signal exists at all (the common case), this is a no-op. + */ + private fun failIfTapPointOccludedByIme(x: Int, y: Int, targetDescription: String) { + val firstSignal = imeOcclusionSignal(x, y) + if (firstSignal == null) return + Console.log( + "[ime-occlusion] resolved tap target ($x, $y) is occluded by the IME ($firstSignal) " + + "— re-attempting dismissal before failing.", + ) + try { + hideKeyboard() + } catch (e: Exception) { + // Even GLOBAL_ACTION_BACK refused — propagate so caller sees the real reason. + error( + "Tap target '$targetDescription' at ($x, $y) is occluded by the soft IME " + + "($firstSignal), and the accessibility service refused GLOBAL_ACTION_BACK: ${e.message}", + ) + } + val secondSignal = imeOcclusionSignal(x, y) + if (secondSignal != null) { + error( + "Tap target '$targetDescription' at ($x, $y) is occluded by the soft IME " + + "($secondSignal), which would not dismiss. Tap would land on the keyboard " + + "instead of the intended target. (Likely cause: a modal screen whose " + + "BackHandler consumes GLOBAL_ACTION_BACK before the IME framework sees it.)", + ) + } + Console.log( + "[ime-occlusion] retry-dismiss cleared the keyboard; resuming tap dispatch.", + ) + } + + /** + * Returns a non-null description of the occlusion signal when (x, y) is occluded by the + * IME, or null when the tap point is clear. Combines the precise window-bounds check + * (when available) with the authoritative dumpsys fallback (for the degraded-windows + * case codex correctly flagged). + */ + private fun imeOcclusionSignal(x: Int, y: Int): String? { + val bounds = TrailblazeAccessibilityService.imeWindowBoundsInScreen() + if (bounds != null) { + return if (bounds.contains(x, y)) "window bounds $bounds" else null + } + // No bounds available — windows enumeration may be degraded. Fall back to dumpsys + // for the conservative shown/not-shown signal. If shown, we can't know whether + // (x, y) is occluded specifically; conservatively treat as occluded. + return if (TrailblazeAccessibilityService.isImeShownAuthoritative()) { + "dumpsys reports IME shown but window bounds unavailable (windows enumeration degraded)" + } else { + null + } + } + /** * Resolves element via [TrailblazeNodeSelector] and taps on the matched element, * with fallback to recorded coordinates. @@ -566,6 +657,7 @@ class AccessibilityDeviceManager( val center = result.node.centerPoint() ?: error("Element matched but has no bounds: ${action.nodeSelector.description()}") Console.log("Resolved via TrailblazeNode: ${action.nodeSelector.description()} at (${center.first}, ${center.second})") + failIfTapPointOccludedByIme(center.first, center.second, action.nodeSelector.description()) tapOrLongPressOnResolvedNode(result.node, center.first, center.second, action.longPress) return ExecutionResult(resolvedX = center.first, resolvedY = center.second) } @@ -576,6 +668,7 @@ class AccessibilityDeviceManager( Console.log( "TrailblazeNode selector matched ${result.nodes.size} elements, using first at (${center.first}, ${center.second})" ) + failIfTapPointOccludedByIme(center.first, center.second, action.nodeSelector.description()) tapOrLongPressOnResolvedNode(first, center.first, center.second, action.longPress) return ExecutionResult(resolvedX = center.first, resolvedY = center.second) } @@ -599,6 +692,14 @@ class AccessibilityDeviceManager( Console.log( "Using fallback coordinates (${action.fallbackX}, ${action.fallbackY})" ) + // Apply the same pre-tap occlusion check on the coordinate-fallback path that the + // selector-resolved branches use. Without it, a recorded-coordinate tap on a + // BackHandler-modal screen would silently land on the still-visible IME. + failIfTapPointOccludedByIme( + action.fallbackX, + action.fallbackY, + "fallback coordinates for ${action.nodeSelector.description()}", + ) tapOrLongPress(action.fallbackX, action.fallbackY, action.longPress) return ExecutionResult(resolvedX = action.fallbackX, resolvedY = action.fallbackY) } else if (action.optional) { diff --git a/trailblaze-android/src/main/java/xyz/block/trailblaze/android/accessibility/TrailblazeAccessibilityService.kt b/trailblaze-android/src/main/java/xyz/block/trailblaze/android/accessibility/TrailblazeAccessibilityService.kt index 0aad9a4d..5c71293a 100644 --- a/trailblaze-android/src/main/java/xyz/block/trailblaze/android/accessibility/TrailblazeAccessibilityService.kt +++ b/trailblaze-android/src/main/java/xyz/block/trailblaze/android/accessibility/TrailblazeAccessibilityService.kt @@ -95,6 +95,7 @@ class TrailblazeAccessibilityService : AccessibilityService() { override fun onDestroy() { super.onDestroy() + restoreSoftKeyboardIfPending() accessibilityServiceInstance = null } @@ -506,8 +507,69 @@ class TrailblazeAccessibilityService : AccessibilityService() { return windows.any { it.type == AccessibilityWindowInfo.TYPE_INPUT_METHOD } } + /** + * Returns the screen-space bounds of the currently-visible soft IME window, or `null` + * if no IME window is enumerated. Used by [AccessibilityDeviceManager]'s pre-tap + * occlusion check to detect when a resolved tap coordinate is going to land on the + * keyboard instead of the intended target — the silent-mis-tap scenario that occurs + * on screens where the IME refuses to dismiss (e.g. Compose modals that consume BACK). + */ + fun imeWindowBoundsInScreen(): android.graphics.Rect? { + val windows = getServiceWindows() ?: return null + val imeWindow = windows.firstOrNull { it.type == AccessibilityWindowInfo.TYPE_INPUT_METHOD } + ?: return null + val rect = android.graphics.Rect() + imeWindow.getBoundsInScreen(rect) + return rect.takeIf { !it.isEmpty } + } + + /** + * Companion check for [imeWindowBoundsInScreen] — returns true when the soft IME is + * currently up according to the authoritative `dumpsys input_method` signal, even + * when [getServiceWindows] is degraded (returns `null` under certain accessibility + * service flags). This is the same dumpsys gate [waitForImeDismissed] uses; exposing + * it here lets the pre-tap occlusion check fail conservatively when we can't measure + * the IME's bounds but know it's present. + */ + fun isImeShownAuthoritative(): Boolean = isImeShownViaDumpsys() + + /** + * Tracks whether the most recent [hideKeyboard] call put the soft IME into + * `SHOW_MODE_HIDDEN`. Cleared on the next dispatched action (via + * [restoreSoftKeyboardIfPending]) so the suppression is scoped to "the moment after + * hideKeyboard" rather than sticking globally until the service unbinds. + */ + private val softKeyboardHideRequested = AtomicBoolean(false) + + /** + * Reads `TRAILBLAZE_IME_DISMISS_VIA_SHOW_MODE`. When set, [hideKeyboard] routes the + * dismissal through `SoftKeyboardController.setShowMode(SHOW_MODE_HIDDEN)` instead of + * a synthetic BACK key — bypassing Compose `BackHandler` callbacks that otherwise eat + * the BACK before the IME framework can hide the keyboard. Read on every call so an + * oncall can flip it on a running daemon without restarting. + */ + private fun imeDismissViaShowModeEnabled(): Boolean { + val raw = System.getenv("TRAILBLAZE_IME_DISMISS_VIA_SHOW_MODE")?.lowercase() + return raw == "1" || raw == "true" + } + + /** + * Restores the soft keyboard to `SHOW_MODE_AUTO` if a prior [hideKeyboard] put it in + * `SHOW_MODE_HIDDEN`. Called at the start of [AccessibilityDeviceManager]'s next + * dispatch and from [onDestroy]. No-op when no hide is pending or the service is gone. + */ + internal fun restoreSoftKeyboardIfPending() { + if (!softKeyboardHideRequested.compareAndSet(true, false)) return + val service = accessibilityServiceInstance ?: return + try { + service.softKeyboardController.setShowMode(AccessibilityService.SHOW_MODE_AUTO) + } catch (e: Exception) { + Console.log("[hideKeyboard] restore to SHOW_MODE_AUTO failed: ${e.message}") + } + } + fun hideKeyboard(): Boolean { - // Gate GLOBAL_ACTION_BACK on an authoritative IME-up check. Two signals, in order: + // Gate dismissal on an authoritative IME-up check. Two signals, in order: // 1. [isImeWindowVisible] — strict windows-only, in-process and cheap. // 2. `dumpsys input_method` — authoritative shell fallback for environments where // [getServiceWindows] is degraded (empty / SecurityException) under @@ -516,7 +578,26 @@ class TrailblazeAccessibilityService : AccessibilityService() { // editable after the IME is dismissed and would cause back-to-back hideKeyboard // calls to navigate back out of the current screen. if (!isImeWindowVisible() && !isImeShownViaDumpsys()) return true - return requireService().performGlobalAction(GLOBAL_ACTION_BACK) + val service = requireService() + if (imeDismissViaShowModeEnabled()) { + // SHOW_MODE_HIDDEN talks straight to ImeVisibilityStateComputer over the + // accessibility client binder — never dispatches a KeyEvent, so the modal's + // BackHandler / OnBackPressedCallback can't swallow the dismissal. Sticky until + // restored; [restoreSoftKeyboardIfPending] flips it back on the next dispatch. + val accepted = + try { + service.softKeyboardController.setShowMode(AccessibilityService.SHOW_MODE_HIDDEN) + } catch (e: Exception) { + Console.log("[hideKeyboard] setShowMode threw: ${e.message}") + false + } + if (accepted) { + softKeyboardHideRequested.set(true) + return true + } + Console.log("[hideKeyboard] SHOW_MODE_HIDDEN rejected — falling back to GLOBAL_ACTION_BACK") + } + return service.performGlobalAction(GLOBAL_ACTION_BACK) } /** diff --git a/trailblaze-common/src/commonMain/resources/trails/config/trailmaps/trailblaze/tools/clearText.tool.yaml b/trailblaze-common/src/commonMain/resources/trails/config/trailmaps/trailblaze/tools/clearText.tool.yaml new file mode 100644 index 00000000..0ad1b5c7 --- /dev/null +++ b/trailblaze-common/src/commonMain/resources/trails/config/trailmaps/trailblaze/tools/clearText.tool.yaml @@ -0,0 +1,2 @@ +id: clearText +class: xyz.block.trailblaze.toolcalls.commands.ClearTextTrailblazeTool diff --git a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleBySelectorTrailblazeTool.kt b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleBySelectorTrailblazeTool.kt index 4056f856..ac932f9a 100644 --- a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleBySelectorTrailblazeTool.kt +++ b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleBySelectorTrailblazeTool.kt @@ -14,8 +14,11 @@ import maestro.orchestra.AssertConditionCommand import maestro.orchestra.Command import maestro.orchestra.Condition import xyz.block.trailblaze.AgentMemory +import xyz.block.trailblaze.api.DriverNodeDetail import xyz.block.trailblaze.api.TrailblazeElementSelector +import xyz.block.trailblaze.api.TrailblazeNode import xyz.block.trailblaze.api.TrailblazeNodeSelector +import xyz.block.trailblaze.api.TrailblazeNodeSelectorResolver import xyz.block.trailblaze.model.NodeSelectorMode import xyz.block.trailblaze.toolcalls.MapsToMaestroCommands import xyz.block.trailblaze.toolcalls.TrailblazeToolClass @@ -74,6 +77,26 @@ data class AssertVisibleBySelectorTrailblazeTool( * entirely — Maestro's own assert timeout is always used there. */ val timeoutMs: Long? = null, + /** + * Optional value-equality check applied AFTER the visibility check passes. When set, the + * resolved element's driver-native text (text → contentDescription → accessibilityText/ + * label/ariaName, depending on the driver) must equal this string after whitespace + * trimming on both sides. Case-sensitive. When null, only presence is enforced — the + * pre-modernization behavior. + * + * Folded onto this tool (rather than a separate `assertVisibleWithText` replay class) + * so the LLM-facing surface stays one tool with one optional field instead of forking + * the family. + */ + val expectedText: String? = null, + /** + * How [expectedText] is compared against the live element text at replay. [TextMatchMode.EXACT] + * keeps the original strict-equality pin; [TextMatchMode.PREFIX] / [TextMatchMode.REGEX] let a + * capture keep a stable head while tolerating volatile tails (e.g. live item counts). Defaults + * to [TextMatchMode.EXACT] so trails recorded before this field deserialize to the original + * behavior, and (with `encodeDefaults = false`) EXACT captures don't write the field at all. + */ + val textMatchMode: TextMatchMode = TextMatchMode.EXACT, ) : MapsToMaestroCommands() { override fun toMaestroCommands(memory: AgentMemory): List { val maestroSelector = lowerToMaestroSelector(selector, nodeSelector) @@ -81,9 +104,19 @@ data class AssertVisibleBySelectorTrailblazeTool( "AssertVisibleBySelectorTrailblazeTool.toMaestroCommands called with neither " + "`selector` nor `nodeSelector` set — malformed recording.", ) - return listOf( - AssertConditionCommand(condition = Condition(visible = maestroSelector.toMaestroElementSelector())), - ) + // When expectedText is set on the legacy fallback path, narrow the Maestro selector to + // also require that text — that's the closest analogue to selector-pinned text equality + // the Maestro path can express. Drivers that support the modern node-selector path + // (accessibility, etc.) hit the richer post-pass check in execute() below. + // + // textMatchMode controls how that text becomes the Maestro textRegex: EXACT pins the full + // interpolated value (byte-identical to the original behavior); PREFIX escapes only the + // stable head so a volatile tail (e.g. live item count) can't fail the match; REGEX passes + // the value through as the regex pattern directly. + val maestroElement = maestroSelector.toMaestroElementSelector().let { base -> + if (expectedText != null) base.copy(textRegex = maestroTextRegexFor(memory)) else base + } + return listOf(AssertConditionCommand(condition = Condition(visible = maestroElement))) } override suspend fun execute( @@ -148,8 +181,151 @@ data class AssertVisibleBySelectorTrailblazeTool( ?: nodeSelector?.androidMaestro?.resourceIdRegex ?: nodeSelector?.iosMaestro?.resourceIdRegex ?: "element" + // When expectedText is set, the visibility check above only confirmed the element is + // present. Now re-resolve against a fresh tree to read the matched element's text + // and assert equality with the expected value. Soft-fall back to the visibility result + // if no tree is available (the Maestro path above already enforced textRegex, so a + // success there implies the text matched). + if (expectedText != null) { + return verifyTextEquality(toolExecutionContext, desc, result) + } return TrailblazeToolResult.Success(message = "Verified '$desc' visible") } return result } + + /** + * Post-pass text-equality check, invoked only when [expectedText] is set. The + * visibility check has already passed at this point; this method re-resolves the + * selector against the live tree, reads the matched element's driver-native text, and + * compares to [expectedText] after whitespace trimming on both sides. Returns a + * surfaceable [TrailblazeToolResult.Error] on mismatch. + */ + private fun verifyTextEquality( + toolExecutionContext: TrailblazeToolExecutionContext, + desc: String, + visibilityResult: TrailblazeToolResult, + ): TrailblazeToolResult { + val fresh = toolExecutionContext.screenStateProvider?.invoke() ?: toolExecutionContext.screenState + val tree = fresh?.trailblazeNodeTree ?: return visibilityResult + val effective = nodeSelector + ?: selector?.toTrailblazeNodeSelector(toolExecutionContext.trailblazeDeviceInfo.platform) + ?: return visibilityResult + + val matched = when ( + val r = TrailblazeNodeSelectorResolver.resolve(tree, effective) + ) { + is TrailblazeNodeSelectorResolver.ResolveResult.SingleMatch -> r.node + is TrailblazeNodeSelectorResolver.ResolveResult.MultipleMatches -> r.nodes.first() + // Visibility check said the element was there but the post-pass re-resolution + // didn't find it. Don't surface as a failure — would be flaky on drivers where + // the tree changes between the wait + re-read. + is TrailblazeNodeSelectorResolver.ResolveResult.NoMatch -> return visibilityResult + } + + val expected = toolExecutionContext.memory.interpolateVariables(expectedText!!).trim() + // Pick the candidate set whose text we compare against `expected`. The candidate + // depends on the structural predicates on the selector: + // + // - `containsChild` / `containsDescendants`: the candidate(s) are the descendants + // that satisfy the inner selector, NOT any node in the matched container's + // subtree. This binds the text check to the structurally-selected element so + // a sibling or unrelated descendant with the same text can't accidentally pass + // the assertion (Codex review on #3660). + // + // - no structural predicate: the candidate is `matched` itself. The pre-fix + // behavior (read the matched node's own text) is preserved when the selector + // directly targets a leaf. + val candidates = collectTextCandidates(matched, effective) + val foundText = candidates.asSequence() + .mapNotNull { it.extractText()?.trim() } + .firstOrNull { matchesExpected(it, expected) } + return if (foundText != null) { + TrailblazeToolResult.Success(message = "Verified '$desc' shows text='$expected'") + } else { + val candidateTexts = candidates.mapNotNull { it.extractText()?.trim() } + .filter { it.isNotBlank() } + val sample = candidateTexts.take(5).joinToString(", ") { "'$it'" } + TrailblazeToolResult.Error.ExceptionThrown( + errorMessage = "assertVisible: element matched '$desc' but expected text '$expected' " + + "not found on the selector-matched element(s). " + + (if (sample.isNotEmpty()) "Actual text(s): $sample" else "Matched element has no readable text."), + ) + } + } + + /** + * Compares a live element's [actual] text against the (already-interpolated, trimmed) + * [expected] value using [textMatchMode]. EXACT preserves the original strict equality. A + * malformed REGEX pattern is treated as a non-match (surfaced as a normal assertion failure) + * rather than thrown, so one bad hand-authored pattern can't turn replay into an infra error. + */ + private fun matchesExpected(actual: String, expected: String): Boolean = when (textMatchMode) { + TextMatchMode.EXACT -> actual == expected + TextMatchMode.PREFIX -> actual.startsWith(expected) + TextMatchMode.REGEX -> runCatching { Regex(expected).matches(actual) }.getOrDefault(false) + } + + /** + * Builds the Maestro `textRegex` for the legacy fallback path from [expectedText] under + * [textMatchMode]. EXACT pins the full interpolated value (unchanged from the original + * behavior); PREFIX escapes the stable head and allows any volatile tail (incl. newlines) + * so the count etc. can't fail the match regardless of Maestro's anchoring; REGEX forwards + * the value as the pattern, escaping it to a literal if it doesn't compile so Maestro never + * receives a malformed pattern (which would surface as an execution error, not a clean miss). + */ + private fun maestroTextRegexFor(memory: AgentMemory): String { + val interpolated = memory.interpolateVariables(expectedText!!) + return when (textMatchMode) { + TextMatchMode.EXACT -> interpolated + TextMatchMode.PREFIX -> Regex.escape(interpolated) + "[\\s\\S]*" + TextMatchMode.REGEX -> + if (runCatching { Regex(interpolated) }.isSuccess) interpolated else Regex.escape(interpolated) + } + } + + /** + * Returns the nodes whose text should be compared against `expectedText` for an + * assertVisible-with-text check. + * + * - If the selector carries `containsChild` or `containsDescendants`, those inner + * selectors identify the specific descendants the user is structurally pointing at + * — the text check binds to those. Resolving each inner against `matched.children` + * (not `matched` itself) avoids the matched outer container leaking into the candidate + * set if it happens to coincidentally satisfy the inner predicate. + * - Otherwise the matched element is itself the leaf and is the only candidate. + */ + private fun collectTextCandidates( + matched: TrailblazeNode, + selector: TrailblazeNodeSelector, + ): List { + val innerSelectors = buildList { + selector.containsChild?.let { add(it) } + selector.containsDescendants?.let { addAll(it) } + } + if (innerSelectors.isEmpty()) return listOf(matched) + val out = LinkedHashSet() + for (inner in innerSelectors) { + for (child in matched.children) { + when (val r = TrailblazeNodeSelectorResolver.resolve(child, inner)) { + is TrailblazeNodeSelectorResolver.ResolveResult.SingleMatch -> out += r.node + is TrailblazeNodeSelectorResolver.ResolveResult.MultipleMatches -> out += r.nodes + is TrailblazeNodeSelectorResolver.ResolveResult.NoMatch -> {} + } + } + } + // If a containsChild/containsDescendants predicate was present but didn't re-resolve + // (tree drift between visibility check and post-pass), fall back to the matched + // element rather than auto-passing the assertion. + return if (out.isEmpty()) listOf(matched) else out.toList() + } + + private fun TrailblazeNode.extractText(): String? = when (val d = driverDetail) { + is DriverNodeDetail.AndroidAccessibility -> d.text ?: d.contentDescription ?: d.labeledByText + is DriverNodeDetail.AndroidMaestro -> d.text ?: d.accessibilityText + is DriverNodeDetail.IosMaestro -> d.text ?: d.accessibilityText + is DriverNodeDetail.IosAxe -> d.label + is DriverNodeDetail.Web -> d.ariaName + else -> null + } } diff --git a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTrailblazeTool.kt b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTrailblazeTool.kt index eabddb61..600ae2a6 100644 --- a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTrailblazeTool.kt +++ b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTrailblazeTool.kt @@ -29,17 +29,34 @@ import xyz.block.trailblaze.viewmatcher.TapSelectorV2.findBestTrailblazeElementS @LLMDescription( "Assert an element is visible on screen by its ref ID from the snapshot. Use the " + "short hash ref shown in square brackets (e.g., y778 from [y778] \"Network & internet\"). " + - "These refs are stable across captures of the same screen.", + "These refs are stable across captures of the same screen. Optionally pass " + + "`expectedText` to also verify the element's rendered text — use it whenever the case " + + "asks to verify a specific value (e.g. \"verify the checkout button shows \$5.00\", " + + "\"expect status to be Active\") instead of just confirming the element exists.", ) data class AssertVisibleTrailblazeTool( @param:LLMDescription("The element ref from the snapshot (e.g., 'y778')") val ref: String, + @param:LLMDescription( + "Optional. When set, asserts the resolved element's rendered text equals this value " + + "after whitespace trimming (case-sensitive). Pass the stable rendered text verbatim — " + + "e.g. \"Charge \$5.00\", not \"the checkout button\". Exclude volatile state that " + + "changes run-to-run (live item counts like \"3 items\", timestamps, quantities); pin " + + "only the part that stays constant. Leave null when only the element's presence matters.", + ) + val expectedText: String? = null, override val reasoning: String? = null, ) : DelegatingTrailblazeTool, ReasoningTrailblazeTool { override fun toExecutableTrailblazeTools( executionContext: TrailblazeToolExecutionContext, ): List { + // Strip volatile state (e.g. live item counts) out of the captured expectedText before it + // becomes a strict EXACT pin. A captured "Review sale\n3 items" would fail replay whenever + // the live count differs; emitting the stable head with PREFIX keeps the value-pin without + // the brittleness. No volatile token → forward verbatim as EXACT (byte-identical to before). + val resolvedText = VolatileTextDetector.resolve(expectedText) + val screenState = executionContext.screenState ?: throw TrailblazeToolExecutionException( message = "assertVisible: No screen state available", @@ -88,6 +105,8 @@ data class AssertVisibleTrailblazeTool( AssertVisibleBySelectorTrailblazeTool( reason = reasoning, nodeSelector = accessibilityNodeSelector, + expectedText = resolvedText.expectedText, + textMatchMode = resolvedText.mode, ), ) } @@ -132,6 +151,8 @@ data class AssertVisibleTrailblazeTool( reason = reasoning, selector = selectorWithStrategy.selector, nodeSelector = nodeSelector, + expectedText = resolvedText.expectedText, + textMatchMode = resolvedText.mode, ), ) } diff --git a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/ClearTextTrailblazeTool.kt b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/ClearTextTrailblazeTool.kt new file mode 100644 index 00000000..73ce7130 --- /dev/null +++ b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/ClearTextTrailblazeTool.kt @@ -0,0 +1,84 @@ +package xyz.block.trailblaze.toolcalls.commands + +import ai.koog.agents.core.tools.annotations.LLMDescription +import kotlinx.serialization.Serializable +import maestro.orchestra.EraseTextCommand +import xyz.block.trailblaze.api.ViewHierarchyTreeNode +import xyz.block.trailblaze.toolcalls.ExecutableTrailblazeTool +import xyz.block.trailblaze.toolcalls.TrailblazeToolClass +import xyz.block.trailblaze.toolcalls.TrailblazeToolExecutionContext +import xyz.block.trailblaze.toolcalls.TrailblazeToolResult + +/** + * Clears every character from the currently focused text field, regardless of how long the + * existing content is. + * + * Why this needs to be a Kotlin tool rather than a YAML wrapper around `eraseText`: + * the bare `eraseText` Maestro command, when invoked with no `charactersToErase` value, + * is clamped to 50 in every driver path — `MaestroAndroidUiAutomatorDriver` (instrumentation) + * literally `repeat(50) { pressDelete() }`, `MaestroCommandConverter` (accessibility) defaults + * to 50, the iOS axe converter defaults to 50. Anything past 50 characters in the focused + * field silently survives the erase, which then concatenates onto the next `inputText`. + * + * This tool reads the focused field's current text length from the view hierarchy snapshot + * and passes that count explicitly. Drivers see "erase exactly N characters" where N matches + * the field — accessibility clamps it down further as a no-op safety, instrumentation runs + * exactly N pressDelete calls instead of an arbitrary cap or an unbounded loop. + * + * When the focused field can't be located in the snapshot (stale screenState, non-focused + * dispatch, view hierarchy that doesn't expose `focused = true`), we fall back to + * [FALLBACK_ERASE_COUNT] — large enough to clear any realistic single-field content, + * small enough that the instrumentation `repeat(N)` loop stays under a second. + */ +@Serializable +@TrailblazeToolClass("clearText") +@LLMDescription( + """ +Clear all text from the currently focused text field. Use BEFORE `inputText` when you need +to replace whatever's already in a field (search bar, amount field, form input). Takes no +parameters — the tool reads the field's current length from the view hierarchy. + +Prefer this over `eraseText` whenever your intent is "wipe the field, then type fresh". Use +`eraseText` only when you genuinely need to remove a specific number of trailing characters +(e.g. backspacing one digit off an amount). + """, +) +data object ClearTextTrailblazeTool : ExecutableTrailblazeTool { + + override suspend fun execute(toolExecutionContext: TrailblazeToolExecutionContext): TrailblazeToolResult { + val agent = toolExecutionContext.maestroTrailblazeAgent + ?: return TrailblazeToolResult.Error.ExceptionThrown( + errorMessage = "clearText requires a Maestro-backed agent but ran in a Playwright-native context.", + ) + + val focusedLength = toolExecutionContext.screenState?.viewHierarchy?.let(::focusedEditableTextLength) + val charactersToErase = focusedLength ?: FALLBACK_ERASE_COUNT + + return agent.runMaestroCommands( + maestroCommands = listOf(EraseTextCommand(charactersToErase = charactersToErase)), + traceId = toolExecutionContext.traceId, + ) + } + + /** + * Returns the length of the currently focused editable node's text content, or null if no + * focused node carrying readable text was found in the snapshot. Picks the deepest focused + * node so a focused EditText nested under a focused container (Compose merged-semantics + * tree) wins over its parent. + */ + internal fun focusedEditableTextLength(root: ViewHierarchyTreeNode): Int? { + // aggregate() walks the tree depth-first; the LAST focused node in that order is the + // innermost one, which is the actual EditText/UITextField the user is typing into rather + // than a focused ancestor container. + val focused = root.aggregate().lastOrNull { it.focused && it.text != null } ?: return null + return focused.text?.length + } + + /** + * Used when the view hierarchy doesn't expose a focused field. Bounded so the + * instrumentation driver's `repeat(N) { pressDelete() }` loop doesn't run unboundedly when + * the snapshot is unreliable. 500 chars covers every realistic single-field value (passwords, + * usernames, addresses, amount fields, search queries). + */ + internal const val FALLBACK_ERASE_COUNT = 500 +} diff --git a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/InputTextTrailblazeTool.kt b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/InputTextTrailblazeTool.kt index 2956a684..2ab23e4c 100644 --- a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/InputTextTrailblazeTool.kt +++ b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/InputTextTrailblazeTool.kt @@ -32,11 +32,13 @@ data class InputTextTrailblazeTool( * * **Pass `false` from interactive / live-forwarding paths** (the wasm `/devices` viewer's * per-keystroke flush). The user is still typing — they don't want the keyboard - * dismissed after every word. And with the daemon's `AndroidSoftKeyboardSuppressor` - * active, there is no soft keyboard to hide; `HideKeyboardCommand` on Android falls - * through to a `BACK` keycode that navigates the current activity backwards instead. - * That's how "typing 'sam' navigated away from the screen" reproduced — Sam's repro on - * PR #3021 caught it. + * dismissed after every word. And on the accessibility driver the daemon's `inputText` + * routes through `ACTION_SET_TEXT` directly on the focused node, which sidesteps the + * soft IME entirely on the happy path (no synthesized key events ever bring up a + * keyboard window). With no soft keyboard up, `HideKeyboardCommand` falls through to + * a `BACK` keycode that navigates the current activity backwards instead. That's how + * "typing 'sam' navigated away from the screen" reproduced — Sam's repro on PR #3021 + * caught it. * * Recorded trail YAMLs continue to omit this field, so replay keeps the existing * dismiss-keyboard behavior intact. Only direct in-process callers that pass `false` diff --git a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/TapTrailblazeTool.kt b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/TapTrailblazeTool.kt index e2f0383a..0699773a 100644 --- a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/TapTrailblazeTool.kt +++ b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/TapTrailblazeTool.kt @@ -114,6 +114,24 @@ data class TapTrailblazeTool( // both the user's intent (the ref points at the meaningful element) and the OS's // touch-routing target. val accessibilityHitTestNode = tree.hitTest(center.first, center.second) ?: targetNode + // [tap-divergence] log: when hitTest at the target's centerPoint returns a + // different node than the LLM picked by `ref`, the recorded selector will + // describe the hitTest winner rather than the LLM's intended target. This is + // the documented round-trip invariant (the OS routes the actual tap at this + // coordinate too), but on screens where adjacent rows share overlapping bounds + // or a wrapper's center sits over a non-target child, the LLM's intent and + // the recorded selector diverge — producing a selector that semantically + // doesn't match the NL step. Logging both nodes here makes that observable + // in every future capture; the bug was previously invisible because the + // recorded selector "looked correct" relative to the resolved coordinate. + if (accessibilityHitTestNode.ref != targetNode.ref) { + Console.log( + "[tap-divergence] ref='$ref' wanted=${targetNode.describe()} " + + "hit_ref=${accessibilityHitTestNode.ref} " + + "hit=${accessibilityHitTestNode.describe()} " + + "reasoning=${reasoning ?: "(none)"}" + ) + } val accessibilityNodeSelector = TrailblazeNodeSelectorGenerator.findBestSelector( tree, accessibilityHitTestNode, diff --git a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/TextMatchMode.kt b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/TextMatchMode.kt new file mode 100644 index 00000000..dc9de858 --- /dev/null +++ b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/TextMatchMode.kt @@ -0,0 +1,15 @@ +package xyz.block.trailblaze.toolcalls.commands + +import kotlinx.serialization.Serializable + +/** + * How an assertVisible text check compares the captured [expectedText] against the live + * element text at replay. [EXACT] is the default so trails without the field keep their + * original strict-equality behavior. + */ +@Serializable +enum class TextMatchMode { + EXACT, + PREFIX, + REGEX, +} diff --git a/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/VolatileTextDetector.kt b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/VolatileTextDetector.kt new file mode 100644 index 00000000..a2f83efd --- /dev/null +++ b/trailblaze-common/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/toolcalls/commands/VolatileTextDetector.kt @@ -0,0 +1,56 @@ +package xyz.block.trailblaze.toolcalls.commands + +/** + * Rewrites a captured assertVisible [expectedText] so volatile run-to-run state (starting with + * live item counts) is matched by shape instead of pinned as a strict EXACT literal that fails + * replay. The stable text around the volatile span is kept verbatim (regex-escaped) and the + * volatile span is made optional — so the assertion still requires the stable text and rejects + * unrelated text in that position, but tolerates the value changing OR disappearing entirely + * (e.g. a captured "Review sale\n3 items" later showing "2 items" or just "Review sale"). + * + * Only fires when there is stable text to anchor on. A capture that is *only* a volatile token + * (e.g. "1 item") is left EXACT, so a trail that deliberately pins a count still catches a wrong + * count. Extensible by adding more [VolatileToken] entries (e.g. currency, timestamps) — kept + * deliberately minimal here. + */ +object VolatileTextDetector { + + data class ResolvedExpectedText( + val expectedText: String?, + val mode: TextMatchMode, + ) + + /** + * A volatile span to tolerate at replay. [locator] matches the span (anchored to a trailing + * subtitle position, including the line separator) so the whole thing can be dropped; + * [optionalGroup] is the regex fragment (already wrapped to be optional) substituted for it. + */ + private data class VolatileToken(val locator: Regex, val optionalGroup: String) + + /** + * Volatile tokens to tolerate. Only a trailing item-count *subtitle* for now: a count on its + * own line at the very end (e.g. "Review sale\n3 items"). Deliberately NOT any inline + * " items" — that keeps stable copy like "Buy 2 items get 1 free" or a deliberately-pinned + * count an exact assertion. Add currency / timestamp tokens here when the corpus shows them + * failing. + */ + private val volatileTokens: List = listOf( + VolatileToken( + locator = Regex("\\n\\s*\\d+\\s+items?\\s*$", RegexOption.IGNORE_CASE), + optionalGroup = "(?:\\n\\s*\\d+\\s+items?\\s*)?", + ), + ) + + fun resolve(expectedText: String?): ResolvedExpectedText { + if (expectedText == null) return ResolvedExpectedText(null, TextMatchMode.EXACT) + for (token in volatileTokens) { + val match = token.locator.find(expectedText) ?: continue + val head = expectedText.substring(0, match.range.first) + // Need stable text to anchor on; a bare count line stays an exact assertion. + if (head.isBlank()) continue + val pattern = Regex.escape(head) + token.optionalGroup + return ResolvedExpectedText(pattern, TextMatchMode.REGEX) + } + return ResolvedExpectedText(expectedText, TextMatchMode.EXACT) + } +} diff --git a/trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTextMatchModeTest.kt b/trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTextMatchModeTest.kt new file mode 100644 index 00000000..00eaf27c --- /dev/null +++ b/trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTextMatchModeTest.kt @@ -0,0 +1,434 @@ +package xyz.block.trailblaze.toolcalls.commands + +import kotlin.test.assertEquals +import kotlin.test.assertIs +import kotlin.test.assertTrue +import kotlinx.coroutines.runBlocking +import kotlinx.datetime.Clock +import maestro.orchestra.AssertConditionCommand +import maestro.orchestra.Command +import org.junit.Test +import xyz.block.trailblaze.AgentMemory +import xyz.block.trailblaze.MaestroTrailblazeAgent +import xyz.block.trailblaze.api.DriverNodeDetail +import xyz.block.trailblaze.api.DriverNodeMatch +import xyz.block.trailblaze.api.ScreenState +import xyz.block.trailblaze.api.TrailblazeElementSelector +import xyz.block.trailblaze.api.TrailblazeNode +import xyz.block.trailblaze.api.TrailblazeNodeSelector +import xyz.block.trailblaze.api.ViewHierarchyTreeNode +import xyz.block.trailblaze.devices.TrailblazeDeviceClassifier +import xyz.block.trailblaze.devices.TrailblazeDeviceId +import xyz.block.trailblaze.devices.TrailblazeDeviceInfo +import xyz.block.trailblaze.devices.TrailblazeDevicePlatform +import xyz.block.trailblaze.devices.TrailblazeDriverType +import xyz.block.trailblaze.logs.client.TrailblazeLogger +import xyz.block.trailblaze.logs.client.TrailblazeSession +import xyz.block.trailblaze.logs.client.TrailblazeSessionProvider +import xyz.block.trailblaze.logs.model.SessionId +import xyz.block.trailblaze.logs.model.TraceId +import xyz.block.trailblaze.model.NodeSelectorMode +import xyz.block.trailblaze.toolcalls.TrailblazeToolExecutionContext +import xyz.block.trailblaze.toolcalls.TrailblazeToolResult +import xyz.block.trailblaze.yaml.TrailblazeYaml + +/** + * Covers [TextMatchMode] on [AssertVisibleBySelectorTrailblazeTool]: the mode-driven replay + * compare in `verifyTextEquality`, the legacy Maestro `textRegex` lowering, the capture-time + * volatile-token detector on [AssertVisibleTrailblazeTool], and back-compat deserialization. + */ +class AssertVisibleTextMatchModeTest { + + // region replay compare (verifyTextEquality, accessibility node-selector path) + + @Test + fun `EXACT passes only when live text equals expected verbatim`() = runBlocking { + val result = runReplay( + liveText = "Review sale\n3 items", + expectedText = "Review sale\n3 items", + mode = TextMatchMode.EXACT, + ) + assertTrue(result is TrailblazeToolResult.Success) + } + + @Test + fun `EXACT fails when live text differs from expected`() = runBlocking { + val result = runReplay( + liveText = "Review sale\n2 items", + expectedText = "Review sale\n3 items", + mode = TextMatchMode.EXACT, + ) + assertTrue(result is TrailblazeToolResult.Error) + } + + @Test + fun `PREFIX passes against live text with a differing volatile tail`() = runBlocking { + val three = runReplay( + liveText = "Review sale\n3 items", + expectedText = "Review sale", + mode = TextMatchMode.PREFIX, + ) + val two = runReplay( + liveText = "Review sale\n2 items", + expectedText = "Review sale", + mode = TextMatchMode.PREFIX, + ) + assertTrue(three is TrailblazeToolResult.Success, "3-item tail should pass under PREFIX") + assertTrue(two is TrailblazeToolResult.Success, "2-item tail should pass under PREFIX") + } + + @Test + fun `PREFIX fails when the stable head is not present`() = runBlocking { + val result = runReplay( + liveText = "Add items\n3 items", + expectedText = "Review sale", + mode = TextMatchMode.PREFIX, + ) + assertTrue(result is TrailblazeToolResult.Error) + } + + @Test + fun `REGEX matches a wildcarded count pattern`() = runBlocking { + val result = runReplay( + liveText = "Review sale\n7 items", + expectedText = "Review sale\\n\\d+ items", + mode = TextMatchMode.REGEX, + ) + assertTrue(result is TrailblazeToolResult.Success) + } + + @Test + fun `REGEX fails when the pattern does not match`() = runBlocking { + val result = runReplay( + liveText = "Review sale\nno items", + expectedText = "Review sale\\n\\d+ items", + mode = TextMatchMode.REGEX, + ) + assertTrue(result is TrailblazeToolResult.Error) + } + + @Test + fun `detector-produced pattern replays against a screen that dropped the count`() = runBlocking { + // End-to-end: a "Review sale\n3 items" capture is rewritten by the detector, then replayed + // against a screen showing just "Review sale" (the motivating absent-count failure). + val captured = VolatileTextDetector.resolve("Review sale\n3 items") + val result = runReplay( + liveText = "Review sale", + expectedText = captured.expectedText!!, + mode = captured.mode, + ) + assertTrue(result is TrailblazeToolResult.Success, "absent count must replay green") + } + + // endregion + + // region legacy Maestro textRegex lowering (toMaestroCommands) + + @Test + fun `EXACT lowers the full expectedText into the Maestro textRegex unchanged`() { + val regex = lowerMaestroTextRegex(expectedText = "Review sale\n3 items", mode = TextMatchMode.EXACT) + assertEquals("Review sale\n3 items", regex) + } + + @Test + fun `PREFIX lowers only the escaped stable head with a tolerant tail`() { + val regex = lowerMaestroTextRegex(expectedText = "Review sale", mode = TextMatchMode.PREFIX) + // The volatile tail must not be pinned; the head is escaped and any tail (incl. newline) is allowed. + assertTrue(Regex(regex).containsMatchIn("Review sale\n3 items"), "should match a 3-item tail") + assertTrue(Regex(regex).containsMatchIn("Review sale\n2 items"), "should match a 2-item tail") + assertTrue(regex.startsWith(Regex.escape("Review sale")), "head should be escaped verbatim") + } + + @Test + fun `REGEX forwards the expectedText through as the Maestro pattern`() { + val regex = lowerMaestroTextRegex(expectedText = "Review sale.*", mode = TextMatchMode.REGEX) + assertEquals("Review sale.*", regex) + } + + @Test + fun `REGEX with a malformed pattern lowers to a compilable Maestro textRegex`() { + // A bad pattern must not reach Maestro raw (it would throw at compile time). It is escaped + // to a literal so Maestro always gets something it can compile. + val regex = lowerMaestroTextRegex(expectedText = "Review [sale", mode = TextMatchMode.REGEX) + assertTrue(runCatching { Regex(regex) }.isSuccess, "lowered pattern must compile") + assertTrue(Regex(regex).containsMatchIn("Review [sale"), "literal must match its own text") + } + + // endregion + + // region capture detector + + @Test + fun `detector pins the head and tolerates the count changing or disappearing`() { + val resolved = VolatileTextDetector.resolve("Review sale\n3 items") + assertEquals(TextMatchMode.REGEX, resolved.mode) + val pattern = Regex(resolved.expectedText!!) + // The changing count is tolerated, and so is the count vanishing entirely (the motivating + // replay where "Review sale\n3 items" later renders as just "Review sale"). + assertTrue(pattern.matches("Review sale\n3 items")) + assertTrue(pattern.matches("Review sale\n2 items")) + assertTrue(pattern.matches("Review sale"), "count disappearing must still pass") + // ...but the stable head stays an exact requirement: unrelated tails must NOT pass. + assertTrue(!pattern.matches("Review sale\nInventory unavailable"), "unrelated tail must fail") + assertTrue(!pattern.matches("Add items\n3 items"), "different head must fail") + } + + @Test + fun `detector keeps a count-only label EXACT so a pinned count still catches a wrong count`() { + val resolved = VolatileTextDetector.resolve("1 item") + assertEquals("1 item", resolved.expectedText) + assertEquals(TextMatchMode.EXACT, resolved.mode) + } + + @Test + fun `detector leaves inline item-count copy EXACT (only a trailing subtitle is volatile)`() { + // The count is part of stable authored copy, not a trailing volatile subtitle, so it must + // stay an exact assertion rather than being relaxed. + for (stable in listOf("Buy 2 items get 1 free", "Minimum 3 items required", "3 items in cart")) { + val resolved = VolatileTextDetector.resolve(stable) + assertEquals(stable, resolved.expectedText, "should not rewrite: $stable") + assertEquals(TextMatchMode.EXACT, resolved.mode, "should stay EXACT: $stable") + } + } + + @Test + fun `detector leaves stable currency text as EXACT`() { + val resolved = VolatileTextDetector.resolve("Charge \$5.00") + assertEquals("Charge \$5.00", resolved.expectedText) + assertEquals(TextMatchMode.EXACT, resolved.mode) + } + + @Test + fun `detector leaves null as EXACT`() { + val resolved = VolatileTextDetector.resolve(null) + assertEquals(null, resolved.expectedText) + assertEquals(TextMatchMode.EXACT, resolved.mode) + } + + @Test + fun `capture forwards a volatile item count as a tolerant REGEX through the delegate`() { + val delegated = captureDelegate(expectedText = "Review sale\n3 items") + assertEquals(TextMatchMode.REGEX, delegated.textMatchMode) + val pattern = Regex(delegated.expectedText!!) + assertTrue(pattern.matches("Review sale\n2 items")) + assertTrue(pattern.matches("Review sale")) + assertTrue(!pattern.matches("Review sale\narchived")) + } + + @Test + fun `capture leaves stable text as EXACT through the delegate`() { + val delegated = captureDelegate(expectedText = "Charge \$5.00") + assertEquals("Charge \$5.00", delegated.expectedText) + assertEquals(TextMatchMode.EXACT, delegated.textMatchMode) + } + + // endregion + + // region malformed REGEX is a clean assertion failure, not an infra crash + + @Test + fun `malformed REGEX pattern fails the assertion instead of throwing`() = runBlocking { + val result = runReplay( + liveText = "Review sale", + expectedText = "Review [sale", // unbalanced bracket — invalid pattern + mode = TextMatchMode.REGEX, + ) + assertTrue(result is TrailblazeToolResult.Error, "invalid regex should surface as a failure") + } + + // endregion + + // region back-compat deserialization + + @Test + fun `tool deserialized from YAML lacking textMatchMode defaults to EXACT`() { + val yaml = TrailblazeYaml.Default.getInstance() + val decoded = yaml.decodeFromString( + AssertVisibleBySelectorTrailblazeTool.serializer(), + """ + |selector: + | textRegex: "Review sale" + |expectedText: "Review sale" + """.trimMargin(), + ) + assertEquals(TextMatchMode.EXACT, decoded.textMatchMode) + assertEquals("Review sale", decoded.expectedText) + } + + @Test + fun `recorded assertVisibleBySelector keeps an exact item-count assertion at replay`() = runBlocking { + // Recordings store the lowered assertVisibleBySelector (not the ref-based assertVisible), so + // the capture-time VolatileTextDetector never re-runs at replay. A trail that deliberately + // pins "Review sale\n3 items" must therefore still fail when the live count differs — the + // detector does not silently relax an already-recorded exact assertion. + val yaml = TrailblazeYaml.Default.getInstance() + val decoded = yaml.decodeFromString( + AssertVisibleBySelectorTrailblazeTool.serializer(), + """ + |expectedText: "Review sale\n3 items" + """.trimMargin(), + ) + assertEquals(TextMatchMode.EXACT, decoded.textMatchMode, "no rewrite happens on a recorded tool") + val result = runReplay( + liveText = "Review sale\n2 items", + expectedText = "Review sale\n3 items", + mode = decoded.textMatchMode, + ) + assertTrue(result is TrailblazeToolResult.Error, "a pinned exact count must still fail on a mismatch") + } + + // endregion + + // region helpers + + private suspend fun runReplay( + liveText: String, + expectedText: String, + mode: TextMatchMode, + ): TrailblazeToolResult { + val matchedNode = TrailblazeNode( + nodeId = 2, + ref = "y778", + bounds = TrailblazeNode.Bounds(100, 200, 300, 260), + driverDetail = DriverNodeDetail.AndroidAccessibility( + text = liveText, + resourceId = "review_sale_row", + ), + ) + val tree = TrailblazeNode( + nodeId = 1, + bounds = TrailblazeNode.Bounds(0, 0, 1000, 1000), + driverDetail = DriverNodeDetail.AndroidAccessibility(), + children = listOf(matchedNode), + ) + // Resolve by a stable resourceId so the post-pass re-resolution is independent of the + // volatile text under test — the mode-driven compare is what we're exercising. + val nodeSelector = TrailblazeNodeSelector.withMatch( + DriverNodeMatch.AndroidAccessibility(resourceIdRegex = "review_sale_row"), + ) + val tool = AssertVisibleBySelectorTrailblazeTool( + nodeSelector = nodeSelector, + expectedText = expectedText, + textMatchMode = mode, + ) + return tool.execute(replayContext(tree)) + } + + private fun lowerMaestroTextRegex(expectedText: String, mode: TextMatchMode): String { + val tool = AssertVisibleBySelectorTrailblazeTool( + selector = TrailblazeElementSelector(textRegex = "Review sale"), + expectedText = expectedText, + textMatchMode = mode, + ) + val commands: List = tool.toMaestroCommands(AgentMemory()) + val assertCommand = assertIs(commands.single()) + return assertCommand.condition.visible?.textRegex + ?: error("expected a visible textRegex on the lowered Maestro selector") + } + + private fun captureDelegate(expectedText: String): AssertVisibleBySelectorTrailblazeTool { + val tree = TrailblazeNode( + nodeId = 1, + bounds = TrailblazeNode.Bounds(0, 0, 1000, 1000), + driverDetail = DriverNodeDetail.AndroidAccessibility(), + children = listOf( + TrailblazeNode( + nodeId = 2, + ref = "y778", + bounds = TrailblazeNode.Bounds(100, 200, 300, 260), + driverDetail = DriverNodeDetail.AndroidAccessibility(text = expectedText), + ), + ), + ) + return assertIs( + AssertVisibleTrailblazeTool(ref = "y778", expectedText = expectedText) + .toExecutableTrailblazeTools(captureContext(tree)) + .single(), + ) + } + + private fun replayContext(tree: TrailblazeNode): TrailblazeToolExecutionContext { + val screen = object : ScreenState { + override val screenshotBytes: ByteArray? = null + override val deviceWidth: Int = 1000 + override val deviceHeight: Int = 1000 + override val viewHierarchy: ViewHierarchyTreeNode = ViewHierarchyTreeNode() + override val trailblazeDevicePlatform = TrailblazeDevicePlatform.ANDROID + override val deviceClassifiers: List = emptyList() + override val trailblazeNodeTree: TrailblazeNode = tree + } + val agent = AlwaysVisibleAgent() + return TrailblazeToolExecutionContext( + screenState = screen, + traceId = null, + trailblazeDeviceInfo = agent.trailblazeDeviceInfoProvider(), + sessionProvider = agent.sessionProvider, + trailblazeLogger = agent.trailblazeLogger, + memory = agent.memory, + maestroTrailblazeAgent = agent, + nodeSelectorMode = NodeSelectorMode.PREFER_NODE_SELECTOR, + ) + } + + private fun captureContext(tree: TrailblazeNode): TrailblazeToolExecutionContext { + val screen = object : ScreenState { + override val screenshotBytes: ByteArray? = null + override val deviceWidth: Int = 1000 + override val deviceHeight: Int = 1000 + override val viewHierarchy: ViewHierarchyTreeNode = ViewHierarchyTreeNode() + override val trailblazeDevicePlatform = TrailblazeDevicePlatform.ANDROID + override val deviceClassifiers: List = emptyList() + override val trailblazeNodeTree: TrailblazeNode = tree + } + return TrailblazeToolExecutionContext( + screenState = screen, + traceId = null, + trailblazeDeviceInfo = TrailblazeDeviceInfo( + trailblazeDeviceId = TrailblazeDeviceId( + instanceId = "t", + trailblazeDevicePlatform = TrailblazeDevicePlatform.ANDROID, + ), + trailblazeDriverType = TrailblazeDriverType.ANDROID_ONDEVICE_INSTRUMENTATION, + widthPixels = 1000, + heightPixels = 1000, + ), + sessionProvider = TrailblazeSessionProvider { + TrailblazeSession(sessionId = SessionId("t"), startTime = Clock.System.now()) + }, + trailblazeLogger = TrailblazeLogger.createNoOp(), + memory = AgentMemory(), + ) + } + + /** Reports the visibility check as passed so `execute()` reaches the text post-pass. */ + private class AlwaysVisibleAgent : MaestroTrailblazeAgent( + trailblazeLogger = TrailblazeLogger.createNoOp(), + trailblazeDeviceInfoProvider = { + TrailblazeDeviceInfo( + trailblazeDeviceId = TrailblazeDeviceId( + instanceId = "test-instance", + trailblazeDevicePlatform = TrailblazeDevicePlatform.ANDROID, + ), + trailblazeDriverType = TrailblazeDriverType.ANDROID_ONDEVICE_INSTRUMENTATION, + widthPixels = 1080, + heightPixels = 1920, + ) + }, + sessionProvider = TrailblazeSessionProvider { + TrailblazeSession(sessionId = SessionId("test-session"), startTime = Clock.System.now()) + }, + ) { + override suspend fun executeNodeSelectorAssertVisible( + nodeSelector: TrailblazeNodeSelector, + timeoutMs: Long?, + traceId: TraceId?, + ): TrailblazeToolResult = TrailblazeToolResult.Success() + + override suspend fun executeMaestroCommands( + commands: List, + traceId: TraceId?, + ): TrailblazeToolResult = TrailblazeToolResult.Success() + } + + // endregion +} diff --git a/trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTrailblazeToolTest.kt b/trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTrailblazeToolTest.kt index b41cae15..36e698a2 100644 --- a/trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTrailblazeToolTest.kt +++ b/trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/AssertVisibleTrailblazeToolTest.kt @@ -136,6 +136,81 @@ class AssertVisibleTrailblazeToolTest { assertNotNull(delegated.nodeSelector) } + @Test + fun `expectedText propagates through to AssertVisibleBySelector on the maestro path`() { + val trailblazeTree = TrailblazeNode( + nodeId = 1, + bounds = TrailblazeNode.Bounds(0, 0, 1000, 1000), + driverDetail = DriverNodeDetail.AndroidMaestro(), + children = listOf( + TrailblazeNode( + nodeId = 2, + ref = "y778", + bounds = TrailblazeNode.Bounds(100, 200, 300, 260), + driverDetail = DriverNodeDetail.AndroidMaestro(text = "Charge \$5.00"), + ), + ), + ) + val viewHierarchy = ViewHierarchyTreeNode( + nodeId = 1, + centerPoint = "500,500", + dimensions = "1000x1000", + children = listOf( + ViewHierarchyTreeNode( + nodeId = 2, + text = "Charge \$5.00", + centerPoint = "200,230", + dimensions = "200x60", + ), + ), + ) + val context = contextWithTree( + trailblazeNodeTree = trailblazeTree, + viewHierarchy = viewHierarchy, + ) + + val delegated = assertIs( + AssertVisibleTrailblazeTool( + ref = "y778", + expectedText = "Charge \$5.00", + reasoning = "verify the checkout total", + ).toExecutableTrailblazeTools(context).single(), + ) + assertEquals("Charge \$5.00", delegated.expectedText) + // The visibility-only fields still propagate as before so non-expectedText callers + // are unaffected. + assertEquals("verify the checkout total", delegated.reason) + assertNotNull(delegated.nodeSelector) + } + + @Test + fun `expectedText propagates through on the accessibility path with nodeSelector-only`() { + val trailblazeTree = TrailblazeNode( + nodeId = 1, + bounds = TrailblazeNode.Bounds(0, 0, 1000, 1000), + driverDetail = DriverNodeDetail.AndroidAccessibility(), + children = listOf( + TrailblazeNode( + nodeId = 2, + ref = "y778", + bounds = TrailblazeNode.Bounds(100, 200, 300, 260), + driverDetail = DriverNodeDetail.AndroidAccessibility(text = "Active"), + ), + ), + ) + val context = contextWithTree(trailblazeNodeTree = trailblazeTree) + + val delegated = assertIs( + AssertVisibleTrailblazeTool( + ref = "y778", + expectedText = "Active", + ).toExecutableTrailblazeTools(context).single(), + ) + assertEquals("Active", delegated.expectedText) + assertEquals(null, delegated.selector) + assertNotNull(delegated.nodeSelector) + } + @Test fun `accessibility driver emits nodeSelector-only with no legacy selector`() { val trailblazeTree = TrailblazeNode( diff --git a/trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/ClearTextTrailblazeToolTest.kt b/trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/ClearTextTrailblazeToolTest.kt new file mode 100644 index 00000000..9708b174 --- /dev/null +++ b/trailblaze-common/src/jvmAndAndroidTest/kotlin/xyz/block/trailblaze/toolcalls/commands/ClearTextTrailblazeToolTest.kt @@ -0,0 +1,80 @@ +package xyz.block.trailblaze.toolcalls.commands + +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertNull +import xyz.block.trailblaze.api.ViewHierarchyTreeNode + +/** + * Locks the contract that `clearText` extracts the focused field's text length from the + * view hierarchy snapshot, then passes that exact count to `eraseText`. This is the bit that + * matters — every driver's `eraseText` cap behavior already has its own tests; the new + * behavior here is "we read the right length first." + */ +class ClearTextTrailblazeToolTest { + + @Test + fun `picks length of focused editable node with text`() { + val root = container( + ViewHierarchyTreeNode( + focused = true, + text = "hello world", + ), + ) + assertEquals(11, ClearTextTrailblazeTool.focusedEditableTextLength(root)) + } + + @Test + fun `returns null when no node is focused`() { + val root = container( + ViewHierarchyTreeNode(focused = false, text = "not the one"), + ViewHierarchyTreeNode(focused = false, text = "also not"), + ) + assertNull(ClearTextTrailblazeTool.focusedEditableTextLength(root)) + } + + @Test + fun `returns null when focused node has no text - drops to fallback at the call site`() { + val root = container( + ViewHierarchyTreeNode(focused = true, text = null), + ) + assertNull(ClearTextTrailblazeTool.focusedEditableTextLength(root)) + } + + @Test + fun `prefers innermost focused node over a focused ancestor`() { + // Compose merged-semantics trees can mark both the EditText and its wrapping row as + // focused. The innermost (last visited in depth-first order) is the actual input. + val root = ViewHierarchyTreeNode( + focused = true, + text = "wrapper container text we should ignore", + children = listOf( + ViewHierarchyTreeNode( + focused = true, + text = "actual field value", + ), + ), + ) + assertEquals(18, ClearTextTrailblazeTool.focusedEditableTextLength(root)) + } + + @Test + fun `handles long content past Maestro's 50-char cap`() { + val longValue = "x".repeat(327) + val root = container( + ViewHierarchyTreeNode(focused = true, text = longValue), + ) + // The whole point of the tool: report the real length, not a clamped 50. + assertEquals(327, ClearTextTrailblazeTool.focusedEditableTextLength(root)) + } + + @Test + fun `fallback constant covers any realistic single-field content but stays bounded`() { + // Sanity-checks the fallback so a future refactor doesn't silently raise it into + // catastrophic-loop territory on the instrumentation driver. + assertEquals(500, ClearTextTrailblazeTool.FALLBACK_ERASE_COUNT) + } + + private fun container(vararg children: ViewHierarchyTreeNode): ViewHierarchyTreeNode = + ViewHierarchyTreeNode(focused = false, children = children.toList()) +} diff --git a/trailblaze-common/src/jvmMain/kotlin/xyz/block/trailblaze/util/AndroidHostAdbUtils.kt b/trailblaze-common/src/jvmMain/kotlin/xyz/block/trailblaze/util/AndroidHostAdbUtils.kt index f49c1794..e3290db0 100644 --- a/trailblaze-common/src/jvmMain/kotlin/xyz/block/trailblaze/util/AndroidHostAdbUtils.kt +++ b/trailblaze-common/src/jvmMain/kotlin/xyz/block/trailblaze/util/AndroidHostAdbUtils.kt @@ -40,6 +40,14 @@ object AndroidHostAdbUtils { Console.log("[AndroidHostAdbUtils] short-call timeout overridden via TRAILBLAZE_ADB_TIMEOUT_MS=$it") } ?: 10_000L + // LLM provider tokens reach the device as `trailblaze.llm.auth.token.` args; redact + // their values so CI shell-command log artifacts don't leak live credentials. + private val AUTH_TOKEN_ARG_REGEX = + Regex("""('?trailblaze\.llm\.auth\.token\.[^'\s]+'?\s+)((?:'[^']*'|\\'|[^\s'])+)""") + + internal fun redactSecretsForLog(command: String): String = + AUTH_TOKEN_ARG_REGEX.replace(command) { "${it.groupValues[1]}" } + // Drains the Dadb client cache and any active port forwards on JVM exit. Without this, a // long-running daemon (`./trailblaze app start`) leaves dadb sockets and host-side forward // listeners open until the OS reclaims them on process death. The legacy ProcessBuilder model @@ -445,7 +453,7 @@ object AndroidHostAdbUtils { */ fun execAdbShellCommand(deviceId: TrailblazeDeviceId, args: List): String { val command = args.joinToString(" ") - Console.log("adb shell $command") + Console.log("adb shell ${redactSecretsForLog(command)}") return withDadb(deviceId) { dadb -> val response = dadb.shell(command) if (response.errorOutput.isEmpty()) response.output else response.allOutput @@ -465,7 +473,8 @@ object AndroidHostAdbUtils { timeoutMs: Long = DEFAULT_SHORT_CALL_TIMEOUT_MS, ): String? { val command = args.joinToString(" ") - Console.log("adb shell ($timeoutMs ms timeout) $command") + val redactedCommand = redactSecretsForLog(command) + Console.log("adb shell ($timeoutMs ms timeout) $redactedCommand") val resultRef = AtomicReference() val errorRef = AtomicReference() val worker = thread(name = "dadb-shell-timed", isDaemon = true) { @@ -495,14 +504,14 @@ object AndroidHostAdbUtils { // — the contract is "data unavailable" — but the diagnostic is now accurate. Console.log( "[AndroidHostAdbUtils] adb shell failed (${error.javaClass.simpleName}: " + - "${error.message}) — command: $command", + "${error.message}) — command: $redactedCommand", ) return null } if (worker.isAlive) { Console.log( "[AndroidHostAdbUtils] adb shell timed out after ${timeoutMs}ms — " + - "evicting dadb client: $command", + "evicting dadb client: $redactedCommand", ) dadbClients.remove(deviceId.instanceId)?.let { runCatching { it.close() } } worker.interrupt() @@ -555,7 +564,7 @@ object AndroidHostAdbUtils { onLine: (String) -> Unit, onExit: ((exitCode: Int) -> Unit)? = null, ): AutoCloseable { - Console.log("adb shell (streaming) $command") + Console.log("adb shell (streaming) ${redactSecretsForLog(command)}") val stream = withDadb(deviceId) { it.openShell(command) } val decoder = StreamingLineDecoder(onLine) val readerThread = thread(name = "dadb-shell-stream", isDaemon = true) { @@ -649,7 +658,7 @@ object AndroidHostAdbUtils { command: String, outputFile: File, ): AutoCloseable { - Console.log("adb shell (streaming -> ${outputFile.name}) $command") + Console.log("adb shell (streaming -> ${outputFile.name}) ${redactSecretsForLog(command)}") val stream = withDadb(deviceId) { it.openShell(command) } // Open the file *after* the stream and close the stream if we can't open the file — // otherwise an open shell stream leaks any time the file system rejects us diff --git a/trailblaze-common/src/jvmMain/kotlin/xyz/block/trailblaze/util/PrecompiledApkInstaller.kt b/trailblaze-common/src/jvmMain/kotlin/xyz/block/trailblaze/util/PrecompiledApkInstaller.kt index 3172dd33..939b9902 100644 --- a/trailblaze-common/src/jvmMain/kotlin/xyz/block/trailblaze/util/PrecompiledApkInstaller.kt +++ b/trailblaze-common/src/jvmMain/kotlin/xyz/block/trailblaze/util/PrecompiledApkInstaller.kt @@ -135,13 +135,35 @@ object PrecompiledApkInstaller { null } + /** + * Builds the marker-write command. [AndroidHostAdbUtils.execAdbShellCommand] space-joins argv + * into a single device-shell command, so the prior `sh -c " '…' > path"` wrapper re-split + * into a bare `sh -c ` — the SHA argument was dropped, a blank line was written, and reuse + * was silently defeated on every run. A direct `printf %s > path` survives the join (the + * SHA is hex, nothing shell-special) and writes the value verbatim. + */ + internal fun shaMarkerWriteArgs(sha: String): List = + listOf("printf", "%s", sha, ">", DEVICE_SHA_MARKER_PATH) + private fun writeShaMarkerToDevice(trailblazeDeviceId: TrailblazeDeviceId) { val sha = bundledApkSha ?: return try { AndroidHostAdbUtils.execAdbShellCommand( deviceId = trailblazeDeviceId, - args = listOf("sh", "-c", "echo '$sha' > $DEVICE_SHA_MARKER_PATH"), + args = shaMarkerWriteArgs(sha), ) + // Read back so a silent write failure (how this regressed) is visible rather than + // quietly reinstalling every run. + val readBack = AndroidHostAdbUtils.execAdbShellCommand( + deviceId = trailblazeDeviceId, + args = listOf("cat", DEVICE_SHA_MARKER_PATH), + ).trim() + if (readBack != sha) { + Console.log( + "APK SHA marker did not persist (read back '$readBack', expected '$sha') — the " + + "on-device server will be reinstalled every run.", + ) + } } catch (e: Exception) { Console.log("Failed to write APK SHA marker to device: ${e.message}") } diff --git a/trailblaze-common/src/jvmTest/kotlin/xyz/block/trailblaze/util/AndroidHostAdbUtilsTest.kt b/trailblaze-common/src/jvmTest/kotlin/xyz/block/trailblaze/util/AndroidHostAdbUtilsTest.kt index 09c9f0a0..1202e619 100644 --- a/trailblaze-common/src/jvmTest/kotlin/xyz/block/trailblaze/util/AndroidHostAdbUtilsTest.kt +++ b/trailblaze-common/src/jvmTest/kotlin/xyz/block/trailblaze/util/AndroidHostAdbUtilsTest.kt @@ -400,6 +400,46 @@ class AndroidHostAdbUtilsTest { ) } + // ── redactSecretsForLog ────────────────────────────────────────────────── + // LLM provider tokens passed as `trailblaze.llm.auth.token.` args must never reach + // the logged command — CI archives shell-command logs as downloadable build artifacts. + + @Test + fun authTokenArgValuesAreRedactedInLoggedCommand() { + val command = + "am instrument -e 'trailblaze.reverseProxy' 'true' " + + "-e 'trailblaze.llm.auth.token.acme' 'SECRET_A' " + + "-e 'trailblaze.llm.auth.token.openai' 'SECRET_B' app/Runner" + assertThat(AndroidHostAdbUtils.redactSecretsForLog(command)).isEqualTo( + "am instrument -e 'trailblaze.reverseProxy' 'true' " + + "-e 'trailblaze.llm.auth.token.acme' " + + "-e 'trailblaze.llm.auth.token.openai' app/Runner", + ) + } + + @Test + fun authTokenArgValueIsRedactedWhenUnquoted() { + val command = "am instrument -e trailblaze.llm.auth.token.openai SECRET_B app/Runner" + assertThat(AndroidHostAdbUtils.redactSecretsForLog(command)).isEqualTo( + "am instrument -e trailblaze.llm.auth.token.openai app/Runner", + ) + } + + @Test + fun commandsWithoutAuthTokenArgsArePassedThroughUnchanged() { + val command = "getprop ro.build.version.sdk" + assertThat(AndroidHostAdbUtils.redactSecretsForLog(command)).isEqualTo(command) + } + + @Test + fun authTokenArgWithEmbeddedSingleQuoteIsFullyRedacted() { + // shellEscape of a token containing a single quote: abc'def -> 'abc'\''def'. + val command = "am instrument -e 'trailblaze.llm.auth.token.openai' 'abc'\\''def' app/Runner" + assertThat(AndroidHostAdbUtils.redactSecretsForLog(command)).isEqualTo( + "am instrument -e 'trailblaze.llm.auth.token.openai' app/Runner", + ) + } + // ── helpers ────────────────────────────────────────────────────────────── /** Tiny env-var fixture so callers can write `env("KEY" to "value", ...)`. */ diff --git a/trailblaze-common/src/jvmTest/kotlin/xyz/block/trailblaze/util/PrecompiledApkInstallerTest.kt b/trailblaze-common/src/jvmTest/kotlin/xyz/block/trailblaze/util/PrecompiledApkInstallerTest.kt new file mode 100644 index 00000000..72c4d347 --- /dev/null +++ b/trailblaze-common/src/jvmTest/kotlin/xyz/block/trailblaze/util/PrecompiledApkInstallerTest.kt @@ -0,0 +1,26 @@ +package xyz.block.trailblaze.util + +import assertk.assertThat +import assertk.assertions.isEqualTo +import org.junit.Test + +/** + * Guards the on-device APK-SHA marker write against the quoting regression that disabled server + * reuse. [AndroidHostAdbUtils.execAdbShellCommand] space-joins argv into a single device-shell + * command, so the prior `sh -c " '…' > path"` wrapper re-split into a bare `sh -c ` — + * the SHA argument was dropped, a blank line was written, and the empty marker made + * `isInstalledApkUpToDate` always false, forcing a reinstall + relaunch on every run. + */ +class PrecompiledApkInstallerTest { + + @Test + fun `marker write prints the sha directly so it survives the argv space-join`() { + assertThat(PrecompiledApkInstaller.shaMarkerWriteArgs("deadbeefcafe").joinToString(" ")) + .isEqualTo("printf %s deadbeefcafe > /data/local/tmp/trailblaze-runner-sha.txt") + } + + @Test + fun `marker write does not wrap in sh -c (the shape that silently wrote a blank marker)`() { + assertThat(PrecompiledApkInstaller.shaMarkerWriteArgs("abc123").first()).isEqualTo("printf") + } +} diff --git a/trailblaze-host/src/main/java/xyz/block/trailblaze/cli/McpProxy.kt b/trailblaze-host/src/main/java/xyz/block/trailblaze/cli/McpProxy.kt index e3876dfe..1acb28d3 100644 --- a/trailblaze-host/src/main/java/xyz/block/trailblaze/cli/McpProxy.kt +++ b/trailblaze-host/src/main/java/xyz/block/trailblaze/cli/McpProxy.kt @@ -40,6 +40,7 @@ import xyz.block.trailblaze.devices.TrailblazeDevicePlatform import xyz.block.trailblaze.devices.TrailblazeDevicePort import xyz.block.trailblaze.devices.WebInstanceIds import xyz.block.trailblaze.mcp.McpToolProfile +import xyz.block.trailblaze.ui.TrailblazeDesktopUtil /** * Lightweight STDIO-to-HTTP proxy for MCP. @@ -160,8 +161,9 @@ class McpProxy( val stdoutForTransport = System.out val origStderr = System.err - // File-based logging to ~/.trailblaze/mcp.log (stderr may be swallowed by MCP clients) - val logDir = File(System.getProperty("user.home"), ".trailblaze") + // File-based logging to the app data dir's mcp.log (stderr may be swallowed by MCP clients). + // Goes through getDefaultAppDataDirectory() so TRAILBLAZE_HOME isolation is honored. + val logDir = TrailblazeDesktopUtil.getDefaultAppDataDirectory() logDir.mkdirs() val logFile = File(logDir, "mcp.log") val logWriter = PrintWriter(java.io.FileOutputStream(logFile, true), true) diff --git a/trailblaze-host/src/main/java/xyz/block/trailblaze/host/recording/OnDeviceRpcDeviceScreenStream.kt b/trailblaze-host/src/main/java/xyz/block/trailblaze/host/recording/OnDeviceRpcDeviceScreenStream.kt index 53ed5e7e..0dc5d37c 100644 --- a/trailblaze-host/src/main/java/xyz/block/trailblaze/host/recording/OnDeviceRpcDeviceScreenStream.kt +++ b/trailblaze-host/src/main/java/xyz/block/trailblaze/host/recording/OnDeviceRpcDeviceScreenStream.kt @@ -150,11 +150,13 @@ class OnDeviceRpcDeviceScreenStream( override suspend fun inputText(text: String) { // `hideKeyboardAfter = false` — this stream's `inputText` serves the wasm `/devices` // viewer's *live typing* path. The default tool behavior (hide the soft keyboard after - // typing) is correct for batch trail runs but wrong here: with the soft keyboard - // suppressed by [AndroidSoftKeyboardSuppressor], `HideKeyboardCommand` on Android falls - // through to a `BACK` keycode that navigates the current activity away. Sam's repro on - // PR #3021: typing "sam" navigated the device back. Recorded trail YAMLs continue to - // omit the flag, so replay still dismisses the keyboard between steps as expected. + // typing) is correct for batch trail runs but wrong here: on the accessibility driver + // the daemon's `inputText` routes through `ACTION_SET_TEXT` directly on the focused + // node, which sidesteps the soft IME entirely on the happy path. With no soft IME + // window up, `HideKeyboardCommand` falls through to a `BACK` keycode that navigates + // the current activity away. Sam's repro on PR #3021: typing "sam" navigated the + // device back. Recorded trail YAMLs continue to omit the flag, so replay still + // dismisses the keyboard between steps as expected. dispatchTool(InputTextTrailblazeTool(text = text, hideKeyboardAfter = false)) } diff --git a/trailblaze-host/src/main/java/xyz/block/trailblaze/scripting/DaemonScriptedToolBundler.kt b/trailblaze-host/src/main/java/xyz/block/trailblaze/scripting/DaemonScriptedToolBundler.kt index 86c54073..0d284383 100644 --- a/trailblaze-host/src/main/java/xyz/block/trailblaze/scripting/DaemonScriptedToolBundler.kt +++ b/trailblaze-host/src/main/java/xyz/block/trailblaze/scripting/DaemonScriptedToolBundler.kt @@ -416,11 +416,20 @@ class DaemonScriptedToolBundler( // re-triggering the sweep on every subsequent first-bundle. Individual delete // failures are already swallowed by the inner `runCatching` so wrapping the // outer block is purely defensive against `listFiles` itself throwing. + // Cross-JVM guard for the multi-daemon case: the per-JVM `sweptParentDirs` dedup only + // serializes sweeps WITHIN one daemon. When several daemons (separate JVMs) bundle in the + // same source dir — e.g. one daemon per parallel CI copy — each JVM still sweeps once, and + // those sweeps race across processes. So only delete wrappers OLDER than the esbuild + // timeout: a peer's live wrapper is at most ~2 min old; a crash-leaked one is far older. + val staleWrapperFloorMs = System.currentTimeMillis() - STALE_WRAPPER_MIN_AGE_MS scriptPath.parentFile?.canonicalFile?.let { parentDir -> sweptParentDirs.computeIfAbsent(parentDir) { runCatching { val swept = parentDir - .listFiles { f -> f.name.startsWith(WRAPPER_FILENAME_PREFIX) && f.name.endsWith(".ts") } + .listFiles { f -> + f.name.startsWith(WRAPPER_FILENAME_PREFIX) && f.name.endsWith(".ts") && + f.lastModified() < staleWrapperFloorMs + } ?.filter { runCatching { it.delete() }.getOrDefault(false) } ?.size ?: 0 @@ -819,6 +828,13 @@ class DaemonScriptedToolBundler( */ internal const val WRAPPER_FILENAME_PREFIX: String = ".trailblaze-wrapper-" + /** + * Minimum age before the stale-wrapper sweep deletes a `.trailblaze-wrapper-*.ts`. Must + * exceed the esbuild bundle timeout (2 min, see [runEsbuild]) so that when several daemons + * (separate JVMs) bundle in one source dir, no daemon sweeps a live peer's wrapper. + */ + private val STALE_WRAPPER_MIN_AGE_MS: Long = TimeUnit.MINUTES.toMillis(10) + /** * Map of script-parent directories this JVM has already swept for stale wrapper * files. Used to ensure the sweep at the top of [bundleOneInternal] runs **once diff --git a/trailblaze-host/src/main/java/xyz/block/trailblaze/scripting/LazyYamlScriptedToolRegistration.kt b/trailblaze-host/src/main/java/xyz/block/trailblaze/scripting/LazyYamlScriptedToolRegistration.kt index 89b866e6..cf57ad5c 100644 --- a/trailblaze-host/src/main/java/xyz/block/trailblaze/scripting/LazyYamlScriptedToolRegistration.kt +++ b/trailblaze-host/src/main/java/xyz/block/trailblaze/scripting/LazyYamlScriptedToolRegistration.kt @@ -58,18 +58,22 @@ class LazyYamlScriptedToolRegistration private constructor( // back to String rather than crashing session startup. Mirrors the leniency applied // by [BundleToolRegistration.buildKoogTool] for the on-device QuickJS path. val descriptor = trailblazeDescriptor.toKoogToolDescriptor(strict = false) - val serializer = QuickJsToolSerializer(name, host) + // Pass the binding to the serializer so the decoded QuickJsTrailblazeTool carries it into + // execute(). This fixes the LLM dispatch path (koogTool.decodeArgs → QuickJsTrailblazeTool → + // agent.runTrailblazeTools) where the tool was previously executed without a + // ContextSettingScriptedTool wrapper and thus never set binding.activeContext — causing the + // QuickJS asyncFunction callback (Dispatchers.Default) to find null context and return + // "no execution context installed" for every nested client.callTool() call. + val serializer = QuickJsToolSerializer(name, host, binding) return TrailblazeKoogTool( argsSerializer = serializer, descriptor = descriptor, executeTool = { args: QuickJsTrailblazeTool -> val context = trailblazeToolContextProvider() - // Set the context directly on the binding for the duration of this dispatch so - // any `client.callTool(...)` from inside the bundled handler can resolve through - // the binding — see `SessionScopedHostBinding.activeContext` for why we bypass - // the ThreadLocal here (QuickJS's async-binding scope doesn't inherit - // `asContextElement` propagation, so the ThreadLocal is unreliable in that - // callback). + // args already carries binding (via QuickJsToolSerializer); args.execute(context) will + // set binding.activeContext internally. The explicit set/clear below is a belt-and- + // suspenders guard for any future caller that reaches this lambda directly (e.g. koog + // itself executing the tool rather than going through agent.runTrailblazeTools). binding.activeContext = context val result = try { args.execute(context) diff --git a/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/MainTrailblazeApp.kt b/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/MainTrailblazeApp.kt index 7918d92b..807f2cfd 100644 --- a/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/MainTrailblazeApp.kt +++ b/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/MainTrailblazeApp.kt @@ -108,6 +108,13 @@ class MainTrailblazeApp( * The window can be shown later via the tray menu. */ headless: Boolean = false, + /** + * Optional extra Ktor routes registered on the daemon alongside the built-in + * ones. Lets a downstream desktop edition add its own daemon endpoints without + * trailblaze-host — the lower layer — having to depend on them. OSS callers + * leave this null. + */ + extraDaemonRoutes: (io.ktor.server.routing.Routing.() -> Unit)? = null, ) { TrailblazeDesktopUtil.setAppConfigForTrailblaze() @@ -142,7 +149,10 @@ class MainTrailblazeApp( port = portManager.httpPort, httpsPort = portManager.httpsPort, wait = true, - additionalRouteRegistration = allRouteRegistrations(trailblazeSavedSettingsRepo, deviceManager, hostDeviceSessionManager), + additionalRouteRegistration = { + allRouteRegistrations(trailblazeSavedSettingsRepo, deviceManager, hostDeviceSessionManager).invoke(this) + extraDaemonRoutes?.invoke(this) + }, ) return } @@ -161,7 +171,10 @@ class MainTrailblazeApp( port = portManager.httpPort, httpsPort = portManager.httpsPort, wait = false, - additionalRouteRegistration = allRouteRegistrations(trailblazeSavedSettingsRepo, deviceManager, hostDeviceSessionManager), + additionalRouteRegistration = { + allRouteRegistrations(trailblazeSavedSettingsRepo, deviceManager, hostDeviceSessionManager).invoke(this) + extraDaemonRoutes?.invoke(this) + }, ) } diff --git a/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/TrailblazeDesktopUtil.kt b/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/TrailblazeDesktopUtil.kt index b7768b93..75e3dd14 100644 --- a/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/TrailblazeDesktopUtil.kt +++ b/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/TrailblazeDesktopUtil.kt @@ -108,6 +108,9 @@ object TrailblazeDesktopUtil { * @return The default app data directory: ~/.trailblaze */ fun getDefaultAppDataDirectory(): File { + // TRAILBLAZE_HOME lets multiple daemons on one host isolate their state dir (logs, TLS + // keystore); without it concurrent daemons race the shared keystore and one dies on boot. + System.getenv("TRAILBLAZE_HOME")?.takeIf { it.isNotBlank() }?.let { return File(it) } return File(System.getProperty("user.home"), DOT_TRAILBLAZE_DIR_NAME) } diff --git a/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/TrailblazeDeviceManager.kt b/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/TrailblazeDeviceManager.kt index e18e9d3b..77e0f610 100644 --- a/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/TrailblazeDeviceManager.kt +++ b/trailblaze-host/src/main/java/xyz/block/trailblaze/ui/TrailblazeDeviceManager.kt @@ -267,6 +267,17 @@ class TrailblazeDeviceManager( referrer: TrailblazeReferrer, agentImplementation: AgentImplementation = AgentImplementation.DEFAULT, traceId: TraceId? = null, + // Optional per-run overrides (Run-config dialog). All default to the + // prior in-app behavior, so existing callers are unaffected: + // selfHeal=null → keep TrailblazeConfig's default; useRecordedSteps=false → + // LLM-drives (unchanged); maxLlmCalls=null → runner default; memory seeds empty. + selfHeal: Boolean? = null, + useRecordedSteps: Boolean = false, + maxLlmCalls: Int? = null, + initialMemorySeeds: Map = emptyMap(), + initialMemorySensitiveSeeds: Map = emptyMap(), + // Per-run override (Run-config dialog); null = appConfig default. + captureNetworkTrafficOverride: Boolean? = null, onComplete: ((TrailExecutionResult) -> Unit)? = null, ) { val settingsState = settingsRepo.serverStateFlow.value @@ -284,28 +295,34 @@ class TrailblazeDeviceManager( } catch (_: Exception) { null } + val baseConfig = TrailblazeConfig( + overrideSessionId = existingSessionId, + sendSessionStartLog = sendSessionStartLog, + sendSessionEndLog = sendSessionEndLog, + browserHeadless = !settingsState.appConfig.showWebBrowser, + preferHostAgent = settingsState.appConfig.preferHostAgent, + captureNetworkTraffic = captureNetworkTrafficOverride ?: settingsState.appConfig.captureNetworkTraffic, + ) val runYamlRequest = RunYamlRequest( yaml = yamlToRun, // Use title with ID appended for method name (e.g., for_your_business_page_5374142) // The class name will be auto-derived from testSectionName metadata testName = "test", - useRecordedSteps = false, + useRecordedSteps = useRecordedSteps, trailblazeLlmModel = currentTrailblazeLlmModelProvider(), targetAppName = settingsState.appConfig.selectedTargetAppId, trailFilePath = null, - config = TrailblazeConfig( - overrideSessionId = existingSessionId, - sendSessionStartLog = sendSessionStartLog, - sendSessionEndLog = sendSessionEndLog, - browserHeadless = !settingsState.appConfig.showWebBrowser, - preferHostAgent = settingsState.appConfig.preferHostAgent, - captureNetworkTraffic = settingsState.appConfig.captureNetworkTraffic, - ), + // Only override selfHeal when the caller set it; otherwise keep the config default. + config = if (selfHeal != null) baseConfig.copy(selfHeal = selfHeal) else baseConfig, trailblazeDeviceId = trailblazeDeviceId, driverType = trailConfigDriver, referrer = referrer, agentImplementation = agentImplementation, traceId = traceId, + // RunYamlRequest rejects maxLlmCalls with MULTI_AGENT_V3 and non-positive values. + maxLlmCalls = maxLlmCalls?.takeIf { it > 0 && agentImplementation != AgentImplementation.MULTI_AGENT_V3 }, + initialMemorySeeds = initialMemorySeeds, + initialMemorySensitiveSeeds = initialMemorySensitiveSeeds, ) val params = DesktopAppRunYamlParams( forceStopTargetApp = forceStopTargetApp, @@ -343,7 +360,12 @@ class TrailblazeDeviceManager( trailblazeDeviceId: TrailblazeDeviceId, forceNewSession: Boolean = false, sessionIdPrefix: String = "session", - deviceSummary: TrailblazeConnectedDeviceSummary? = null + deviceSummary: TrailblazeConnectedDeviceSummary? = null, + // Optional per-run capture overrides (Run-config dialog). null = fall back to + // the daemon's appConfig default, so existing callers are unaffected. + captureVideoOverride: Boolean? = null, + captureLogcatOverride: Boolean? = null, + captureIosLogsOverride: Boolean? = null, ): DeviceSessionResolution { // Two-phase to keep `startForSession` (which can block on adb/ffmpeg/xcrun // startup) OUTSIDE `sessionCreationLock` — otherwise every concurrent session @@ -375,9 +397,9 @@ class TrailblazeDeviceManager( // coordinator's reservation pattern makes that second call a no-op so the // appConfig-derived options here are what win for MCP-only paths. captureOptions = CaptureOptions( - captureVideo = true, - captureLogcat = appConfig.captureLogcat, - captureIosLogs = appConfig.captureIosLogs, + captureVideo = captureVideoOverride ?: true, + captureLogcat = captureLogcatOverride ?: appConfig.captureLogcat, + captureIosLogs = captureIosLogsOverride ?: appConfig.captureIosLogs, spriteFrameFps = 2, spriteFrameHeight = 720, spriteQuality = 80, diff --git a/trailblaze-host/src/test/java/xyz/block/trailblaze/scripting/DaemonScriptedToolBundlerTest.kt b/trailblaze-host/src/test/java/xyz/block/trailblaze/scripting/DaemonScriptedToolBundlerTest.kt index ae112cdd..268e6fa8 100644 --- a/trailblaze-host/src/test/java/xyz/block/trailblaze/scripting/DaemonScriptedToolBundlerTest.kt +++ b/trailblaze-host/src/test/java/xyz/block/trailblaze/scripting/DaemonScriptedToolBundlerTest.kt @@ -594,6 +594,9 @@ class DaemonScriptedToolBundlerTest { // have left behind. val staleWrapper = File(src.parentFile, "${DaemonScriptedToolBundler.WRAPPER_FILENAME_PREFIX}stale1234.ts") staleWrapper.writeText("// pretend this is a leftover from a SIGKILLed daemon") + // Age it past the sweep's min-age floor so it reads as a genuine crash leftover. The sweep + // deliberately spares recent wrappers (a concurrent daemon's live esbuild entry point). + staleWrapper.setLastModified(System.currentTimeMillis() - 60L * 60 * 1000) assertTrue(staleWrapper.isFile, "stale wrapper sentinel must exist before bundleOne") bundler.bundleOne(src, toolName = "doSomething") @@ -617,6 +620,8 @@ class DaemonScriptedToolBundlerTest { val src1 = writeTinyTs("sweep-once-1.ts", exportName = "first") val firstStale = File(src1.parentFile, "${DaemonScriptedToolBundler.WRAPPER_FILENAME_PREFIX}stale-before-first.ts") firstStale.writeText("// stale from a previous JVM crash") + // Age it past the sweep's min-age floor — a real crash leftover is old. + firstStale.setLastModified(System.currentTimeMillis() - 60L * 60 * 1000) bundler.bundleOne(src1, toolName = "first") assertTrue( !firstStale.exists(), @@ -640,6 +645,30 @@ class DaemonScriptedToolBundlerTest { ) } + @Test + fun `stale-wrapper sweep spares recent wrappers from concurrent daemons`() = runBlocking { + // Multi-daemon safety: a second daemon (separate JVM) may have a live wrapper in the shared + // source dir when THIS JVM runs its first sweep. The per-(JVM, dir) dedup can't see across + // processes, so the sweep must spare wrappers younger than the esbuild timeout (mtime-based, + // process-independent). Pins the fix for the cross-JVM "Could not resolve + // .trailblaze-wrapper-*" race observed when one daemon-per-copy bundles in the same dir. + assumeEsbuildPresent() + val src = writeTinyTs("sweep-recent-peer.ts", exportName = "doSomething") + val recentPeerWrapper = File( + src.parentFile, + "${DaemonScriptedToolBundler.WRAPPER_FILENAME_PREFIX}peer-live.ts", + ) + // Freshly written (current mtime) = younger than the sweep's min-age floor. + recentPeerWrapper.writeText("// a concurrent daemon's in-flight wrapper — must survive the sweep") + + bundler.bundleOne(src, toolName = "doSomething") + + assertTrue( + recentPeerWrapper.exists(), + "sweep must spare a recent (live concurrent-daemon) wrapper; deleted at ${recentPeerWrapper.absolutePath}", + ) + } + @Test fun `bundleOne does not leave a partial bundle when esbuild produces empty output`() = runBlocking { // Pins the atomic-write tmp-file pattern added in 520bb7ac5. Pre-fix, an esbuild diff --git a/trailblaze-models/src/commonMain/kotlin/xyz/block/trailblaze/api/AndroidCompactElementList.kt b/trailblaze-models/src/commonMain/kotlin/xyz/block/trailblaze/api/AndroidCompactElementList.kt index 00d89658..585b9b7e 100644 --- a/trailblaze-models/src/commonMain/kotlin/xyz/block/trailblaze/api/AndroidCompactElementList.kt +++ b/trailblaze-models/src/commonMain/kotlin/xyz/block/trailblaze/api/AndroidCompactElementList.kt @@ -199,15 +199,42 @@ object AndroidCompactElementList { // Skip system UI if (props.packageName?.startsWith("com.android.systemui") == true) return - // Skip non-visible nodes (count as offscreen if they have content) + + val scrolledAway = isOffscreen(node, screenHeight) + + // Skip non-visible nodes (count as offscreen if they have content). A scrolled-away + // editable field still gets a scroll-to-reveal affordance instead of being dropped. + // + // Important: invisible container nodes are skipped but their children are still + // recursed. Android's accessibility framework marks certain Compose interop + // containers (e.g. ViewFactoryHolder) as isVisibleToUser=false even when their + // children — including bottom-sheet modal content and overlay buttons — are + // individually visible. A hard return here would silently drop those entire + // subtrees, making the modal and its action targets invisible to the LLM. if (!props.isVisibleToUser && !includeOffscreen) { + if (scrolledAway && props.isEditable) { + lines.add(scrollToRevealLine(node, props, screenHeight, depth)) + return + } if (props.hasIdentifiableProperties) offscreenCounter() + // Don't emit this node, but recurse into children — they carry their own + // isVisibleToUser values and may be individually visible (e.g. modal sheet + // rendered inside a ViewFactoryHolder whose wrapper is marked invisible). + for (child in node.children) { + buildRecursive( + child, depth, lines, elementNodeIds, elementBounds, refMapping, + refTracker, includeBounds, includeOffscreen, includeAllElements, screenHeight, offscreenCounter, + ) + } return } // Track offscreen-but-visible elements (e.g., below the fold in a scroll view) - val offscreen = isOffscreen(node, screenHeight) - if (offscreen && !includeOffscreen && isMeaningful(props, resolveLabel(props))) { + if (scrolledAway && !includeOffscreen && isMeaningful(props, resolveLabel(props))) { + if (props.isEditable) { + lines.add(scrollToRevealLine(node, props, screenHeight, depth)) + return + } offscreenCounter() return } @@ -265,7 +292,18 @@ object AndroidCompactElementList { for (child in node.children) { val childProps = AndroidNodeProps.of(child) ?: continue if (childProps.packageName?.startsWith("com.android.systemui") == true) continue - if (!childProps.isVisibleToUser) continue + if (!childProps.isVisibleToUser) { + // Invisible child — skip emitting it but still recurse into its own children + // so visible grandchildren (e.g. inside a Compose ViewFactoryHolder wrapper) + // still surface as refs. Consistent with the same treatment in the main path. + for (grandchild in child.children) { + buildRecursive( + grandchild, depth + 1, lines, elementNodeIds, elementBounds, refMapping, + refTracker, includeBounds, includeOffscreen, includeAllElements, screenHeight, offscreenCounter, + ) + } + continue + } val childLabel = resolveLabel(childProps) val childInteractive = childProps.isClickable || childProps.isEditable || childProps.isCheckable || childProps.isHeading @@ -437,8 +475,14 @@ object AndroidCompactElementList { private fun hasVisibleDescendants(node: TrailblazeNode): Boolean { for (child in node.children) { val props = AndroidNodeProps.of(child) ?: continue - if (!props.isVisibleToUser) continue if (props.packageName?.startsWith("com.android.systemui") == true) continue + if (!props.isVisibleToUser) { + // Invisible wrapper — look through it so a container whose only visible + // content sits inside a ViewFactoryHolder (isVisibleToUser=false) is still + // correctly detected as having visible descendants and emitted with a header. + if (hasVisibleDescendants(child)) return true + continue + } if (props.isImportantForAccessibility) return true if (hasVisibleDescendants(child)) return true } @@ -453,6 +497,35 @@ object AndroidCompactElementList { private fun isOffscreen(node: TrailblazeNode, screenHeight: Int): Boolean = CompactElementListUtils.isOffscreen(node, screenHeight) + /** + * Emits a non-tappable affordance for a scrolled-away editable field so the agent + * learns the search target exists and must scroll to it, without being handed an + * off-screen ref it could tap. Scoped to editable nodes only — never plain clickables. + */ + private fun scrollToRevealLine( + node: TrailblazeNode, + props: AndroidNodeProps, + screenHeight: Int, + depth: Int, + ): String { + val direction = when { + (node.bounds?.bottom ?: 0) <= 0 -> "scroll up" + (node.bounds?.top ?: 0) >= screenHeight -> "scroll down" + else -> "scroll down" + } + val rawClass = props.className?.substringAfterLast('.') ?: "" + val shortClass = if (rawClass in GENERIC_CLASSES) "" else rawClass + val label = resolveLabel(props) ?: resolveChildLabel(node, props) + val descriptor = when { + label != null && shortClass.isNotEmpty() -> "$shortClass \"$label\"" + label != null -> "\"$label\"" + shortClass.isNotEmpty() -> shortClass + else -> "" + } + val indent = " ".repeat(depth) + return "$indent($direction to reveal) $descriptor".trimEnd() + } + /** * Android scrollable container classes that get a "ClassName [N items]:" header. * diff --git a/trailblaze-models/src/commonMain/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorMinimizer.kt b/trailblaze-models/src/commonMain/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorMinimizer.kt index c757bc04..be109e63 100644 --- a/trailblaze-models/src/commonMain/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorMinimizer.kt +++ b/trailblaze-models/src/commonMain/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorMinimizer.kt @@ -147,6 +147,17 @@ internal object TrailblazeNodeSelectorMinimizer { return when { minimized === match -> selector minimized.isEmpty() -> { + // Clearing the match here would leave a purely-positional selector when + // the selector still carries an index. A naked ordinal shifts whenever + // anything before the target changes, so retain the single most-stable + // anchor instead — re-adding one field only narrows the empty-match set, + // which already resolved uniquely. + if (selector.index != null) { + val anchor = keepMostStableField(match) + if (!anchor.isEmpty()) return selector.replaceDriverMatch(anchor) + // No non-null field existed on the original match (attribute-less node): + // a bare index is the legitimate last resort. Fall through to clearing. + } val withoutMatch = selector.clearDriverMatch() // Double-check uniqueness: a no-op-match selector should still resolve // uniquely since every empty-match candidate already passed `stillUnique` @@ -158,6 +169,16 @@ internal object TrailblazeNodeSelectorMinimizer { } } + /** + * Reduces [match] to its single most-stable non-null field. The per-driver + * greedy drop tries fields least-stable → most-stable; gating each drop on + * "at least one field still set" keeps dropping until only the single + * most-stable field remains (dropping that last one would empty the match and + * is rejected). Returns an empty match when [match] had no non-null fields. + */ + private fun keepMostStableField(match: DriverNodeMatch): DriverNodeMatch = + minimizeMatch(match) { candidate -> !candidate.isEmpty() } + /** * Dispatch helper — the per-driver minimizers all follow the same shape but * each driver has a different field set, so they're written out separately. diff --git a/trailblaze-models/src/commonMain/resources/trails/config/trailmaps/trailblaze/toolsets/core_interaction.yaml b/trailblaze-models/src/commonMain/resources/trails/config/trailmaps/trailblaze/toolsets/core_interaction.yaml index 88cec32a..53b4f5eb 100644 --- a/trailblaze-models/src/commonMain/resources/trails/config/trailmaps/trailblaze/toolsets/core_interaction.yaml +++ b/trailblaze-models/src/commonMain/resources/trails/config/trailmaps/trailblaze/toolsets/core_interaction.yaml @@ -13,6 +13,7 @@ drivers: - android-ondevice-accessibility - ios-host tools: + - clearText - eraseText - hideKeyboard - inputText diff --git a/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/AndroidCompactElementListTest.kt b/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/AndroidCompactElementListTest.kt index 84fe3f01..0da1f4cb 100644 --- a/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/AndroidCompactElementListTest.kt +++ b/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/AndroidCompactElementListTest.kt @@ -694,6 +694,177 @@ class AndroidCompactElementListTest { assertContains(result.text, "EditText \"Email\"") } + // -- Scroll-to-reveal affordance for offscreen editable fields -- + + @Test + fun `offscreen editable field above viewport emits scroll up to reveal with no ref`() { + val search = node( + detail = DriverNodeDetail.AndroidAccessibility( + className = "android.widget.EditText", + hintText = "Search all items", + isEditable = true, + ), + bounds = TrailblazeNode.Bounds(0, -120, 200, -70), + ) + val root = node(children = listOf(search)) + val result = AndroidCompactElementList.build(root, screenHeight = 800) + + assertContains(result.text, "(scroll up to reveal) EditText \"Search all items\"") + assertTrue( + result.elementNodeIds.isEmpty(), + "Offscreen editable affordance must not get a ref. Actual:\n${result.text}", + ) + assertTrue( + !result.text.contains("[ref"), + "Scroll-to-reveal line must not carry a [ref]. Actual:\n${result.text}", + ) + } + + @Test + fun `offscreen editable field below viewport emits scroll down to reveal`() { + val search = node( + detail = DriverNodeDetail.AndroidAccessibility( + className = "android.widget.EditText", + hintText = "Search all items", + isEditable = true, + ), + bounds = TrailblazeNode.Bounds(0, 900, 200, 950), + ) + val root = node(children = listOf(search)) + val result = AndroidCompactElementList.build(root, screenHeight = 800) + + assertContains(result.text, "(scroll down to reveal) EditText \"Search all items\"") + assertTrue(result.elementNodeIds.isEmpty()) + } + + @Test + fun `offscreen editable field hidden behind overlay still emits scroll to reveal`() { + // Trips gate 203 (isVisibleToUser=false) rather than gate 210; the bounds-based + // offscreen signal still classifies it as scrolled-away, so the affordance must fire. + val search = node( + detail = DriverNodeDetail.AndroidAccessibility( + className = "android.widget.EditText", + hintText = "Search all items", + isEditable = true, + isVisibleToUser = false, + ), + bounds = TrailblazeNode.Bounds(0, 900, 200, 950), + ) + val root = node(children = listOf(search)) + val result = AndroidCompactElementList.build(root, screenHeight = 800) + + assertContains(result.text, "(scroll down to reveal) EditText \"Search all items\"") + assertTrue(result.elementNodeIds.isEmpty()) + } + + @Test + fun `on-screen editable field still gets a normal ref`() { + val search = node( + detail = DriverNodeDetail.AndroidAccessibility( + className = "android.widget.EditText", + hintText = "Search all items", + isEditable = true, + ), + bounds = TrailblazeNode.Bounds(0, 100, 200, 150), + ) + val root = node(children = listOf(search)) + val result = AndroidCompactElementList.build(root, screenHeight = 800) + + assertContains(result.text, "EditText \"Search all items\"") + assertTrue(!result.text.contains("scroll"), "On-screen field must not get an affordance line") + assertTrue(result.elementNodeIds.contains(0L), "On-screen editable should still get a ref") + } + + @Test + fun `offscreen non-interactive node only increments hidden counter`() { + val label = node( + detail = DriverNodeDetail.AndroidAccessibility( + className = "android.widget.TextView", + text = "Section header", + isClickable = true, + ), + bounds = TrailblazeNode.Bounds(0, 900, 200, 950), + ) + val root = node(children = listOf(label)) + val result = AndroidCompactElementList.build(root, screenHeight = 800) + + assertTrue(!result.text.contains("scroll"), "Non-editable offscreen node must not emit an affordance") + assertContains(result.text, "offscreen elements hidden") + assertTrue(result.elementNodeIds.isEmpty()) + } + + // -- Invisible container with visible children -- + + @Test + fun `visible children of invisible container still appear in compact list`() { + // Reproduces a real-world failure where a Compose ViewFactoryHolder wrapper is + // isVisibleToUser=false but its children (modal title and Dismiss button) have the + // default isVisibleToUser=true. The compact list must recurse into children rather + // than dropping the entire subtree. + val dismissButton = node( + detail = DriverNodeDetail.AndroidAccessibility( + className = "android.widget.Button", + contentDescription = "Dismiss", + isClickable = true, + isEnabled = true, + // isVisibleToUser defaults to true + ), + ) + val modalTitle = node( + detail = DriverNodeDetail.AndroidAccessibility( + className = "android.widget.TextView", + text = "Finish setting up your account", + ), + ) + val invisibleContainer = node( + bounds = TrailblazeNode.Bounds(0, 135, 1080, 2132), + detail = DriverNodeDetail.AndroidAccessibility( + className = "androidx.compose.ui.viewinterop.ViewFactoryHolder", + isVisibleToUser = false, + ), + children = listOf(modalTitle, dismissButton), + ) + val navItem = node( + detail = DriverNodeDetail.AndroidAccessibility( + className = "android.widget.Button", + contentDescription = "Checkout", + isClickable = true, + ), + ) + val root = node(children = listOf(invisibleContainer, navItem)) + val result = AndroidCompactElementList.build(root) + + // Dismiss must be emitted as a ref-labeled interactive element, not just as + // plain text — the agent needs a ref to be able to act on it. + assertTrue( + result.elementNodeIds.contains(dismissButton.nodeId), + "Dismiss button must be emitted as a ref-labeled element", + ) + assertContains(result.text, "Checkout") + assertTrue( + !result.text.contains("ViewFactoryHolder"), + "Invisible container itself must not be emitted", + ) + } + + @Test + fun `invisible leaf node is still filtered out`() { + // A leaf node (no children) with isVisibleToUser=false should still be excluded — + // only the "recurse into children" part is the new behaviour; the node itself is skipped. + val hidden = node( + detail = DriverNodeDetail.AndroidAccessibility( + className = "android.widget.Button", + contentDescription = "Hidden", + isClickable = true, + isVisibleToUser = false, + ), + ) + val root = node(children = listOf(hidden)) + val result = AndroidCompactElementList.build(root) + + assertTrue(!result.text.contains("Hidden"), "Invisible leaf node must not appear") + } + @Test fun `hintText equal to text does not duplicate`() { // Some inputs echo hint into text; don't render "Search: Search". diff --git a/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorGeneratorAndroidAccessibilityTest.kt b/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorGeneratorAndroidAccessibilityTest.kt index a669f4b5..9e5f1f6f 100644 --- a/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorGeneratorAndroidAccessibilityTest.kt +++ b/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorGeneratorAndroidAccessibilityTest.kt @@ -3,6 +3,7 @@ package xyz.block.trailblaze.api import kotlin.test.Test import kotlin.test.assertEquals import kotlin.test.assertNotNull +import kotlin.test.assertNull import kotlin.test.assertTrue class TrailblazeNodeSelectorGeneratorAndroidAccessibilityTest : TrailblazeNodeSelectorGeneratorTestBase() { @@ -448,7 +449,11 @@ class TrailblazeNodeSelectorGeneratorAndroidAccessibilityTest : TrailblazeNodeSe // -- Strategy 21: index fallback -- @Test - fun `index fallback for identical nodes`() { + fun `index fallback for identical nodes keeps the className anchor alongside index`() { + // Three nodes that share className=View — disambiguable only by position. + // The selector must carry an index, but it should NOT be purely positional: + // the className anchor is retained so the ordinal isn't naked (a bare index + // shifts whenever anything before the target changes). nextId = 1L val node1 = node( detail = DriverNodeDetail.AndroidAccessibility(className = "android.view.View"), @@ -469,6 +474,40 @@ class TrailblazeNodeSelectorGeneratorAndroidAccessibilityTest : TrailblazeNodeSe val selector = assertUniqueMatch(root, node2) assertNotNull(selector.index, "Expected index-based selector for identical nodes") + val match = selector.driverMatch as? DriverNodeMatch.AndroidAccessibility + assertNotNull(match, "index must be paired with a content/structural anchor, not be naked") + assertEquals("android.view.View", match.classNameRegex) + } + + @Test + fun `index fallback for truly attribute-less nodes is a bare index`() { + // No matchable attributes at all (no text/contentDescription/className/ + // resourceId/uniqueId) — there is nothing to anchor on, so a bare index is + // the legitimate last resort. + nextId = 1L + val node1 = node( + detail = DriverNodeDetail.AndroidAccessibility(), + bounds = TrailblazeNode.Bounds(0, 0, 100, 50), + ) + val node2 = node( + detail = DriverNodeDetail.AndroidAccessibility(), + bounds = TrailblazeNode.Bounds(0, 50, 100, 100), + ) + val node3 = node( + detail = DriverNodeDetail.AndroidAccessibility(), + bounds = TrailblazeNode.Bounds(0, 100, 100, 150), + ) + val root = node( + children = listOf(node1, node2, node3), + bounds = TrailblazeNode.Bounds(0, 0, 100, 150), + ) + + val selector = assertUniqueMatch(root, node2) + assertNotNull(selector.index, "Expected index-based selector for identical nodes") + assertNull( + selector.driverMatch, + "attribute-less nodes have no anchor — bare index is the legitimate last resort", + ) } // -- Degenerate: single node tree -- diff --git a/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorMinimizerTest.kt b/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorMinimizerTest.kt index 875a2fb3..ce57bbfe 100644 --- a/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorMinimizerTest.kt +++ b/trailblaze-models/src/jvmTest/kotlin/xyz/block/trailblaze/api/TrailblazeNodeSelectorMinimizerTest.kt @@ -487,6 +487,142 @@ class TrailblazeNodeSelectorMinimizerTest : TrailblazeNodeSelectorGeneratorTestB ) } + // --------------------------------------------------------------------------- + // Index-carrying selectors must keep an anchor (never collapse to bare index). + // --------------------------------------------------------------------------- + + @Test + fun `index-carrying selector retains the most-stable anchor instead of collapsing to bare index`() { + // Every node — root included — shares className=View, so the class predicate + // is a no-op: the class-filtered index equals the global index. Dropping + // className keeps the target unique under `{index}` alone, so the pre-fix + // minimizer stripped className and emitted a purely-positional `{index: 2}`. + // Post-fix, because the selector carries an index, the most-stable surviving + // match field (className here) must be retained as an anchor. + // + // Distinct stacked bounds make the resolver's top-then-left sort + // deterministic. The root sorts first (top=0), so the target (the 2nd child, + // top=100) lands at global index 2. + nextId = 1L + val node1 = node( + detail = DriverNodeDetail.AndroidAccessibility(className = "android.view.View"), + bounds = TrailblazeNode.Bounds(0, 10, 100, 50), + ) + val target = node( + detail = DriverNodeDetail.AndroidAccessibility(className = "android.view.View"), + bounds = TrailblazeNode.Bounds(0, 100, 100, 150), + ) + val node3 = node( + detail = DriverNodeDetail.AndroidAccessibility(className = "android.view.View"), + bounds = TrailblazeNode.Bounds(0, 200, 100, 250), + ) + val root = node( + detail = DriverNodeDetail.AndroidAccessibility(className = "android.view.View"), + bounds = TrailblazeNode.Bounds(0, 0, 100, 300), + children = listOf(node1, target, node3), + ) + + val raw = TrailblazeNodeSelector( + androidAccessibility = DriverNodeMatch.AndroidAccessibility(classNameRegex = "android\\.view\\.View"), + index = 2, + ) + assertTrue(isUniqueMatch(root, target, raw)) + + val minimized = TrailblazeNodeSelectorMinimizer.minimize(root, target, raw) + + assertEquals(2, minimized.index, "index must survive — it's the disambiguator") + val match = minimized.driverMatch as? DriverNodeMatch.AndroidAccessibility + assertNotNull( + match, + "an index-carrying selector must keep a content/structural anchor, never collapse to bare index", + ) + assertEquals( + "android\\.view\\.View", + match.classNameRegex, + "the most-stable surviving match field must be retained alongside the index", + ) + assertTrue( + isUniqueMatch(root, target, minimized), + "anchor + index must still uniquely resolve to target", + ) + } + + @Test + fun `attribute-less node still collapses to bare index as the legitimate last resort`() { + // A genuinely attribute-less node — no text/contentDescription/className/ + // resourceId/uniqueId. There is no anchor to retain, so a bare `{index}` is + // the only thing the minimizer can emit. This proves the keep-an-anchor + // guard does not over-fix the legitimate attribute-less fallback. + // + // Distinct stacked bounds make the resolver's sort deterministic; the root + // (top=0) sorts first, so the target (2nd child, top=100) is global index 2. + nextId = 1L + val node1 = node( + detail = DriverNodeDetail.AndroidAccessibility(), + bounds = TrailblazeNode.Bounds(0, 10, 100, 50), + ) + val target = node( + detail = DriverNodeDetail.AndroidAccessibility(), + bounds = TrailblazeNode.Bounds(0, 100, 100, 150), + ) + val node3 = node( + detail = DriverNodeDetail.AndroidAccessibility(), + bounds = TrailblazeNode.Bounds(0, 200, 100, 250), + ) + val root = node( + detail = DriverNodeDetail.AndroidAccessibility(), + bounds = TrailblazeNode.Bounds(0, 0, 100, 300), + children = listOf(node1, target, node3), + ) + + val raw = TrailblazeNodeSelector(index = 2) + assertTrue(isUniqueMatch(root, target, raw)) + + val minimized = TrailblazeNodeSelectorMinimizer.minimize(root, target, raw) + + assertEquals(2, minimized.index, "index must survive") + assertNull( + minimized.driverMatch, + "an attribute-less node has no anchor to retain — bare index is the legitimate last resort", + ) + } + + @Test + fun `selector with no index still clears an empty match to a pure content selector`() { + // No index on the selector — when the match minimizes to empty (the + // containsChild predicate alone disambiguates), clearing the empty shell is + // correct: a pure content/relationship selector is not positional, so the + // keep-an-anchor guard must not fire here. + nextId = 1L + val childA = node(detail = DriverNodeDetail.AndroidAccessibility(text = "UniqueChild")) + val target = node( + detail = DriverNodeDetail.AndroidAccessibility(className = "android.view.View"), + children = listOf(childA), + ) + val sibling = node( + detail = DriverNodeDetail.AndroidAccessibility(className = "android.view.View"), + children = listOf(node(detail = DriverNodeDetail.AndroidAccessibility(text = "Other"))), + ) + val root = node(children = listOf(target, sibling)) + + val raw = TrailblazeNodeSelector( + androidAccessibility = DriverNodeMatch.AndroidAccessibility(classNameRegex = "android\\.view\\.View"), + containsChild = TrailblazeNodeSelector( + androidAccessibility = DriverNodeMatch.AndroidAccessibility(textRegex = "UniqueChild"), + ), + ) + assertTrue(isUniqueMatch(root, target, raw)) + + val minimized = TrailblazeNodeSelectorMinimizer.minimize(root, target, raw) + + assertNull(minimized.index, "no index was present — none should appear") + assertNull( + minimized.driverMatch, + "with no index, an empty match clears to a pure content/relationship selector", + ) + assertNotNull(minimized.containsChild, "containsChild is the load-bearing disambiguator") + } + // --------------------------------------------------------------------------- // Idempotence. // --------------------------------------------------------------------------- diff --git a/trailblaze-quickjs-tools/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/quickjs/tools/QuickJsToolSerializer.kt b/trailblaze-quickjs-tools/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/quickjs/tools/QuickJsToolSerializer.kt index e7e58a12..1bddc316 100644 --- a/trailblaze-quickjs-tools/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/quickjs/tools/QuickJsToolSerializer.kt +++ b/trailblaze-quickjs-tools/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/quickjs/tools/QuickJsToolSerializer.kt @@ -16,12 +16,21 @@ import xyz.block.trailblaze.toolcalls.ToolName * * Mirror of the legacy module's `BundleToolSerializer`, but the constructed tool dispatches * via a [QuickJsToolHost] rather than an MCP `Client`. + * + * [binding] should be supplied whenever this serializer is used on the koog dispatch path + * (i.e. from [xyz.block.trailblaze.scripting.LazyYamlScriptedToolRegistration.buildKoogTool]). + * The binding is forwarded to [QuickJsTrailblazeTool] so [QuickJsTrailblazeTool.execute] + * can set [SessionScopedHostBinding.activeContext] before the QuickJS evaluation starts — + * see [QuickJsTrailblazeTool.binding] for the full rationale. */ class QuickJsToolSerializer( private val advertisedName: ToolName, private val host: QuickJsToolHost, + private val binding: SessionScopedHostBinding? = null, ) : KSerializer { + constructor(advertisedName: ToolName, host: QuickJsToolHost) : this(advertisedName, host, null) + override val descriptor: SerialDescriptor = buildClassSerialDescriptor("quickjs:${advertisedName.toolName}") @@ -30,7 +39,7 @@ class QuickJsToolSerializer( ?: error("QuickJsToolSerializer requires JSON decoding (got ${decoder::class.simpleName}).") val argsElement = jsonDecoder.decodeJsonElement() val args = argsElement as? JsonObject ?: JsonObject(emptyMap()) - return QuickJsTrailblazeTool(host = host, advertisedName = advertisedName, args = args) + return QuickJsTrailblazeTool(host = host, advertisedName = advertisedName, args = args, binding = binding) } override fun serialize(encoder: Encoder, value: QuickJsTrailblazeTool) { diff --git a/trailblaze-quickjs-tools/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/quickjs/tools/QuickJsTrailblazeTool.kt b/trailblaze-quickjs-tools/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/quickjs/tools/QuickJsTrailblazeTool.kt index 4202196e..4eb63e1a 100644 --- a/trailblaze-quickjs-tools/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/quickjs/tools/QuickJsTrailblazeTool.kt +++ b/trailblaze-quickjs-tools/src/jvmAndAndroid/kotlin/xyz/block/trailblaze/quickjs/tools/QuickJsTrailblazeTool.kt @@ -30,8 +30,28 @@ class QuickJsTrailblazeTool( internal val advertisedName: ToolName, /** LLM-supplied args, decoded from the tool-call's `arguments` JSON. */ internal val args: JsonObject, + /** + * Session-scoped binding for this tool's host. When non-null, [execute] sets + * [SessionScopedHostBinding.activeContext] for the duration of the QuickJS evaluation so + * nested `client.callTool(...)` calls from inside the bundle can resolve the session context. + * + * Required for the koog dispatch path (LLM agent → [buildKoogTool] argsSerializer → + * [execute] directly). Without it, the QuickJS `asyncFunction` callback fires on + * [kotlinx.coroutines.Dispatchers.Default] where the [ToolExecutionContextThreadLocal] is + * null and [SessionScopedHostBinding.activeContext] was never set, causing every nested + * `trailblaze.call(...)` to return "no execution context installed". + * + * The [ContextSettingScriptedTool][xyz.block.trailblaze.scripting.LazyYamlScriptedToolRegistration] + * wrapper (used by the `tools:` trail-item / [decodeToolCall] path) sets [activeContext] + * from outside; when [binding] is non-null here, both paths set it and the outer wrapper's + * finally-clear is a harmless no-op after the inner finally already cleared it. + */ + internal val binding: SessionScopedHostBinding? = null, ) : HostLocalExecutableTrailblazeTool, RawArgumentTrailblazeTool { + constructor(host: QuickJsToolHost, advertisedName: ToolName, args: JsonObject) : + this(host, advertisedName, args, null) + override val advertisedToolName: String get() = advertisedName.toolName // Surface the LLM-supplied args as `rawToolArguments` so `toLogPayload()` writes them @@ -43,6 +63,13 @@ class QuickJsTrailblazeTool( override suspend fun execute(toolExecutionContext: TrailblazeToolExecutionContext): TrailblazeToolResult { val ctx = buildCtxEnvelope(toolExecutionContext) + // Install the session context on the binding so nested client.callTool() calls from inside + // the JS bundle can resolve it via SessionScopedHostBinding.callFromBundle. The QuickJS + // asyncFunction callback fires on Dispatchers.Default (a different thread) so + // ToolExecutionContextThreadLocal is unreliable there; @Volatile activeContext is the only + // mechanism that crosses the thread boundary safely. Clear in finally so a failed dispatch + // doesn't leak the context into a subsequent unrelated dispatch on the same binding. + binding?.activeContext = toolExecutionContext return try { val resultJson = host.callTool(advertisedName.toolName, args, ctx) resultJson.toTrailblazeToolResult(toolName = advertisedName.toolName) @@ -53,6 +80,8 @@ class QuickJsTrailblazeTool( throw e } catch (e: Exception) { TrailblazeToolResult.Error.ExceptionThrown.fromThrowable(e, this) + } finally { + binding?.activeContext = null } } diff --git a/trailblaze-report/src/main/java/xyz/block/trailblaze/report/GenerateTestResultsCliCommand.kt b/trailblaze-report/src/main/java/xyz/block/trailblaze/report/GenerateTestResultsCliCommand.kt index 53901c35..72ff1e47 100644 --- a/trailblaze-report/src/main/java/xyz/block/trailblaze/report/GenerateTestResultsCliCommand.kt +++ b/trailblaze-report/src/main/java/xyz/block/trailblaze/report/GenerateTestResultsCliCommand.kt @@ -229,6 +229,7 @@ open class GenerateTestResultsCliCommand : CliktCommand(name = "generate-test-re ci_job_id = hostCiContext.ci_job_id, logs_zip_filename = hostCiContext.logs_zip_filename, logs_zip_url = hostCiContext.logs_zip_url, + priority = sessionInfo.trailConfig?.priority, ) ) } catch (e: Exception) { @@ -338,6 +339,8 @@ open class GenerateTestResultsCliCommand : CliktCommand(name = "generate-test-re devices = getEnvList("TRAILBLAZE_DEVICES"), android_build_url = getEnv("ANDROID_BUILD_URL"), ios_build_url = getEnv("IOS_BUILD_URL"), + android_build_version = getEnv("ANDROID_BUILD_VERSION"), + ios_build_version = getEnv("IOS_BUILD_VERSION"), retry_count = getEnv("TRAILBLAZE_TEST_RETRY_COUNT")?.toIntOrNull() ?: 0, ai_enabled = getEnv("TRAILBLAZE_AI_ENABLED")?.toBoolean() ?: true, // Source of truth is the `TRAILBLAZE_SELF_HEAL_ENABLED` env var that a CI pipeline runner diff --git a/trailblaze-report/src/main/java/xyz/block/trailblaze/report/models/SessionResult.kt b/trailblaze-report/src/main/java/xyz/block/trailblaze/report/models/SessionResult.kt index a378caa1..2c50534c 100644 --- a/trailblaze-report/src/main/java/xyz/block/trailblaze/report/models/SessionResult.kt +++ b/trailblaze-report/src/main/java/xyz/block/trailblaze/report/models/SessionResult.kt @@ -88,6 +88,9 @@ data class SessionResult( /** Failure reasons from replaced attempts (populated during dedup when this result superseded earlier failures) */ val replaced_failure_reasons: List = emptyList(), + /** Priority label for this test (e.g. "P0", "P1", "P2"). Null when not set. */ + val priority: String? = null, + // === CI Provenance (per-session) === /** * CI job ID that produced this session — typically the provider's per-step UUID. Captured diff --git a/trailblaze-report/src/main/java/xyz/block/trailblaze/report/utils/LogsRepo.kt b/trailblaze-report/src/main/java/xyz/block/trailblaze/report/utils/LogsRepo.kt index 19ddf61b..625832bf 100644 --- a/trailblaze-report/src/main/java/xyz/block/trailblaze/report/utils/LogsRepo.kt +++ b/trailblaze-report/src/main/java/xyz/block/trailblaze/report/utils/LogsRepo.kt @@ -133,9 +133,20 @@ class LogsRepo( ensureWatchingSession(sessionId) sessionInfoWatcherJobs[sessionId] = fileOperationScope.launch { getSessionLogsFlow(sessionId).collect { - // When an active session's logs change, refresh SessionInfo - _sessionInfoFlow.value = sessionsFlow.value.mapNotNull { sid -> - if (sid in sessionInfoWatcherJobs) getSessionInfo(sid) else getSessionInfoDirect(sid) + // When an active session's logs change, refresh SessionInfo. + // Reuse the existing cached SessionInfo for completed (non-watched) + // sessions — their logs don't change, so there is no reason to + // re-read their log files from disk on every update. + // Use update{} for an atomic read-compute-write so concurrent log + // events from multiple in-progress sessions don't clobber each other. + _sessionInfoFlow.update { currentInfo -> + val cachedInfoById = currentInfo.associateBy { it.sessionId } + sessionsFlow.value.mapNotNull { sid -> + when { + sid in sessionInfoWatcherJobs -> getSessionInfo(sid) + else -> cachedInfoById[sid] ?: getSessionInfoDirect(sid) + } + } } } } @@ -429,6 +440,7 @@ class LogsRepo( } val removedSessions = previousSessionIds.toSet() - currentSessionIds.toSet() + val addedSessions = currentSessionIds.toSet() - previousSessionIds.toSet() // Clean up flows for removed sessions removedSessions.forEach { sessionId -> @@ -437,6 +449,15 @@ class LogsRepo( // Update the flow with new session list _sessionsFlow.value = currentSessionIds + + // For each newly detected session directory, schedule retries so the + // session surfaces in _sessionInfoFlow once its first log is written. + // Directory creation and the first log write are not atomic — the + // sessionsFlow.collect handler may call getSessionInfoDirect before any + // log files exist, getting null and silently dropping the session with + // no retry path. scheduleSessionInfoRetry fixes that by polling until + // info resolves or a generous deadline is reached. + addedSessions.forEach { sessionId -> scheduleSessionInfoRetry(sessionId) } } catch (e: Exception) { Console.log("[LogsRepo] Error processing session list event: ${e.message}") e.printStackTrace() @@ -449,6 +470,60 @@ class LogsRepo( } } + /** + * Schedules a series of retries to surface a newly-detected session once its + * first log file is written. + * + * There is an inherent race between the filesystem event that signals directory + * creation and the daemon writing the first status log into that directory. + * [sessionsFlow.collect] calls [getSessionInfoDirect] immediately on detection; + * if no parseable log exists yet, it returns null and the session is silently + * dropped from [_sessionInfoFlow] with no retry path. + * + * This function closes that gap by polling with exponential back-off. Each + * attempt reads log files directly from disk; on success it patches + * [_sessionInfoFlow] immediately, replacing any stale entry. The next + * [sessionsFlow.collect] emission will then find the session already present + * and re-evaluate it normally. + * + * Back-off schedule: 500 ms → 1 s → 2 s → 4 s → 8 s → 16 s (≈ 31.5 s total). + * Fast local runs typically resolve on the first attempt; slower CI or + * download scenarios resolve within a few retries. + */ + private fun scheduleSessionInfoRetry(sessionId: SessionId) { + fileOperationScope.launch { + var delayMs = 500L + repeat(6) { + delay(delayMs) + val info = getSessionInfoDirect(sessionId) + if (info == null) { + delayMs = minOf(delayMs * 2, 30_000L) + return@repeat + } + // Rebuild _sessionInfoFlow in sessionsFlow order, replacing or inserting + // the resolved info. update{} makes the read-compute-write atomic. + // The guard on currentSessionIds prevents re-adding a session that was + // deleted from the filesystem while this retry coroutine was in flight — + // the retry may have read valid logs just before deletion, but sessionsFlow + // will no longer list the session, so it should not appear in the UI. + _sessionInfoFlow.update { current -> + val currentSessionIds = sessionsFlow.value + if (sessionId !in currentSessionIds) { + // Session removed while retry was running; sessionsFlow.collect will + // have already cleaned _sessionInfoFlow — leave it untouched. + current + } else { + // Merge resolved info into the current snapshot, then project through + // currentSessionIds to preserve order and exclude stale entries. + val merged = current.associateBy { it.sessionId } + (sessionId to info) + currentSessionIds.mapNotNull { sid -> merged[sid] } + } + } + return@launch + } + } + } + /** * Starts watching a specific session and updates its StateFlow with new logs. */ diff --git a/trailblaze-server/src/main/java/xyz/block/trailblaze/logs/server/SslConfig.kt b/trailblaze-server/src/main/java/xyz/block/trailblaze/logs/server/SslConfig.kt index 76b2e659..bb69853b 100644 --- a/trailblaze-server/src/main/java/xyz/block/trailblaze/logs/server/SslConfig.kt +++ b/trailblaze-server/src/main/java/xyz/block/trailblaze/logs/server/SslConfig.kt @@ -18,7 +18,10 @@ object SslConfig { // Store the keystore under ~/.trailblaze/ so it works regardless of the working directory. // Previously used a relative "build/keystore.jks" path which failed when the daemon was // launched from a directory without a build/ folder (e.g., via the MCP stdio proxy). - val trailblazeDir = File(System.getProperty("user.home"), ".trailblaze") + // TRAILBLAZE_HOME lets concurrent daemons isolate the keystore dir; otherwise N daemons + // racing to generate/read one shared keystore.jks on boot can crash before binding. + val trailblazeDir = System.getenv("TRAILBLAZE_HOME")?.takeIf { it.isNotBlank() }?.let { File(it) } + ?: File(System.getProperty("user.home"), ".trailblaze") trailblazeDir.mkdirs() val keyStoreFile = File(trailblazeDir, "keystore.jks") diff --git a/trailblaze-server/src/main/java/xyz/block/trailblaze/mcp/newtools/StepToolSet.kt b/trailblaze-server/src/main/java/xyz/block/trailblaze/mcp/newtools/StepToolSet.kt index 9d5df24c..ad256f4c 100644 --- a/trailblaze-server/src/main/java/xyz/block/trailblaze/mcp/newtools/StepToolSet.kt +++ b/trailblaze-server/src/main/java/xyz/block/trailblaze/mcp/newtools/StepToolSet.kt @@ -382,7 +382,17 @@ class StepToolSet( val recommendationContext = RecommendationContext( objective = objective, progressSummary = context, - hint = if (isVerify) "Verify this assertion using read-only tools only. Do not tap, swipe, or type." else null, + hint = if (isVerify) { + // Verify steps must leave a recording: call at least one assertion tool that returns + // success, otherwise the captured trail has empty recording.tools for the verify and + // strict-replay has nothing to validate. Visual reasoning alone is not enough. + "Verify this assertion using read-only tools only. Do not tap, swipe, or type. " + + "You MUST call at least one assertion tool (e.g. assertVisibleBySelector, " + + "assertVisibleWithText, or assertWithAI) AND have it return success before marking " + + "the objective complete. Visual inspection alone is NOT a valid verification — the " + + "captured trail needs a concrete assertion tool call on record so the trail can be " + + "replayed deterministically." + } else null, attemptNumber = 1, fast = isFast, ) diff --git a/trailblaze-ui/src/commonMain/kotlin/xyz/block/trailblaze/ui/models/TrailblazeServerState.kt b/trailblaze-ui/src/commonMain/kotlin/xyz/block/trailblaze/ui/models/TrailblazeServerState.kt index 2fe696bc..6ab9c078 100644 --- a/trailblaze-ui/src/commonMain/kotlin/xyz/block/trailblaze/ui/models/TrailblazeServerState.kt +++ b/trailblaze-ui/src/commonMain/kotlin/xyz/block/trailblaze/ui/models/TrailblazeServerState.kt @@ -110,6 +110,14 @@ data class TrailblazeServerState( * signals or any cross-cutting network behavior. */ val captureNetworkTraffic: Boolean = false, + /** + * When true, the desktop app connects the device's analytics agent for the + * duration of a run so events emitted during the trail can be surfaced in the + * run timeline. Only yields events for an instrumented build; a plain app + * captures nothing. Off by default. The actual capture mechanism is wired in + * by the desktop app (kept out of this generic model). + */ + val captureAnalytics: Boolean = false, // Last navigation route (restored on app restart) val lastRoute: String? = null, // Qualified class name of the last visited route // Self-test server: expose the live desktop window as a Compose RPC test target