Skip to content

Commit 0b2c98a

Browse files
authored
Merge pull request #39 from webdriverio/release/v3.0.0
Release/v3.0.0 with breaking changes
2 parents 3998141 + 8402bf5 commit 0b2c98a

61 files changed

Lines changed: 2218 additions & 1510 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/lint.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: Lint
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- main
7+
push:
8+
branches:
9+
- main
10+
11+
permissions:
12+
contents: read
13+
14+
jobs:
15+
lint:
16+
runs-on: ubuntu-latest
17+
steps:
18+
- name: Checkout Code
19+
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
20+
21+
- name: Setup pnpm
22+
uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0
23+
24+
- name: Setup Node.js
25+
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
26+
with:
27+
node-version-file: '.nvmrc'
28+
cache: 'pnpm'
29+
30+
- name: Install Dependencies
31+
run: pnpm install
32+
33+
- name: Lint & Type Check
34+
run: pnpm run lint

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@
44
node_modules
55
lib
66

7+
settings.local.json
78
*.tgz

CLAUDE.md

Lines changed: 105 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -16,45 +16,81 @@ npm start # Run built server from lib/server.js
1616
```
1717
src/
1818
├── server.ts # MCP server entry, registers all tools + MCP resources
19+
├── session/
20+
│ ├── state.ts # Session state maps, getBrowser(), getState(), SessionMetadata
21+
│ └── lifecycle.ts # registerSession(), handleSessionTransition(), closeSession()
22+
├── providers/
23+
│ ├── types.ts # SessionProvider interface, ConnectionConfig
24+
│ ├── local-browser.provider.ts # Chrome/Firefox/Edge/Safari capability building
25+
│ └── local-appium.provider.ts # iOS/Android via appium.config.ts
1926
├── tools/
20-
│ ├── browser.tool.ts # Session state + start_browser + getBrowser()
21-
│ ├── app-session.tool.ts # start_app_session (iOS/Android via Appium)
22-
│ ├── navigate.tool.ts # URL navigation
23-
│ ├── get-visible-elements.tool.ts # Element detection (web + mobile)
24-
│ ├── click.tool.ts # Click/tap actions
27+
│ ├── session.tool.ts # start_session (browser+mobile), close_session
28+
│ ├── tabs.tool.ts # switch_tab
29+
│ ├── launch-chrome.tool.ts # launch_chrome (remote debugging)
30+
│ ├── navigate.tool.ts # navigateAction() + navigateTool
31+
│ ├── click.tool.ts # clickAction() + clickTool
32+
│ ├── set-value.tool.ts # setValueAction() + setValueTool
33+
│ ├── scroll.tool.ts # scrollAction() + scrollTool
34+
│ ├── gestures.tool.ts # tapAction(), swipeAction(), dragAndDropAction()
35+
│ ├── context.tool.ts # switch_context (native/webview)
36+
│ ├── device.tool.ts # rotate_device, hide_keyboard
37+
│ ├── emulate-device.tool.ts # emulate_device (viewport/UA)
38+
│ ├── cookies.tool.ts # set_cookie, delete_cookies
39+
│ ├── execute-script.tool.ts # execute_script
40+
│ ├── get-elements.tool.ts # get_elements (all elements, incl. below fold)
2541
│ └── ... # Other tools follow same pattern
42+
├── resources/
43+
│ ├── index.ts # ResourceDefinition exports
44+
│ ├── sessions.resource.ts # wdio://sessions, wdio://session/*/steps, wdio://session/*/code
45+
│ ├── elements.resource.ts # wdio://session/current/elements
46+
│ ├── accessibility.resource.ts # wdio://session/current/accessibility
47+
│ ├── screenshot.resource.ts # wdio://session/current/screenshot
48+
│ ├── cookies.resource.ts # wdio://session/current/cookies
49+
│ ├── tabs.resource.ts # wdio://session/current/tabs
50+
│ ├── contexts.resource.ts # wdio://session/current/contexts
51+
│ ├── app-state.resource.ts # wdio://session/current/app-state
52+
│ └── geolocation.resource.ts # wdio://session/current/geolocation
2653
├── recording/
2754
│ ├── step-recorder.ts # withRecording HOF, appendStep, session history access
28-
│ ├── code-generator.ts # SessionHistory → WebdriverIO JS code
29-
│ └── resources.ts # MCP resource builders (sessions index, step log)
55+
│ └── code-generator.ts # SessionHistory → WebdriverIO JS code
3056
├── scripts/
31-
│ └── get-interactable-browser-elements.ts # Browser-context script
57+
│ ├── get-interactable-browser-elements.ts # Browser-context element detection
58+
│ ├── get-browser-accessibility-tree.ts # Browser-context accessibility tree
59+
│ ├── get-visible-mobile-elements.ts # Mobile visible element detection
60+
│ └── get-elements.ts # Filter + paginate elements (used by tool + resource)
3261
├── locators/
3362
│ ├── element-filter.ts # Platform-specific element classification
34-
│ ├── generate-all-locators.ts # Multi-strategy selector generation
35-
│ └── source-parsing.ts # XML page source parsing for mobile
63+
│ ├── locator-generation.ts # Multi-strategy selector generation
64+
│ ├── xml-parsing.ts # XML page source parsing for mobile
65+
│ ├── constants.ts # Shared locator constants
66+
│ ├── types.ts # Locator type definitions
67+
│ └── index.ts # Public exports
3668
├── config/
37-
│ └── appium.config.ts # iOS/Android capability builders
69+
│ └── appium.config.ts # iOS/Android capability builders (used by local-appium.provider)
70+
├── utils/
71+
│ ├── parse-variables.ts # URI template variable parsing (parseBool, parseNumber, etc.)
72+
│ └── zod-helpers.ts # coerceBoolean and other Zod utilities
3873
└── types/
3974
├── tool.ts # ToolDefinition interface
75+
├── resource.ts # ResourceDefinition interface
4076
└── recording.ts # RecordedStep, SessionHistory interfaces
4177
```
4278

4379
### Session State
4480

45-
Single active session model in `browser.tool.ts`:
81+
Single active session model in `src/session/state.ts`:
4682

4783
```typescript
48-
const browsers: Map<string, WebdriverIO.Browser> = new Map();
49-
let currentSession: string | null = null;
50-
const sessionMetadata: Map<string, SessionMetadata> = new Map();
51-
52-
export function getBrowser(): WebdriverIO.Browser {
53-
// Returns current active session or throws
54-
}
84+
// Private state — access via getState() or getBrowser()
85+
export function getBrowser(): WebdriverIO.Browser { ... }
86+
export function getState() { return state; }
87+
export interface SessionMetadata { type: 'browser' | 'ios' | 'android'; capabilities: Record<string, unknown>; isAttached: boolean; }
5588
```
5689

57-
State shared with `app-session.tool.ts` via `(getBrowser as any).__state`.
90+
Session lifecycle managed via `src/session/lifecycle.ts`:
91+
- `registerSession()` — registers browser + metadata + history, handles transition sentinel
92+
- `handleSessionTransition()` — appends `__session_transition__` step to outgoing session
93+
- `closeSession()` — terminates or detaches, marks endedAt, cleans up maps
5894

5995
### Tool Pattern
6096

@@ -81,17 +117,32 @@ export const myTool: ToolCallback = async ({ param }: { param: string }) => {
81117
}
82118
};
83119

84-
// 3. Register in server.ts
85-
server.tool(myToolDefinition.name, myToolDefinition.description, myToolDefinition.inputSchema, myTool);
120+
// 3. Register in server.ts via the registerTool helper
121+
registerTool(myToolDefinition, myTool);
86122
```
87123

88124
### Recording
89125

90-
All tools are wrapped with `withRecording()` in `server.ts`. Steps accumulate in `state.sessionHistory` (keyed by sessionId).
91-
MCP resources expose history without tool calls:
92-
- `wdio://sessions` — index of all sessions (fixed URI, discoverable via ListResources)
93-
- `wdio://session/current/steps` — current session step log + generated JS (fixed URI)
94-
- `wdio://session/{sessionId}/steps` — any session by ID (URI template, NOT listed by ListResources — see `docs/architecture/mcp-resources-notes.md`)
126+
Selected tools are wrapped with `withRecording()` in `server.ts`. Steps accumulate in `state.sessionHistory` (keyed by sessionId).
127+
128+
MCP resources expose live session data — at fixed URIs discoverable via ListResources, except where marked as a URI template:
129+
130+
**Session history:**
131+
- `wdio://sessions` — index of all sessions
132+
- `wdio://session/current/steps` — current session step log
133+
- `wdio://session/current/code` — generated WebdriverIO JS for current session
134+
- `wdio://session/{sessionId}/steps` — step log for any session (URI template)
135+
- `wdio://session/{sessionId}/code` — generated JS for any session (URI template)
136+
137+
**Live page state (current session):**
138+
- `wdio://session/current/elements` — interactable elements (viewport-only; use the `get_elements` tool with `inViewportOnly: false` for all elements)
139+
- `wdio://session/current/accessibility` — accessibility tree
140+
- `wdio://session/current/screenshot` — screenshot (base64)
141+
- `wdio://session/current/cookies` — browser cookies
142+
- `wdio://session/current/tabs` — open browser tabs
143+
- `wdio://session/current/contexts` — native/webview contexts (mobile)
144+
- `wdio://session/current/app-state` — mobile app state
145+
- `wdio://session/current/geolocation` — device geolocation
95146

96147
### Build
97148

@@ -103,46 +154,53 @@ MCP resources expose history without tool calls:
103154

104155
| File | Purpose |
105156
|----------------------------------------------------|-----------------------------------------------|
106-
| `src/server.ts` | MCP server init, tool registration |
107-
| `src/tools/browser.tool.ts` | Session state management, `getBrowser()` |
108-
| `src/tools/app-session.tool.ts` | Appium session creation |
157+
| `src/server.ts` | MCP server init, tool + resource registration |
158+
| `src/session/state.ts` | Session state maps, `getBrowser()`, `getState()` |
159+
| `src/session/lifecycle.ts` | `registerSession()`, `closeSession()`, session transitions |
160+
| `src/tools/session.tool.ts` | `start_session` (browser + mobile), `close_session` |
161+
| `src/tools/tabs.tool.ts` | `switch_tab` |
162+
| `src/tools/get-elements.tool.ts` | `get_elements` — all elements with filtering + pagination |
163+
| `src/resources/` | All MCP resource definitions (10 files) |
164+
| `src/providers/local-browser.provider.ts` | Chrome/Firefox/Edge/Safari capability building |
165+
| `src/providers/local-appium.provider.ts` | iOS/Android capabilities via appium.config.ts |
109166
| `src/scripts/get-interactable-browser-elements.ts` | Browser-context element detection |
110167
| `src/locators/` | Mobile element detection + locator generation |
111-
| `src/recording/step-recorder.ts` | `withRecording(toolName, cb)` HOF — wraps every tool for step logging |
168+
| `src/recording/step-recorder.ts` | `withRecording(toolName, cb)` HOF — wraps tools for step logging |
112169
| `src/recording/code-generator.ts` | Generates runnable WebdriverIO JS from `SessionHistory` |
113-
| `src/recording/resources.ts` | Builds text for `wdio://sessions` and `wdio://session/*/steps` resources |
170+
| `src/utils/zod-helpers.ts` | `coerceBoolean` for client interop |
114171
| `tsup.config.ts` | Build configuration |
115172

116173
## Gotchas
117174

118175
### Console Output
119176

120-
All console methods redirect to stderr. Chrome writes to stdout which corrupts MCP stdio protocol.
177+
All console methods redirect to stderr via `console.error`. Chrome writes to stdout, which corrupts the MCP stdio protocol.
121178

122179
```typescript
123180
// In server.ts - do not remove
124-
console.log = (...args) => process.stderr.write(util.format(...args) + '\n');
181+
console.log = (...args) => console.error('[LOG]', ...args);
182+
console.info = (...args) => console.error('[INFO]', ...args);
183+
console.warn = (...args) => console.error('[WARN]', ...args);
184+
console.debug = (...args) => console.error('[DEBUG]', ...args);
125185
```
126186

127187
### Browser Scripts Must Be Self-Contained
128188

129189
`get-interactable-browser-elements.ts` executes in browser context via `browser.execute()`. Cannot use Node.js APIs or
130190
external imports.
131191

132-
### Mobile State Sharing Hack
192+
### Auto-Detach Behavior
133193

134-
`app-session.tool.ts` accesses browser.tool.ts state via:
194+
Sessions created with `noReset: true` or without `appPath` automatically detach on close (the session is not terminated on the Appium
195+
server).
135196

136-
```typescript
137-
const state = (getBrowser as any).__state;
138-
```
197+
### MCP Resource URI Templates
139198

140-
This maintains single-session behavior across browser and mobile.
199+
The MCP SDK only supports path-segment templates `{param}` in resource URIs — NOT RFC 6570 query param syntax `{?param}`. Resources using `{?param}` silently return "Resource not found". Keep resources at fixed URIs; expose parameterised access via tools instead.
141200

142-
### Auto-Detach Behavior
201+
### Scripts vs Tools vs Resources
143202

144-
Sessions created with `noReset: true` or without `appPath` automatically detach on close (don't terminate on Appium
145-
server).
203+
Computation logic belongs in `src/scripts/` (no try/catch, returns raw data). Tools wrap scripts with try/catch and return `{ isError: true, content: [...] }` on failure. Resources wrap scripts and set `mimeType` in the response.
146204

147205
### Error Handling
148206

@@ -158,11 +216,12 @@ catch (e) {
158216

159217
1. Create `src/tools/my-tool.tool.ts`
160218
2. Export `myToolDefinition` (Zod schema) and `myTool` (ToolCallback)
161-
3. Import and register in `src/server.ts`:
219+
3. Import and register in `src/server.ts` using the `registerTool` helper:
162220
```typescript
163221
import { myToolDefinition, myTool } from './tools/my-tool.tool';
164-
server.tool(myToolDefinition.name, myToolDefinition.description, myToolDefinition.inputSchema, myTool);
222+
registerTool(myToolDefinition, myTool);
165223
```
224+
To wrap with recording: `registerTool(myToolDefinition, withRecording('my_tool', myTool));`
166225

167226
## Selector Syntax Reference
168227

@@ -179,6 +238,5 @@ catch (e) {
179238

180239
See `docs/architecture/` for proposals:
181240

182-
- `session-configuration-proposal.md` — Cloud provider pattern (BrowserStack, SauceLabs)
183-
- `interaction-sequencing-proposal.md` — Batch actions with state delta detection
241+
- `session-configuration-proposal.md` — Cloud provider pattern (BrowserStack, SauceLabs); `src/providers/types.ts` is the extension point
184242
- `multi-session-proposal.md` — Parallel sessions for sub-agent coordination

docs/architecture/interaction-sequencing-proposal.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ interface SequenceResult {
122122
### Why Stability Matters
123123

124124
After clicking a button, the page might:
125+
125126
- Navigate (URL change)
126127
- Show a loading spinner
127128
- Fetch data and render new elements
@@ -201,7 +202,7 @@ src/
201202

202203
1. Create `interaction.tool.ts` with basic `execute_sequence`
203204
2. Implement action dispatch (reuse existing tool logic)
204-
3. Capture before/after state using `getVisibleElements`
205+
3. Capture before/after state using `getElements`
205206
4. Compute simple delta (appeared/disappeared by selector)
206207

207208
### Phase 2: Stability Detection
@@ -241,6 +242,7 @@ execute_sequence({
241242
```
242243

243244
Response:
245+
244246
```json
245247
{
246248
"completed": 3,
@@ -271,6 +273,7 @@ execute_sequence({
271273
```
272274

273275
Response:
276+
274277
```json
275278
{
276279
"completed": 2,
@@ -300,6 +303,7 @@ execute_sequence({
300303
```
301304

302305
Response:
306+
303307
```json
304308
{
305309
"completed": 1,
@@ -331,6 +335,7 @@ execute_sequence({
331335
```
332336

333337
Response:
338+
334339
```json
335340
{
336341
"completed": 3,
@@ -361,6 +366,7 @@ Some actions (like `set_value`) rarely cause async changes. Could skip stability
361366
### 2. How to handle infinite loading states?
362367

363368
Options:
369+
364370
- Hard timeout (current approach) — returns partial delta
365371
- Detect specific loading patterns — report "page still loading"
366372
- Let AI decide — return `{ stable: false, reason: 'loading indicator visible' }`
@@ -369,7 +375,8 @@ Options:
369375

370376
### 3. Should delta include off-screen elements?
371377

372-
Current `getVisibleElements` filters to viewport by default. For delta:
378+
Current `getElements` filters to viewport by default. For delta:
379+
373380
- Viewport only = might miss elements that scrolled in/out
374381
- Full page = more accurate but larger payload
375382

@@ -378,6 +385,7 @@ Current `getVisibleElements` filters to viewport by default. For delta:
378385
### 4. Performance: Full diff vs. key signals
379386

380387
Two comparison strategies:
388+
381389
- **Full diff**: Compare all elements every poll (accurate, expensive)
382390
- **Key signals**: Compare signature only during polling, full diff only at end (fast, might miss rapid changes)
383391

@@ -386,6 +394,7 @@ Two comparison strategies:
386394
### 5. What about conditional actions?
387395

388396
Should we support:
397+
389398
```typescript
390399
{ action: 'click_element', selector: '#cookie-banner', optional: true }
391400
```
@@ -399,13 +408,15 @@ Should we support:
399408
### Existing Tools
400409

401410
`execute_sequence` complements existing tools:
411+
402412
- Simple single actions still use `click_element`, `set_value`, etc.
403413
- Complex workflows use `execute_sequence`
404414
- No breaking changes to existing tools
405415

406416
### Mobile Support
407417

408418
Works identically for mobile sessions:
419+
409420
```typescript
410421
execute_sequence({
411422
actions: [
@@ -419,6 +430,7 @@ execute_sequence({
419430
### Multi-Session (Future)
420431

421432
When multi-session support lands:
433+
422434
```typescript
423435
execute_sequence({
424436
sessionId: 'user-a',

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@
3737
"prebundle": "rimraf lib --glob ./*.tgz",
3838
"bundle": "tsup && shx chmod +x lib/server.js",
3939
"postbundle": "npm pack",
40-
"lint": "eslint src/ --fix && tsc --noEmit",
40+
"lint": "npm run lint:src && npm run lint:tests",
41+
"lint:src": "eslint src/ --fix && tsc --noEmit",
4142
"lint:tests": "eslint tests/ --fix && tsc -p tsconfig.test.json --noEmit",
4243
"start": "node lib/server.js",
4344
"dev": "tsx --watch src/server.ts",

0 commit comments

Comments
 (0)