@@ -16,45 +16,81 @@ npm start # Run built server from lib/server.js
1616```
1717src/
1818├── server.ts # MCP server entry, registers all tools + MCP resources
19+ ├── session/
20+ │ ├── state.ts # Session state maps, getBrowser(), getState(), SessionMetadata
21+ │ └── lifecycle.ts # registerSession(), handleSessionTransition(), closeSession()
22+ ├── providers/
23+ │ ├── types.ts # SessionProvider interface, ConnectionConfig
24+ │ ├── local-browser.provider.ts # Chrome/Firefox/Edge/Safari capability building
25+ │ └── local-appium.provider.ts # iOS/Android via appium.config.ts
1926├── tools/
20- │ ├── browser.tool.ts # Session state + start_browser + getBrowser()
21- │ ├── app-session.tool.ts # start_app_session (iOS/Android via Appium)
22- │ ├── navigate.tool.ts # URL navigation
23- │ ├── get-visible-elements.tool.ts # Element detection (web + mobile)
24- │ ├── click.tool.ts # Click/tap actions
27+ │ ├── session.tool.ts # start_session (browser+mobile), close_session
28+ │ ├── tabs.tool.ts # switch_tab
29+ │ ├── launch-chrome.tool.ts # launch_chrome (remote debugging)
30+ │ ├── navigate.tool.ts # navigateAction() + navigateTool
31+ │ ├── click.tool.ts # clickAction() + clickTool
32+ │ ├── set-value.tool.ts # setValueAction() + setValueTool
33+ │ ├── scroll.tool.ts # scrollAction() + scrollTool
34+ │ ├── gestures.tool.ts # tapAction(), swipeAction(), dragAndDropAction()
35+ │ ├── context.tool.ts # switch_context (native/webview)
36+ │ ├── device.tool.ts # rotate_device, hide_keyboard
37+ │ ├── emulate-device.tool.ts # emulate_device (viewport/UA)
38+ │ ├── cookies.tool.ts # set_cookie, delete_cookies
39+ │ ├── execute-script.tool.ts # execute_script
40+ │ ├── get-elements.tool.ts # get_elements (all elements, incl. below fold)
2541│ └── ... # Other tools follow same pattern
42+ ├── resources/
43+ │ ├── index.ts # ResourceDefinition exports
44+ │ ├── sessions.resource.ts # wdio://sessions, wdio://session/*/steps, wdio://session/*/code
45+ │ ├── elements.resource.ts # wdio://session/current/elements
46+ │ ├── accessibility.resource.ts # wdio://session/current/accessibility
47+ │ ├── screenshot.resource.ts # wdio://session/current/screenshot
48+ │ ├── cookies.resource.ts # wdio://session/current/cookies
49+ │ ├── tabs.resource.ts # wdio://session/current/tabs
50+ │ ├── contexts.resource.ts # wdio://session/current/contexts
51+ │ ├── app-state.resource.ts # wdio://session/current/app-state
52+ │ └── geolocation.resource.ts # wdio://session/current/geolocation
2653├── recording/
2754│ ├── step-recorder.ts # withRecording HOF, appendStep, session history access
28- │ ├── code-generator.ts # SessionHistory → WebdriverIO JS code
29- │ └── resources.ts # MCP resource builders (sessions index, step log)
55+ │ └── code-generator.ts # SessionHistory → WebdriverIO JS code
3056├── scripts/
31- │ └── get-interactable-browser-elements.ts # Browser-context script
57+ │ ├── get-interactable-browser-elements.ts # Browser-context element detection
58+ │ ├── get-browser-accessibility-tree.ts # Browser-context accessibility tree
59+ │ ├── get-visible-mobile-elements.ts # Mobile visible element detection
60+ │ └── get-elements.ts # Filter + paginate elements (used by tool + resource)
3261├── locators/
3362│ ├── element-filter.ts # Platform-specific element classification
34- │ ├── generate-all-locators.ts # Multi-strategy selector generation
35- │ └── source-parsing.ts # XML page source parsing for mobile
63+ │ ├── locator-generation.ts # Multi-strategy selector generation
64+ │ ├── xml-parsing.ts # XML page source parsing for mobile
65+ │ ├── constants.ts # Shared locator constants
66+ │ ├── types.ts # Locator type definitions
67+ │ └── index.ts # Public exports
3668├── config/
37- │ └── appium.config.ts # iOS/Android capability builders
69+ │ └── appium.config.ts # iOS/Android capability builders (used by local-appium.provider)
70+ ├── utils/
71+ │ ├── parse-variables.ts # URI template variable parsing (parseBool, parseNumber, etc.)
72+ │ └── zod-helpers.ts # coerceBoolean and other Zod utilities
3873└── types/
3974 ├── tool.ts # ToolDefinition interface
75+ ├── resource.ts # ResourceDefinition interface
4076 └── recording.ts # RecordedStep, SessionHistory interfaces
4177```
4278
4379### Session State
4480
45- Single active session model in ` browser.tool .ts` :
81+ Single active session model in `src/session/state.ts`:
4682
4783``` typescript
48- const browsers: Map <string , WebdriverIO .Browser > = new Map ();
49- let currentSession: string | null = null ;
50- const sessionMetadata: Map <string , SessionMetadata > = new Map ();
51-
52- export function getBrowser(): WebdriverIO .Browser {
53- // Returns current active session or throws
54- }
84+ // Private state — access via getState() or getBrowser()
85+ export function getBrowser(): WebdriverIO.Browser { ... }
86+ export function getState() { return state; }
87+ export interface SessionMetadata { type: 'browser' | 'ios' | 'android'; capabilities: Record<string, unknown>; isAttached: boolean; }
5588```
5689
57- State shared with ` app-session.tool.ts ` via ` (getBrowser as any).__state ` .
90+ Session lifecycle managed via ` src/session/lifecycle.ts ` :
91+ - ` registerSession() ` — registers browser + metadata + history, handles transition sentinel
92+ - ` handleSessionTransition() ` — appends ` __session_transition__ ` step to outgoing session
93+ - ` closeSession() ` — terminates or detaches, marks endedAt, cleans up maps
5894
5995### Tool Pattern
6096
@@ -81,17 +117,32 @@ export const myTool: ToolCallback = async ({ param }: { param: string }) => {
81117 }
82118};
83119
84- // 3. Register in server.ts
85- server . tool (myToolDefinition . name , myToolDefinition . description , myToolDefinition . inputSchema , myTool );
120+ // 3. Register in server.ts via the registerTool helper
121+ registerTool (myToolDefinition , myTool );
86122```
87123
88124### Recording
89125
90- All tools are wrapped with ` withRecording() ` in ` server.ts ` . Steps accumulate in ` state.sessionHistory ` (keyed by sessionId).
91- MCP resources expose history without tool calls:
92- - ` wdio://sessions ` — index of all sessions (fixed URI, discoverable via ListResources)
93- - ` wdio://session/current/steps ` — current session step log + generated JS (fixed URI)
94- - ` wdio://session/{sessionId}/steps ` — any session by ID (URI template, NOT listed by ListResources — see ` docs/architecture/mcp-resources-notes.md ` )
126+ Selected tools are wrapped with ` withRecording() ` in ` server.ts ` . Steps accumulate in ` state.sessionHistory ` (keyed by sessionId).
127+
128+ MCP resources expose live session data. Fixed URIs are discoverable via ListResources; entries marked "URI template" take a `{sessionId}` path segment instead:
129+
130+ ** Session history:**
131+ - ` wdio://sessions ` — index of all sessions
132+ - ` wdio://session/current/steps ` — current session step log
133+ - ` wdio://session/current/code ` — generated WebdriverIO JS for current session
134+ - ` wdio://session/{sessionId}/steps ` — step log for any session (URI template)
135+ - ` wdio://session/{sessionId}/code ` — generated JS for any session (URI template)
136+
137+ ** Live page state (current session):**
138+ - ` wdio://session/current/elements ` — interactable elements (viewport-only; use ` get_elements ` tool with ` inViewportOnly: false ` for all)
139+ - ` wdio://session/current/accessibility ` — accessibility tree
140+ - ` wdio://session/current/screenshot ` — screenshot (base64)
141+ - ` wdio://session/current/cookies ` — browser cookies
142+ - ` wdio://session/current/tabs ` — open browser tabs
143+ - ` wdio://session/current/contexts ` — native/webview contexts (mobile)
144+ - ` wdio://session/current/app-state ` — mobile app state
145+ - ` wdio://session/current/geolocation ` — device geolocation
95146
96147### Build
97148
@@ -103,46 +154,53 @@ MCP resources expose history without tool calls:
103154
104155| File | Purpose |
105156| ----------------------------------------------------| -----------------------------------------------|
106- | ` src/server.ts ` | MCP server init, tool registration |
107- | ` src/tools/browser.tool.ts ` | Session state management, ` getBrowser() ` |
108- | ` src/tools/app-session.tool.ts ` | Appium session creation |
157+ | ` src/server.ts ` | MCP server init, tool + resource registration |
158+ | ` src/session/state.ts ` | Session state maps, ` getBrowser() ` , ` getState() ` |
159+ | ` src/session/lifecycle.ts ` | ` registerSession() ` , ` closeSession() ` , session transitions |
160+ | ` src/tools/session.tool.ts ` | ` start_session ` (browser + mobile), ` close_session ` |
161+ | ` src/tools/tabs.tool.ts ` | ` switch_tab ` |
162+ | ` src/tools/get-elements.tool.ts ` | ` get_elements ` — all elements with filtering + pagination |
163+ | ` src/resources/ ` | All MCP resource definitions (10 files) |
164+ | ` src/providers/local-browser.provider.ts ` | Chrome/Firefox/Edge/Safari capability building |
165+ | ` src/providers/local-appium.provider.ts ` | iOS/Android capabilities via appium.config.ts |
109166| ` src/scripts/get-interactable-browser-elements.ts ` | Browser-context element detection |
110167| ` src/locators/ ` | Mobile element detection + locator generation |
111- | ` src/recording/step-recorder.ts ` | ` withRecording(toolName, cb) ` HOF — wraps every tool for step logging |
168+ | ` src/recording/step-recorder.ts ` | ` withRecording(toolName, cb) ` HOF — wraps tools for step logging |
112169| ` src/recording/code-generator.ts ` | Generates runnable WebdriverIO JS from ` SessionHistory ` |
113- | ` src/recording/resources .ts ` | Builds text for ` wdio://sessions ` and ` wdio://session/*/steps ` resources |
170+ | ` src/utils/zod-helpers .ts ` | ` coerceBoolean ` for client interop |
114171| ` tsup.config.ts ` | Build configuration |
115172
116173## Gotchas
117174
118175### Console Output
119176
120- All console methods redirect to stderr. Chrome writes to stdout which corrupts MCP stdio protocol.
177+ All console methods are redirected to stderr via `console.error`. Chrome writes to stdout, which corrupts the MCP stdio protocol.
121178
122179``` typescript
123180// In server.ts - do not remove
124- console .log = (... args ) => process .stderr .write (util .format (... args ) + ' \n ' );
181+ console .log = (... args ) => console .error (' [LOG]' , ... args );
182+ console .info = (... args ) => console .error (' [INFO]' , ... args );
183+ console .warn = (... args ) => console .error (' [WARN]' , ... args );
184+ console .debug = (... args ) => console .error (' [DEBUG]' , ... args );
125185```
126186
127187### Browser Scripts Must Be Self-Contained
128188
129190`get-interactable-browser-elements.ts` executes in browser context via `browser.execute()`, so it cannot use Node.js APIs or
130191external imports.
131191
132- ### Mobile State Sharing Hack
192+ ### Auto-Detach Behavior
133193
134- ` app-session.tool.ts ` accesses browser.tool.ts state via:
194+ Sessions created with `noReset: true` or without `appPath` automatically detach on close (the session is not terminated on the
195+ Appium server).
135196
136- ``` typescript
137- const state = (getBrowser as any ).__state ;
138- ```
197+ ### MCP Resource URI Templates
139198
140- This maintains single-session behavior across browser and mobile .
199+ The MCP SDK only supports path-segment templates ` {param} ` in resource URIs — NOT RFC 6570 query param syntax ` {?param} ` . Resources using ` {?param} ` silently return "Resource not found". Keep resources at fixed URIs; expose parameterised access via tools instead .
141200
142- ### Auto-Detach Behavior
201+ ### Scripts vs Tools vs Resources
143202
144- Sessions created with ` noReset: true ` or without ` appPath ` automatically detach on close (don't terminate on Appium
145- server).
203+ Computation logic belongs in ` src/scripts/ ` (no try/catch, returns raw data). Tools wrap scripts with try/catch and return ` { isError: true, content: [...] } ` on failure. Resources wrap scripts and set ` mimeType ` in the response.
146204
147205### Error Handling
148206
@@ -158,11 +216,12 @@ catch (e) {
158216
1592171 . Create ` src/tools/my-tool.tool.ts `
1602182 . Export ` myToolDefinition ` (Zod schema) and ` myTool ` (ToolCallback)
161- 3 . Import and register in ` src/server.ts ` :
219+ 3 . Import and register in ` src/server.ts ` using the ` registerTool ` helper :
162220 ``` typescript
163221 import { myToolDefinition , myTool } from ' ./tools/my-tool.tool' ;
164- server . tool (myToolDefinition . name , myToolDefinition . description , myToolDefinition . inputSchema , myTool );
222+ registerTool (myToolDefinition , myTool );
165223 ```
224+ To wrap with recording: ` registerTool(myToolDefinition, withRecording('my_tool', myTool)); `
166225
167226## Selector Syntax Reference
168227
@@ -179,6 +238,5 @@ catch (e) {
179238
180239See ` docs/architecture/ ` for proposals:
181240
182- - ` session-configuration-proposal.md ` — Cloud provider pattern (BrowserStack, SauceLabs)
183- - ` interaction-sequencing-proposal.md ` — Batch actions with state delta detection
241+ - `session-configuration-proposal.md` — Cloud provider pattern (BrowserStack, Sauce Labs); `src/providers/types.ts` is the extension point
184242- ` multi-session-proposal.md ` — Parallel sessions for sub-agent coordination
0 commit comments