Skip to content

Commit 4a301fe

Browse files
committed
refactor: Refine session lifecycle handling and simplify logic
- Review code changes and consolidate docs
1 parent ff37196 commit 4a301fe

6 files changed

Lines changed: 186 additions & 135 deletions

File tree

CLAUDE.md

Lines changed: 66 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,33 +24,56 @@ src/
2424
│ ├── local-browser.provider.ts # Chrome/Firefox/Edge/Safari capability building
2525
│ └── local-appium.provider.ts # iOS/Android via appium.config.ts
2626
├── tools/
27-
│ ├── browser.tool.ts # start_browser, close_session, readTabs(), switch_tab
28-
│ ├── app-session.tool.ts # start_app_session (iOS/Android via Appium)
27+
│ ├── session.tool.ts # start_session (browser+mobile), close_session
28+
│ ├── tabs.tool.ts # switch_tab
29+
│ ├── launch-chrome.tool.ts # launch_chrome (remote debugging)
2930
│ ├── navigate.tool.ts # navigateAction() + navigateTool
3031
│ ├── click.tool.ts # clickAction() + clickTool
3132
│ ├── set-value.tool.ts # setValueAction() + setValueTool
3233
│ ├── scroll.tool.ts # scrollAction() + scrollTool
3334
│ ├── gestures.tool.ts # tapAction(), swipeAction(), dragAndDropAction()
35+
│ ├── context.tool.ts # switch_context (native/webview)
36+
│ ├── device.tool.ts # rotate_device, hide_keyboard
37+
│ ├── emulate-device.tool.ts # emulate_device (viewport/UA)
38+
│ ├── cookies.tool.ts # set_cookie, delete_cookies
39+
│ ├── execute-script.tool.ts # execute_script
3440
│ ├── execute-sequence.tool.ts # Batch action sequencing with stability + state delta
3541
│ └── ... # Other tools follow same pattern
42+
├── resources/
43+
│ ├── index.ts # ResourceDefinition exports
44+
│ ├── sessions.resource.ts # wdio://sessions, wdio://session/*/steps, wdio://session/*/code
45+
│ ├── elements.resource.ts # wdio://session/current/elements
46+
│ ├── accessibility.resource.ts # wdio://session/current/accessibility
47+
│ ├── screenshot.resource.ts # wdio://session/current/screenshot
48+
│ ├── cookies.resource.ts # wdio://session/current/cookies
49+
│ ├── tabs.resource.ts # wdio://session/current/tabs
50+
│ ├── contexts.resource.ts # wdio://session/current/contexts
51+
│ ├── app-state.resource.ts # wdio://session/current/app-state
52+
│ └── geolocation.resource.ts # wdio://session/current/geolocation
3653
├── recording/
3754
│ ├── step-recorder.ts # withRecording HOF, appendStep, session history access
38-
│ ├── code-generator.ts # SessionHistory → WebdriverIO JS code
39-
│ └── resources.ts # MCP resource builders (sessions index, step log)
55+
│ └── code-generator.ts # SessionHistory → WebdriverIO JS code
4056
├── scripts/
41-
│ └── get-interactable-browser-elements.ts # Browser-context script
57+
│ ├── get-interactable-browser-elements.ts # Browser-context element detection
58+
│ ├── get-browser-accessibility-tree.ts # Browser-context accessibility tree
59+
│ └── get-visible-mobile-elements.ts # Mobile visible element detection
4260
├── locators/
4361
│ ├── element-filter.ts # Platform-specific element classification
44-
│ ├── generate-all-locators.ts # Multi-strategy selector generation
45-
│ └── source-parsing.ts # XML page source parsing for mobile
62+
│ ├── locator-generation.ts # Multi-strategy selector generation
63+
│ ├── xml-parsing.ts # XML page source parsing for mobile
64+
│ ├── constants.ts # Shared locator constants
65+
│ ├── types.ts # Locator type definitions
66+
│ └── index.ts # Public exports
4667
├── config/
4768
│ └── appium.config.ts # iOS/Android capability builders (used by local-appium.provider)
4869
├── utils/
4970
│ ├── parse-variables.ts # URI template variable parsing (parseBool, parseNumber, etc.)
5071
│ ├── stability-detector.ts # Page stability polling (signature-based, 200ms/500ms/5s)
51-
│ └── state-diff.ts # Element before/after diff (appeared, disappeared, changed)
72+
│ ├── state-diff.ts # Element before/after diff (appeared, disappeared, changed)
73+
│ └── zod-helpers.ts # coerceBoolean and other Zod utilities
5274
└── types/
5375
├── tool.ts # ToolDefinition interface
76+
├── resource.ts # ResourceDefinition interface
5477
└── recording.ts # RecordedStep, SessionHistory interfaces
5578
```
5679

@@ -95,17 +118,32 @@ export const myTool: ToolCallback = async ({ param }: { param: string }) => {
95118
}
96119
};
97120

98-
// 3. Register in server.ts
99-
server.tool(myToolDefinition.name, myToolDefinition.description, myToolDefinition.inputSchema, myTool);
121+
// 3. Register in server.ts via the registerTool helper
122+
registerTool(myToolDefinition, myTool);
100123
```
101124

102125
### Recording
103126

104-
All tools are wrapped with `withRecording()` in `server.ts`. Steps accumulate in `state.sessionHistory` (keyed by sessionId).
105-
MCP resources expose history without tool calls:
106-
- `wdio://sessions` — index of all sessions (fixed URI, discoverable via ListResources)
107-
- `wdio://session/current/steps` — current session step log + generated JS (fixed URI)
108-
- `wdio://session/{sessionId}/steps` — any session by ID (URI template, NOT listed by ListResources — see `docs/architecture/mcp-resources-notes.md`)
127+
Selected tools are wrapped with `withRecording()` in `server.ts`. Steps accumulate in `state.sessionHistory` (keyed by sessionId).
128+
129+
MCP resources expose live session data. All are at fixed URIs discoverable via ListResources, except the entries marked "(URI template)", which take a `sessionId`:
130+
131+
**Session history:**
132+
- `wdio://sessions` — index of all sessions
133+
- `wdio://session/current/steps` — current session step log
134+
- `wdio://session/current/code` — generated WebdriverIO JS for current session
135+
- `wdio://session/{sessionId}/steps` — step log for any session (URI template)
136+
- `wdio://session/{sessionId}/code` — generated JS for any session (URI template)
137+
138+
**Live page state (current session):**
139+
- `wdio://session/current/elements` — interactable elements
140+
- `wdio://session/current/accessibility` — accessibility tree
141+
- `wdio://session/current/screenshot` — screenshot (base64)
142+
- `wdio://session/current/cookies` — browser cookies
143+
- `wdio://session/current/tabs` — open browser tabs
144+
- `wdio://session/current/contexts` — native/webview contexts (mobile)
145+
- `wdio://session/current/app-state` — mobile app state
146+
- `wdio://session/current/geolocation` — device geolocation
109147

110148
### Build
111149

@@ -120,29 +158,33 @@ MCP resources expose history without tool calls:
120158
| `src/server.ts` | MCP server init, tool + resource registration |
121159
| `src/session/state.ts` | Session state maps, `getBrowser()`, `getState()` |
122160
| `src/session/lifecycle.ts` | `registerSession()`, `closeSession()`, session transitions |
123-
| `src/tools/browser.tool.ts` | `start_browser`, `close_session`, `switch_tab`, `readTabs()` |
124-
| `src/tools/app-session.tool.ts` | Appium session creation |
161+
| `src/tools/session.tool.ts` | `start_session` (browser + mobile), `close_session` |
162+
| `src/tools/tabs.tool.ts` | `switch_tab` |
125163
| `src/tools/execute-sequence.tool.ts` | Batch action sequencing with stability + delta |
164+
| `src/resources/` | All MCP resource definitions (10 files) |
126165
| `src/providers/local-browser.provider.ts` | Chrome/Firefox/Edge/Safari capability building |
127166
| `src/providers/local-appium.provider.ts` | iOS/Android capabilities via appium.config.ts |
128167
| `src/scripts/get-interactable-browser-elements.ts` | Browser-context element detection |
129168
| `src/locators/` | Mobile element detection + locator generation |
130-
| `src/recording/step-recorder.ts` | `withRecording(toolName, cb)` HOF — wraps every tool for step logging |
169+
| `src/recording/step-recorder.ts` | `withRecording(toolName, cb)` HOF — wraps tools for step logging |
131170
| `src/recording/code-generator.ts` | Generates runnable WebdriverIO JS from `SessionHistory` |
132-
| `src/recording/resources.ts` | Builds text for `wdio://sessions` and `wdio://session/*/steps` resources |
133171
| `src/utils/stability-detector.ts` | Page stability detection (signature polling) |
134172
| `src/utils/state-diff.ts` | Element state diff (appeared/disappeared/changed) |
173+
| `src/utils/zod-helpers.ts` | `coerceBoolean` for client interop |
135174
| `tsup.config.ts` | Build configuration |
136175

137176
## Gotchas
138177

139178
### Console Output
140179

141-
All console methods redirect to stderr. Chrome writes to stdout which corrupts MCP stdio protocol.
180+
`console.log`, `console.info`, `console.warn`, and `console.debug` are redirected to stderr via `console.error`. Chrome writes to stdout, which corrupts the MCP stdio protocol.
142181

143182
```typescript
144183
// In server.ts - do not remove
145-
console.log = (...args) => process.stderr.write(util.format(...args) + '\n');
184+
console.log = (...args) => console.error('[LOG]', ...args);
185+
console.info = (...args) => console.error('[INFO]', ...args);
186+
console.warn = (...args) => console.error('[WARN]', ...args);
187+
console.debug = (...args) => console.error('[DEBUG]', ...args);
146188
```
147189

148190
### Browser Scripts Must Be Self-Contained
@@ -169,11 +211,12 @@ catch (e) {
169211

170212
1. Create `src/tools/my-tool.tool.ts`
171213
2. Export `myToolDefinition` (Zod schema) and `myTool` (ToolCallback)
172-
3. Import and register in `src/server.ts`:
214+
3. Import and register in `src/server.ts` using the `registerTool` helper:
173215
```typescript
174216
import { myToolDefinition, myTool } from './tools/my-tool.tool';
175-
server.tool(myToolDefinition.name, myToolDefinition.description, myToolDefinition.inputSchema, myTool);
217+
registerTool(myToolDefinition, myTool);
176218
```
219+
To wrap with recording: `registerTool(myToolDefinition, withRecording('my_tool', myTool));`
177220

178221
## Selector Syntax Reference
179222

src/tools/context.tool.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ export const switchContextToolDefinition: ToolDefinition = {
1111
context: z
1212
.string()
1313
.describe(
14-
'Context name to switch to (e.g., "NATIVE_APP", "WEBVIEW_com.example.app", or use index from get_contexts)',
14+
'Context name to switch to (e.g., "NATIVE_APP", "WEBVIEW_com.example.app", or use index from wdio://session/current/contexts resource)',
1515
),
1616
},
1717
};

src/tools/execute-sequence.tool.ts

Lines changed: 81 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -119,88 +119,92 @@ export const executeSequenceTool: ToolCallback = async ({
119119
actions: z.infer<typeof actionSchema>[];
120120
waitForStability?: boolean;
121121
}) => {
122-
const browser = getBrowser();
123-
const isBrowser = !browser.isAndroid && !browser.isIOS;
124-
125-
// Capture initial URL/title for diff
126-
const { url: beforeUrl, title: beforeTitle } = isBrowser
127-
? await browser.execute(() => ({ url: window.location.href, title: document.title })) as {
128-
url: string;
129-
title: string
122+
try {
123+
const browser = getBrowser();
124+
const isBrowser = !browser.isAndroid && !browser.isIOS;
125+
126+
// Capture initial URL/title for diff
127+
const { url: beforeUrl, title: beforeTitle } = isBrowser
128+
? await browser.execute(() => ({ url: window.location.href, title: document.title })) as {
129+
url: string;
130+
title: string
131+
}
132+
: { url: '', title: '' };
133+
134+
// Capture initial elements for diff (browser only)
135+
const initialBrowserElements = isBrowser ? await getInteractableBrowserElements(browser, {}) : [];
136+
const initialElements = initialBrowserElements.map((el) => ({ selector: el.selector, text: el.name }));
137+
138+
const results: { action: string; durationMs: number }[] = [];
139+
140+
for (let i = 0; i < actions.length; i++) {
141+
const action = actions[i];
142+
const start = Date.now();
143+
const result = await dispatchAction(action);
144+
const durationMs = Date.now() - start;
145+
const isError = (result as any).isError === true;
146+
147+
// Record each sub-action as a step
148+
appendStep(
149+
action.action,
150+
action as Record<string, unknown>,
151+
isError ? 'error' : 'ok',
152+
durationMs,
153+
isError ? (result.content.find((c: any) => c.type === 'text') as any)?.text : undefined,
154+
);
155+
156+
if (isError) {
157+
return {
158+
content: [{
159+
type: 'text' as const,
160+
text: JSON.stringify({
161+
completed: i,
162+
total: actions.length,
163+
failed: {
164+
index: i,
165+
action: action.action,
166+
error: (result.content.find((c: any) => c.type === 'text') as any)?.text,
167+
},
168+
results,
169+
}),
170+
}],
171+
};
172+
}
173+
174+
results.push({ action: action.action, durationMs });
175+
176+
// Wait for stability after each action (except the last, we do it before diff)
177+
if (shouldWait && i < actions.length - 1 && isBrowser) {
178+
await waitForStability(browser);
179+
}
130180
}
131-
: { url: '', title: '' };
132-
133-
// Capture initial elements for diff (browser only)
134-
const initialBrowserElements = isBrowser ? await getInteractableBrowserElements(browser, {}) : [];
135-
const initialElements = initialBrowserElements.map((el) => ({ selector: el.selector, text: el.name }));
136-
137-
const results: { action: string; durationMs: number }[] = [];
138-
139-
for (let i = 0; i < actions.length; i++) {
140-
const action = actions[i];
141-
const start = Date.now();
142-
const result = await dispatchAction(action);
143-
const durationMs = Date.now() - start;
144-
const isError = (result as any).isError === true;
145-
146-
// Record each sub-action as a step
147-
appendStep(
148-
action.action,
149-
action as Record<string, unknown>,
150-
isError ? 'error' : 'ok',
151-
durationMs,
152-
isError ? (result.content.find((c: any) => c.type === 'text') as any)?.text : undefined,
153-
);
154-
155-
if (isError) {
156-
return {
157-
content: [{
158-
type: 'text' as const,
159-
text: JSON.stringify({
160-
completed: i,
161-
total: actions.length,
162-
failed: {
163-
index: i,
164-
action: action.action,
165-
error: (result.content.find((c: any) => c.type === 'text') as any)?.text,
166-
},
167-
results,
168-
}),
169-
}],
170-
};
171-
}
172-
173-
results.push({ action: action.action, durationMs });
174181

175-
// Wait for stability after each action (except the last, we do it before diff)
176-
if (shouldWait && i < actions.length - 1 && isBrowser) {
182+
// Final stability wait before capturing end state
183+
if (shouldWait && isBrowser) {
177184
await waitForStability(browser);
178185
}
179-
}
180186

181-
// Final stability wait before capturing end state
182-
if (shouldWait && isBrowser) {
183-
await waitForStability(browser);
184-
}
187+
// Capture final elements for state delta (browser only)
188+
const finalBrowserElements = isBrowser ? await getInteractableBrowserElements(browser, {}) : [];
189+
const finalElements = finalBrowserElements.map((el) => ({ selector: el.selector, text: el.name }));
190+
191+
const delta = isBrowser
192+
? await captureStateDelta(browser, initialElements, finalElements, beforeUrl, beforeTitle)
193+
: null;
194+
195+
const response: Record<string, unknown> = {
196+
completed: actions.length,
197+
total: actions.length,
198+
results,
199+
};
200+
if (delta) {
201+
response.delta = delta;
202+
}
185203

186-
// Capture final elements for state delta (browser only)
187-
const finalBrowserElements = isBrowser ? await getInteractableBrowserElements(browser, {}) : [];
188-
const finalElements = finalBrowserElements.map((el) => ({ selector: el.selector, text: el.name }));
189-
190-
const delta = isBrowser
191-
? await captureStateDelta(browser, initialElements, finalElements, beforeUrl, beforeTitle)
192-
: null;
193-
194-
const response: Record<string, unknown> = {
195-
completed: actions.length,
196-
total: actions.length,
197-
results,
198-
};
199-
if (delta) {
200-
response.delta = delta;
204+
return {
205+
content: [{ type: 'text' as const, text: JSON.stringify(response) }],
206+
};
207+
} catch (e) {
208+
return { isError: true, content: [{ type: 'text', text: `Error executing sequence: ${e}` }] };
201209
}
202-
203-
return {
204-
content: [{ type: 'text' as const, text: JSON.stringify(response) }],
205-
};
206210
};

0 commit comments

Comments
 (0)