Skip to content

Commit 096cc04

Browse files
authored
Merge pull request #35 from webdriverio/feature/attach-emulate-tools
Introduction of browser `attach` and `emulate` tools
2 parents 1bd25f9 + daf9295 commit 096cc04

9 files changed

Lines changed: 523 additions & 5 deletions

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ appium
8383
- **Page Analysis**: Get visible elements, accessibility trees, take screenshots
8484
- **Cookie Management**: Get, set, and delete cookies
8585
- **Scrolling**: Smooth scrolling with configurable distances
86+
- **Attach to running Chrome**: Connect to an existing Chrome window via `--remote-debugging-port` — ideal for testing authenticated or pre-configured sessions
87+
- **Device emulation**: Apply mobile/tablet presets (iPhone 15, Pixel 7, etc.) to simulate responsive layouts without a physical device
8688

8789
### Mobile App Automation (iOS/Android)
8890

@@ -102,6 +104,8 @@ appium
102104
| `start_browser` | Start a browser session (Chrome, Firefox, Edge, Safari; headless/headed, custom dimensions) |
103105
| `start_app_session` | Start an iOS or Android app session via Appium (supports state preservation via noReset) |
104106
| `close_session` | Close or detach from the current browser or app session (supports detach mode) |
107+
| `attach_browser` | Attach to a running Chrome instance via `--remote-debugging-port` (CDP) |
108+
| `emulate_device` | Emulate a mobile/tablet device preset (viewport, DPR, UA, touch); requires BiDi session |
105109

106110
### Navigation & Page Interaction (Web & Mobile)
107111

@@ -231,6 +235,37 @@ start_browser({
231235
})
232236
```
233237

238+
**Attach to a running Chrome instance:**
239+
240+
```
241+
// First, launch Chrome with remote debugging enabled:
242+
//
243+
// macOS (must quit Chrome first — open -a ignores args if Chrome is already running):
244+
// pkill -x "Google Chrome" && sleep 1
245+
// /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \
246+
// --remote-debugging-port=9222 \
247+
// --user-data-dir=/tmp/chrome-debug &
248+
//
249+
// Linux:
250+
// google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug &
251+
//
252+
// Verify it's ready: curl http://localhost:9222/json/version
253+
attach_browser()
254+
attach_browser({port: 9333})
255+
attach_browser({port: 9222, navigationUrl: 'https://app.example.com'})
256+
```
257+
258+
**Device emulation (requires BiDi session):**
259+
260+
```
261+
// Device emulation (requires BiDi session)
262+
start_browser({capabilities: {webSocketUrl: true}})
263+
emulate_device() // list available presets
264+
emulate_device({device: 'iPhone 15'}) // activate emulation
265+
emulate_device({device: 'Pixel 7'}) // switch device
266+
emulate_device({device: 'reset'}) // restore desktop defaults
267+
```
268+
234269
### Mobile App Automation
235270

236271
**Testing an iOS app on simulator:**

src/server.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ import {
5454
setGeolocationToolDefinition,
5555
} from './tools/device.tool';
5656
import { executeScriptTool, executeScriptToolDefinition } from './tools/execute-script.tool';
57+
import { attachBrowserTool, attachBrowserToolDefinition } from './tools/attach-browser.tool';
58+
import { emulateDeviceTool, emulateDeviceToolDefinition } from './tools/emulate-device.tool';
5759
import pkg from '../package.json' with { type: 'json' };
5860

5961
// IMPORTANT: Redirect all console output to stderr to avoid messing with MCP protocol (Chrome writes to console)
@@ -91,6 +93,8 @@ const registerTool = (definition: ToolDefinition, callback: ToolCallback) =>
9193
registerTool(startBrowserToolDefinition, startBrowserTool);
9294
registerTool(startAppToolDefinition, startAppTool);
9395
registerTool(closeSessionToolDefinition, closeSessionTool);
96+
registerTool(attachBrowserToolDefinition, attachBrowserTool);
97+
registerTool(emulateDeviceToolDefinition, emulateDeviceTool);
9498
registerTool(navigateToolDefinition, navigateTool);
9599

96100
// Element Discovery

src/tools/app-session.tool.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ export const startAppToolDefinition: ToolDefinition = {
2828
udid: z.string().optional().describe('Unique Device Identifier for iOS real device testing (e.g., "00008030-001234567890002E")'),
2929
noReset: z.boolean().optional().describe('Do not reset app state before session (preserves app data). Default: false'),
3030
fullReset: z.boolean().optional().describe('Uninstall app before/after session. Default: true. Set to false with noReset=true to preserve app state completely'),
31-
newCommandTimeout: z.number().min(0).optional().describe('How long (in seconds) Appium will wait for a new command before assuming the client has quit and ending the session. Default: 60. Set to 300 for 5 minutes, etc.'),
31+
newCommandTimeout: z.number().min(0).optional().default(300).describe('How long (in seconds) Appium will wait for a new command before assuming the client has quit and ending the session. Default: 300.'),
3232
capabilities: z.record(z.string(), z.unknown()).optional().describe('Additional Appium/WebDriver capabilities to merge with defaults (e.g. appium:udid, appium:chromedriverExecutable, appium:autoWebview)'),
3333
},
3434
};
@@ -82,7 +82,7 @@ export const startAppTool: ToolCallback = async (args: {
8282
udid,
8383
noReset,
8484
fullReset,
85-
newCommandTimeout,
85+
newCommandTimeout = 300,
8686
capabilities: userCapabilities = {},
8787
} = args;
8888

src/tools/attach-browser.tool.ts

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import { remote } from 'webdriverio';
2+
import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp';
3+
import type { CallToolResult } from '@modelcontextprotocol/sdk/types';
4+
import type { ToolDefinition } from '../types/tool';
5+
import { z } from 'zod';
6+
import { getBrowser } from './browser.tool';
7+
8+
/**
 * MCP tool definition for `attach_browser`: the tool name, the user-facing description
 * (including copy-pasteable shell commands for launching Chrome with remote debugging),
 * and the zod input schema.
 *
 * The macOS binary path contains spaces, so it is double-quoted in the commands below;
 * unquoted, a shell would split it into several words and the launch would fail.
 */
export const attachBrowserToolDefinition: ToolDefinition = {
9+
name: 'attach_browser',
10+
description: `Attach to a Chrome instance already running with --remote-debugging-port.
11+
12+
Start Chrome first (quit any running Chrome instance before launching):
13+
14+
macOS — with real profile (preserves extensions, cookies, logins):
15+
pkill -x "Google Chrome" && sleep 1
16+
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222 --user-data-dir="$HOME/Library/Application Support/Google/Chrome" --profile-directory=Default &
17+
18+
macOS — with fresh profile (lightweight, no extensions):
19+
pkill -x "Google Chrome" && sleep 1
20+
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug &
21+
22+
Linux — with real profile:
23+
google-chrome --remote-debugging-port=9222 --user-data-dir="$HOME/.config/google-chrome" --profile-directory=Default &
24+
25+
Linux — with fresh profile:
26+
google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug &
27+
28+
Verify Chrome is ready: curl http://localhost:9222/json/version
29+
30+
Then call attach_browser() to hand control to the AI. All other tools (navigate, click, get_visible_elements, etc.) will work on the attached session. Use close_session() to detach without closing Chrome.`,
31+
inputSchema: {
32+
port: z.number().default(9222).describe('Chrome remote debugging port (default: 9222)'),
33+
host: z.string().default('localhost').describe('Host where Chrome is running (default: localhost)'),
34+
// The value is interpolated verbatim into a --user-data-dir argument (no shell involved),
// so the description asks for an absolute path rather than a "$HOME/..." form.
userDataDir: z.string().default('/tmp/chrome-debug').describe('Chrome user data directory — must match the --user-data-dir used when launching Chrome. Pass an absolute path; shell variables such as $HOME are not expanded here. Use your real profile path (e.g. "/Users/you/Library/Application Support/Google/Chrome") to preserve extensions and logins, or /tmp/chrome-debug for a fresh profile (default: /tmp/chrome-debug)'),
35+
navigationUrl: z.string().optional().describe('URL to navigate to immediately after attaching'),
36+
},
37+
};
38+
39+
/**
 * Looks up the URL of an open page tab by querying the `/json` listing served on the
 * given debugging host and port.
 *
 * @param host - Host name used to build the `http://host:port/json` request.
 * @param port - Remote debugging port used to build the request.
 * @returns The `url` of the first listed entry whose `type` is `'page'` and whose URL is
 *   non-empty and does not start with `devtools://`; `null` when no entry matches or when
 *   the request or JSON parsing throws.
 */
async function getActiveTabUrl(host: string, port: number): Promise<string | null> {
40+
try {
41+
// NOTE(review): the HTTP status is not checked; a non-JSON body ends up in the catch below.
const res = await fetch(`http://${host}:${port}/json`);
42+
// The response shape is asserted, not validated — only `type` and `url` are read.
const tabs = await res.json() as { type: string; url: string }[];
43+
// First match wins; DevTools' own windows are excluded by their URL scheme.
const page = tabs.find((t) => t.type === 'page' && t.url && !t.url.startsWith('devtools://'));
44+
return page?.url ?? null;
45+
} catch {
46+
// Best-effort lookup: any failure is reported to the caller as "no URL".
return null;
47+
}
48+
}
49+
50+
/**
 * Tool callback for `attach_browser`: opens a WebdriverIO session against an already running
 * Chrome (via `debuggerAddress`), registers it as the current session, and reports the page
 * it ended up on.
 *
 * @param port - Remote debugging port (defaults to 9222).
 * @param host - Host where Chrome is running (defaults to 'localhost').
 * @param userDataDir - Passed through as a `--user-data-dir=` Chrome argument
 *   (defaults to '/tmp/chrome-debug').
 * @param navigationUrl - Optional URL to open after attaching; when omitted, the URL returned
 *   by `getActiveTabUrl` (if any) is used instead.
 * @returns A text result with the host/port, session id, page title and URL. Failures are not
 *   thrown: they are returned as a text result starting with "Error attaching to browser:".
 */
export const attachBrowserTool: ToolCallback = async ({
51+
port = 9222,
52+
host = 'localhost',
53+
userDataDir = '/tmp/chrome-debug',
54+
navigationUrl,
55+
}: {
56+
port?: number;
57+
host?: string;
58+
userDataDir?: string;
59+
navigationUrl?: string;
60+
}): Promise<CallToolResult> => {
61+
try {
62+
// Shared session registry is read off a `__state` property attached to getBrowser.
const state = (getBrowser as any).__state;
63+
64+
// Capture the active tab URL before WebDriver blanks it
65+
// An explicit navigationUrl takes precedence; the tab lookup only runs when it is null/undefined.
const activeUrl = navigationUrl ?? await getActiveTabUrl(host, port);
66+
67+
const browser = await remote({
68+
capabilities: {
69+
browserName: 'chrome',
70+
'goog:chromeOptions': {
71+
// Attach to the running instance at host:port.
debuggerAddress: `${host}:${port}`,
72+
args: [`--user-data-dir=${userDataDir}`],
73+
},
74+
},
75+
});
76+
77+
// Register the new session and make it the current one.
const { sessionId } = browser;
78+
state.browsers.set(sessionId, browser);
79+
state.currentSession = sessionId;
80+
state.sessionMetadata.set(sessionId, {
81+
type: 'browser',
82+
capabilities: browser.capabilities,
83+
// Marks the session as attached (not created by this server).
isAttached: true,
84+
});
85+
86+
// NOTE(review): the session is already registered at this point, so a navigation failure
// below returns an error result while the session stays in `state`.
if (activeUrl) {
87+
await browser.url(activeUrl);
88+
}
89+
90+
const title = await browser.getTitle();
91+
const url = await browser.getUrl();
92+
93+
return {
94+
content: [{
95+
type: 'text',
96+
text: `Attached to Chrome on ${host}:${port}\nSession ID: ${sessionId}\nCurrent page: "${title}" (${url})`,
97+
}],
98+
};
99+
} catch (e) {
100+
// Errors are reported as ordinary text content rather than rethrown.
return {
101+
content: [{ type: 'text', text: `Error attaching to browser: ${e}` }],
102+
};
103+
}
104+
};

src/tools/browser.tool.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,9 @@ export const closeSessionTool: ToolCallback = async (args: { detach?: boolean }
196196
const sessionId = state.currentSession;
197197
const metadata = state.sessionMetadata.get(sessionId);
198198

199-
// Only delete session if not detaching
200-
if (!args.detach) {
199+
// Skip deleteSession for attached sessions (not created by us) or when user explicitly detaches
200+
const effectiveDetach = args.detach || !!metadata?.isAttached;
201+
if (!effectiveDetach) {
201202
await browser.deleteSession();
202203
}
203204

@@ -206,7 +207,7 @@ export const closeSessionTool: ToolCallback = async (args: { detach?: boolean }
206207
state.sessionMetadata.delete(sessionId);
207208
state.currentSession = null;
208209

209-
const action = args.detach ? 'detached from' : 'closed';
210+
const action = effectiveDetach ? 'detached from' : 'closed';
210211
const note = args.detach && !metadata?.isAttached
211212
? '\nNote: Session will remain active on Appium server.'
212213
: '';

src/tools/emulate-device.tool.ts

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp';
2+
import type { CallToolResult } from '@modelcontextprotocol/sdk/types';
3+
import type { ToolDefinition } from '../types/tool';
4+
// DeviceName is not in webdriverio's public exports but is required to satisfy browser.emulate('device', ...) overloads.
5+
// This is a type-only import — it is stripped at build time by tsup and has no runtime impact.
6+
import type { DeviceName } from 'webdriverio/build/deviceDescriptorsSource.js';
7+
import { z } from 'zod';
8+
import { getBrowser } from './browser.tool';
9+
10+
// Stores restore functions returned by browser.emulate(), keyed by sessionId
// Entries are written when emulation is activated and removed on "reset" (see emulateDeviceTool).
// NOTE(review): nothing visible here removes an entry when its session is closed — confirm
// whether stale entries matter.
11+
const restoreFunctions = new Map<string, () => Promise<void>>();
12+
13+
/**
 * MCP tool definition for `emulate_device`: the tool name, the user-facing description with
 * usage examples, and the zod input schema (a single optional `device` string).
 */
export const emulateDeviceToolDefinition: ToolDefinition = {
14+
name: 'emulate_device',
15+
description: `Emulate a mobile or tablet device in the current browser session (sets viewport, DPR, user-agent, touch events).
16+
17+
Requires a BiDi-enabled session: start_browser({ capabilities: { webSocketUrl: true } })
18+
19+
Usage:
20+
emulate_device() — list available device presets
21+
emulate_device({ device: "iPhone 15" }) — activate emulation
22+
emulate_device({ device: "reset" }) — restore desktop defaults`,
23+
inputSchema: {
24+
// One field drives three modes: omitted = list presets, "reset" = restore, anything else = preset name.
device: z.string().optional().describe(
25+
'Device preset name (e.g. "iPhone 15", "Pixel 7"). Omit to list available presets. Pass "reset" to restore desktop defaults.'
26+
),
27+
},
28+
};
29+
30+
/**
 * Tool callback for `emulate_device`. Depending on `device` it lists the available presets,
 * resets a previously applied emulation, or applies a device preset to the current session.
 *
 * @param device - Omitted/empty: list presets. `'reset'`: run the stored restore function for
 *   the current session. Any other value: passed to `browser.emulate('device', ...)`.
 * @returns A text result describing the outcome. Errors (wrong session type, non-BiDi session,
 *   unknown device, or any thrown exception) are returned as text content, not thrown.
 */
export const emulateDeviceTool: ToolCallback = async ({
31+
device,
32+
}: {
33+
device?: string;
34+
}): Promise<CallToolResult> => {
35+
try {
36+
const browser = getBrowser();
37+
// Shared session registry is read off a `__state` property attached to getBrowser.
const state = (getBrowser as any).__state;
38+
const sessionId = state.currentSession as string;
39+
const metadata = state.sessionMetadata.get(sessionId);
40+
41+
// Guard: mobile sessions
42+
if (metadata?.type === 'ios' || metadata?.type === 'android') {
43+
return {
44+
content: [{ type: 'text', text: 'Error: emulate_device is only supported for web browser sessions, not iOS/Android.' }],
45+
};
46+
}
47+
48+
// Guard: BiDi required
49+
if (!browser.isBidi) {
50+
return {
51+
content: [{
52+
type: 'text',
53+
text: 'Error: emulate_device requires a BiDi-enabled session.\nRestart the browser with: start_browser({ capabilities: { webSocketUrl: true } })',
54+
}],
55+
};
56+
}
57+
58+
// List presets by triggering a known-bad device name and parsing the error
59+
// NOTE(review): this relies on the wording of the thrown message ("please use one of the
// following: ...") — confirm it still matches when upgrading webdriverio.
if (!device) {
60+
try {
61+
await browser.emulate('device', '\u0000' as DeviceName);
62+
} catch (e) {
63+
const msg = String(e);
64+
// Everything after the marker phrase up to the end of the message is taken as the name list.
const match = msg.match(/please use one of the following: (.+)$/);
65+
if (match) {
66+
const names = match[1].split(', ').sort();
67+
return {
68+
content: [{ type: 'text', text: `Available devices (${names.length}):\n${names.join('\n')}` }],
69+
};
70+
}
71+
return { content: [{ type: 'text', text: `Error listing devices: ${e}` }] };
72+
}
73+
// Reached only if the deliberately invalid name did not throw.
return { content: [{ type: 'text', text: 'Could not retrieve device list.' }] };
74+
}
75+
76+
// Reset
77+
if (device === 'reset') {
78+
const restoreFn = restoreFunctions.get(sessionId);
79+
if (!restoreFn) {
80+
return { content: [{ type: 'text', text: 'No active device emulation to reset.' }] };
81+
}
82+
await restoreFn();
83+
restoreFunctions.delete(sessionId);
84+
return { content: [{ type: 'text', text: 'Device emulation reset to desktop defaults.' }] };
85+
}
86+
87+
// Emulate device
88+
try {
89+
const restoreFn = await browser.emulate('device', device as DeviceName);
90+
// NOTE(review): when switching devices this replaces any previously stored restore
// function without calling it — confirm a later "reset" still returns to desktop defaults.
restoreFunctions.set(sessionId, restoreFn as () => Promise<void>);
91+
return {
92+
content: [{ type: 'text', text: `Emulating "${device}".` }],
93+
};
94+
} catch (e) {
95+
const msg = String(e);
96+
// Turn the library's unknown-name error into a hint pointing at the listing mode.
if (msg.includes('Unknown device name')) {
97+
return {
98+
content: [{
99+
type: 'text',
100+
text: `Error: Unknown device "${device}". Call emulate_device() with no arguments to list valid names.`,
101+
}],
102+
};
103+
}
104+
return { content: [{ type: 'text', text: `Error: ${e}` }] };
105+
}
106+
} catch (e) {
107+
// Catch-all (e.g. getBrowser() or the restore function throwing): report as text content.
return {
108+
content: [{ type: 'text', text: `Error: ${e}` }],
109+
};
110+
}
111+
};

0 commit comments

Comments
 (0)