Skip to content

Commit e046c23

Browse files
authored
chore: in-page tool output: stash DOM elements and preprocess for serialization (#1877)
Improvements for handling in-page tool responses. In order to successfully pass an in-page tool response from the page context to the MCP server, the response needs to be serializable. The code walks the response object and performs the following changes: - DOM elements are stashed onto the window object and replaced with an ID. On the MCP server side, this ID is used to map back to the corresponding UID in the page snapshot generated from the accessibility tree. - Circular references are replaced with a string. - Class instances (which can be complex or non-serializable) are replaced with a string. - Functions are replaced with a string. If the in-page tool response contains DOM elements which are not part of the page snapshot, a new snapshot is created and the missing elements are added explicitly.
1 parent 85b8993 commit e046c23

18 files changed

Lines changed: 1182 additions & 245 deletions

src/McpContext.ts

Lines changed: 1 addition & 130 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@ import type {
2626
HTTPRequest,
2727
Page,
2828
ScreenRecorder,
29-
SerializedAXNode,
3029
Viewport,
3130
Target,
3231
Extension,
@@ -36,17 +35,11 @@ import {Locator} from './third_party/index.js';
3635
import {PredefinedNetworkConditions} from './third_party/index.js';
3736
import {listPages} from './tools/pages.js';
3837
import {CLOSE_PAGE_ERROR} from './tools/ToolDefinition.js';
39-
import type {
40-
Context,
41-
DevToolsData,
42-
SupportedExtensions,
43-
} from './tools/ToolDefinition.js';
38+
import type {Context, SupportedExtensions} from './tools/ToolDefinition.js';
4439
import type {TraceResult} from './trace-processing/parse.js';
4540
import type {
4641
EmulationSettings,
4742
GeolocationOptions,
48-
TextSnapshot,
49-
TextSnapshotNode,
5043
ExtensionServiceWorker,
5144
} from './types.js';
5245
import {ensureExtension, saveTemporaryFile} from './utils/files.js';
@@ -92,7 +85,6 @@ export class McpContext implements Context {
9285
#extensionServiceWorkerMap = new WeakMap<Target, string>();
9386
#nextExtensionServiceWorkerId = 1;
9487

95-
#nextSnapshotId = 1;
9688
#traceResults: TraceResult[] = [];
9789

9890
#locatorClass: typeof Locator;
@@ -647,127 +639,6 @@ export class McpContext implements Context {
647639
return this.#mcpPages.get(page)?.isolatedContextName;
648640
}
649641

650-
getDevToolsPage(page: Page): Page | undefined {
651-
return this.#mcpPages.get(page)?.devToolsPage;
652-
}
653-
654-
async getDevToolsData(page: McpPage): Promise<DevToolsData> {
655-
try {
656-
this.logger('Getting DevTools UI data');
657-
const devtoolsPage = this.getDevToolsPage(page.pptrPage);
658-
if (!devtoolsPage) {
659-
this.logger('No DevTools page detected');
660-
return {};
661-
}
662-
const {cdpRequestId, cdpBackendNodeId} = await devtoolsPage.evaluate(
663-
async () => {
664-
// @ts-expect-error no types
665-
const UI = await import('/bundled/ui/legacy/legacy.js');
666-
// @ts-expect-error no types
667-
const SDK = await import('/bundled/core/sdk/sdk.js');
668-
const request = UI.Context.Context.instance().flavor(
669-
SDK.NetworkRequest.NetworkRequest,
670-
);
671-
const node = UI.Context.Context.instance().flavor(
672-
SDK.DOMModel.DOMNode,
673-
);
674-
return {
675-
cdpRequestId: request?.requestId(),
676-
cdpBackendNodeId: node?.backendNodeId(),
677-
};
678-
},
679-
);
680-
return {cdpBackendNodeId, cdpRequestId};
681-
} catch (err) {
682-
this.logger('error getting devtools data', err);
683-
}
684-
return {};
685-
}
686-
687-
/**
688-
* Creates a text snapshot of a page.
689-
*/
690-
async createTextSnapshot(
691-
page: McpPage,
692-
verbose = false,
693-
devtoolsData: DevToolsData | undefined = undefined,
694-
): Promise<void> {
695-
const rootNode = await page.pptrPage.accessibility.snapshot({
696-
includeIframes: true,
697-
interestingOnly: !verbose,
698-
});
699-
if (!rootNode) {
700-
return;
701-
}
702-
703-
const {uniqueBackendNodeIdToMcpId} = page;
704-
705-
const snapshotId = this.#nextSnapshotId++;
706-
// Iterate through the whole accessibility node tree and assign node ids that
707-
// will be used for the tree serialization and mapping ids back to nodes.
708-
let idCounter = 0;
709-
const idToNode = new Map<string, TextSnapshotNode>();
710-
const seenUniqueIds = new Set<string>();
711-
const assignIds = (node: SerializedAXNode): TextSnapshotNode => {
712-
let id = '';
713-
// @ts-expect-error untyped loaderId & backendNodeId.
714-
const uniqueBackendId = `${node.loaderId}_${node.backendNodeId}`;
715-
if (uniqueBackendNodeIdToMcpId.has(uniqueBackendId)) {
716-
// Re-use MCP exposed ID if the uniqueId is the same.
717-
id = uniqueBackendNodeIdToMcpId.get(uniqueBackendId)!;
718-
} else {
719-
// Only generate a new ID if we have not seen the node before.
720-
id = `${snapshotId}_${idCounter++}`;
721-
uniqueBackendNodeIdToMcpId.set(uniqueBackendId, id);
722-
}
723-
seenUniqueIds.add(uniqueBackendId);
724-
725-
const nodeWithId: TextSnapshotNode = {
726-
...node,
727-
id,
728-
children: node.children
729-
? node.children.map(child => assignIds(child))
730-
: [],
731-
};
732-
733-
// The AXNode for an option doesn't contain its `value`.
734-
// Therefore, set text content of the option as value.
735-
if (node.role === 'option') {
736-
const optionText = node.name;
737-
if (optionText) {
738-
nodeWithId.value = optionText.toString();
739-
}
740-
}
741-
742-
idToNode.set(nodeWithId.id, nodeWithId);
743-
return nodeWithId;
744-
};
745-
746-
const rootNodeWithId = assignIds(rootNode);
747-
const snapshot: TextSnapshot = {
748-
root: rootNodeWithId,
749-
snapshotId: String(snapshotId),
750-
idToNode,
751-
hasSelectedElement: false,
752-
verbose,
753-
};
754-
page.textSnapshot = snapshot;
755-
const data = devtoolsData ?? (await this.getDevToolsData(page));
756-
if (data?.cdpBackendNodeId) {
757-
snapshot.hasSelectedElement = true;
758-
snapshot.selectedElementUid = page.resolveCdpElementId(
759-
data?.cdpBackendNodeId,
760-
);
761-
}
762-
763-
// Clean up unique IDs that we did not see anymore.
764-
for (const key of uniqueBackendNodeIdToMcpId.keys()) {
765-
if (!seenUniqueIds.has(key)) {
766-
uniqueBackendNodeIdToMcpId.delete(key);
767-
}
768-
}
769-
}
770-
771642
async saveTemporaryFile(
772643
data: Uint8Array<ArrayBufferLike>,
773644
filename: string,

0 commit comments

Comments
 (0)