Skip to content

Commit 43e5068

Browse files
Mark Hendersonfwang
authored andcommitted
fix: force kill MCP server processes on dispose to prevent orphan processes
Track StdioClientTransport instances for local MCP servers to enable forceful process termination on dispose. The MCP SDK's client.close() relies on AbortController signals which don't work for all processes (e.g., docker containers without --init, hung processes). Changes: - Track LocalTransportInfo (transport + name) alongside MCP clients - Add forceKillProcess() helper for cross-platform process termination - Use taskkill on Windows, SIGKILL on Unix for forceful termination - Add timeout to client.close() calls to prevent hanging on dispose - Update connect() and disconnect() to track/cleanup local transports - Add tests for transport tracking and client lifecycle This prevents orphan MCP server processes from accumulating across sessions, which was causing significant memory usage (~300MB+ per orphan process). Related: #7261
1 parent f1729e8 commit 43e5068

2 files changed

Lines changed: 297 additions & 5 deletions

File tree

packages/opencode/src/mcp/index.ts

Lines changed: 146 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,91 @@ export namespace MCP {
5555

5656
type MCPClient = Client
5757

58+
// Track local transports for process cleanup
59+
type LocalTransportInfo = {
60+
transport: StdioClientTransport
61+
}
62+
63+
// Check if a process is still running
64+
function isProcessAlive(pid: number): boolean {
65+
try {
66+
// Sending signal 0 checks if the process exists without actually killing it
67+
process.kill(pid, 0)
68+
return true
69+
} catch {
70+
return false
71+
}
72+
}
73+
74+
// Timeout for kill operations to prevent hanging
75+
const KILL_TIMEOUT_MS = 5000
76+
// Grace period before escalating SIGTERM to SIGKILL (aligns with Shell.killTree)
77+
const SIGKILL_DELAY_MS = 200
78+
79+
// Helper to forcefully kill a process by PID
80+
// Handles both Windows (taskkill) and Unix (SIGTERM→SIGKILL) platforms
81+
// Note: We can't reuse Shell.killTree here because it requires a ChildProcess object,
82+
// but MCP SDK's StdioClientTransport only exposes the PID, not the underlying process.
83+
async function forceKillProcess(pid: number, name: string): Promise<void> {
84+
// Check if process is still alive before attempting to kill
85+
if (!isProcessAlive(pid)) {
86+
log.debug("MCP process already terminated", { name, pid })
87+
return
88+
}
89+
90+
log.info("force killing MCP server process", { name, pid })
91+
try {
92+
if (process.platform === "win32") {
93+
const { spawn } = await import("child_process")
94+
const killer = spawn("taskkill", ["/pid", String(pid), "/f", "/t"], { stdio: "ignore" })
95+
await new Promise<void>((resolve) => {
96+
const timeout = setTimeout(() => {
97+
log.debug("taskkill timed out, killing taskkill process", { name, pid })
98+
killer.kill("SIGKILL")
99+
resolve()
100+
}, KILL_TIMEOUT_MS)
101+
killer.once("exit", (code) => {
102+
clearTimeout(timeout)
103+
if (code !== 0) {
104+
log.debug("taskkill exited with non-zero code", { name, pid, code })
105+
}
106+
resolve()
107+
})
108+
killer.once("error", (error) => {
109+
clearTimeout(timeout)
110+
log.debug("taskkill process error", { name, pid, error })
111+
resolve()
112+
})
113+
})
114+
} else {
115+
// Unix: Try graceful SIGTERM first, then escalate to SIGKILL
116+
// This aligns with Shell.killTree behavior
117+
const tryKill = (signal: NodeJS.Signals) => {
118+
try {
119+
process.kill(-pid, signal) // Process group first
120+
} catch {
121+
try {
122+
process.kill(pid, signal) // Individual process fallback
123+
} catch {
124+
// Process already gone
125+
}
126+
}
127+
}
128+
129+
tryKill("SIGTERM")
130+
await Bun.sleep(SIGKILL_DELAY_MS)
131+
132+
// If still alive, escalate to SIGKILL
133+
if (isProcessAlive(pid)) {
134+
log.debug("process still alive after SIGTERM, sending SIGKILL", { name, pid })
135+
tryKill("SIGKILL")
136+
}
137+
}
138+
} catch (error) {
139+
log.debug("failed to force kill MCP process", { name, pid, error })
140+
}
141+
}
142+
58143
export const Status = z
59144
.discriminatedUnion("status", [
60145
z
@@ -159,6 +244,7 @@ export namespace MCP {
159244
const config = cfg.mcp ?? {}
160245
const clients: Record<string, MCPClient> = {}
161246
const status: Record<string, Status> = {}
247+
const localTransports: Record<string, LocalTransportInfo> = {}
162248

163249
await Promise.all(
164250
Object.entries(config).map(async ([key, mcp]) => {
@@ -181,23 +267,38 @@ export namespace MCP {
181267
if (result.mcpClient) {
182268
clients[key] = result.mcpClient
183269
}
270+
if (result.localTransport) {
271+
localTransports[key] = result.localTransport
272+
}
184273
}),
185274
)
186275
return {
187276
status,
188277
clients,
278+
localTransports,
189279
}
190280
},
191281
async (state) => {
282+
// First, try to gracefully close all clients with a timeout
283+
const closeTimeout = 2000
192284
await Promise.all(
193285
Object.values(state.clients).map((client) =>
194-
client.close().catch((error) => {
195-
log.error("Failed to close MCP client", {
196-
error,
197-
})
286+
withTimeout(client.close(), closeTimeout).catch((error) => {
287+
log.error("Failed to close MCP client", { error })
198288
}),
199289
),
200290
)
291+
292+
// Then forcefully kill any remaining local MCP server processes
293+
// This handles processes that don't respond to the SDK's abort signal
294+
// (e.g., docker containers without --init, hung processes)
295+
for (const [name, info] of Object.entries(state.localTransports)) {
296+
const pid = info.transport.pid
297+
if (pid) {
298+
await forceKillProcess(pid, name)
299+
}
300+
}
301+
201302
pendingOAuthTransports.clear()
202303
},
203304
)
@@ -268,6 +369,9 @@ export namespace MCP {
268369
}
269370
s.clients[name] = result.mcpClient
270371
s.status[name] = result.status
372+
if (result.localTransport) {
373+
s.localTransports[name] = result.localTransport
374+
}
271375

272376
return {
273377
status: s.status,
@@ -279,12 +383,14 @@ export namespace MCP {
279383
log.info("mcp server disabled", { key })
280384
return {
281385
mcpClient: undefined,
386+
localTransport: undefined,
282387
status: { status: "disabled" as const },
283388
}
284389
}
285390

286391
log.info("found", { key, type: mcp.type })
287392
let mcpClient: MCPClient | undefined
393+
let localTransport: LocalTransportInfo | undefined
288394
let status: Status | undefined = undefined
289395

290396
if (mcp.type === "remote") {
@@ -406,6 +512,9 @@ export namespace MCP {
406512
},
407513
})
408514

515+
// Track transport before connection so we can clean up on failure
516+
localTransport = { transport }
517+
409518
const connectTimeout = mcp.timeout ?? DEFAULT_TIMEOUT
410519
try {
411520
const client = new Client({
@@ -425,6 +534,12 @@ export namespace MCP {
425534
cwd,
426535
error: error instanceof Error ? error.message : String(error),
427536
})
537+
// Force kill the process if connection failed
538+
const pid = transport.pid
539+
if (pid) {
540+
await forceKillProcess(pid, key)
541+
}
542+
localTransport = undefined
428543
status = {
429544
status: "failed" as const,
430545
error: error instanceof Error ? error.message : String(error),
@@ -442,6 +557,7 @@ export namespace MCP {
442557
if (!mcpClient) {
443558
return {
444559
mcpClient: undefined,
560+
localTransport: undefined,
445561
status,
446562
}
447563
}
@@ -456,12 +572,20 @@ export namespace MCP {
456572
error,
457573
})
458574
})
575+
// Force kill the process if listTools failed (graceful close may not work)
576+
if (localTransport) {
577+
const pid = localTransport.transport.pid
578+
if (pid) {
579+
await forceKillProcess(pid, key)
580+
}
581+
}
459582
status = {
460583
status: "failed",
461584
error: "Failed to get tools",
462585
}
463586
return {
464587
mcpClient: undefined,
588+
localTransport: undefined,
465589
status: {
466590
status: "failed" as const,
467591
error: "Failed to get tools",
@@ -472,6 +596,7 @@ export namespace MCP {
472596
log.info("create() successfully created client", { key, toolCount: result.tools.length })
473597
return {
474598
mcpClient,
599+
localTransport,
475600
status,
476601
}
477602
}
@@ -525,17 +650,33 @@ export namespace MCP {
525650
if (result.mcpClient) {
526651
s.clients[name] = result.mcpClient
527652
}
653+
if (result.localTransport) {
654+
s.localTransports[name] = result.localTransport
655+
}
528656
}
529657

530658
export async function disconnect(name: string) {
531659
const s = await state()
532660
const client = s.clients[name]
661+
const transportInfo = s.localTransports[name]
662+
533663
if (client) {
534-
await client.close().catch((error) => {
664+
// Try graceful close first
665+
await withTimeout(client.close(), 2000).catch((error) => {
535666
log.error("Failed to close MCP client", { name, error })
536667
})
537668
delete s.clients[name]
538669
}
670+
671+
// Force kill the process if it's a local MCP server
672+
if (transportInfo) {
673+
const pid = transportInfo.transport.pid
674+
if (pid) {
675+
await forceKillProcess(pid, name)
676+
}
677+
delete s.localTransports[name]
678+
}
679+
539680
s.status[name] = { status: "disabled" }
540681
}
541682

0 commit comments

Comments
 (0)