diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index ae773da3..055eb842 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -250,9 +250,130 @@ export async function execPodStep( stdin?: stream.Readable ): Promise { const exec = new k8s.Exec(kc) + core.debug( + `[execPodStep] Starting execPodStep with command: ${JSON.stringify(command)}, podName: ${podName}, containerName: ${containerName}` + ) command = fixArgs(command) - return await new Promise(function (resolve, reject) { + core.debug(`[execPodStep] Fixed command: ${JSON.stringify(command)}`) + + // Heartbeat constants matching kubectl's Go implementation + const PING_PERIOD_MS = parseInt( + process.env.ACTIONS_RUNNER_HEARTBEAT_PERIOD_MS || '5000', + 10 + ) + const PING_READ_DEADLINE_MS = parseInt( + process.env.ACTIONS_RUNNER_HEARTBEAT_DEADLINE_MS || + String(PING_PERIOD_MS * 12 + 1000), + 10 + ) + core.debug( + `[execPodStep] Heartbeat config: PING_PERIOD_MS=${PING_PERIOD_MS}, PING_READ_DEADLINE_MS=${PING_READ_DEADLINE_MS}` + ) + + let pingInterval: ReturnType | null = null + let pongTimeout: ReturnType | null = null + let lastHeartbeatLog = 0 + const HEARTBEAT_LOG_INTERVAL_MS = 2 * 60 * 1000 // 2 minutes + + const shouldLogHeartbeat = (): boolean => { + const now = Date.now() + if (now - lastHeartbeatLog >= HEARTBEAT_LOG_INTERVAL_MS) { + lastHeartbeatLog = now + return true + } + return false + } + + const stopHeartbeat = (): void => { + if (shouldLogHeartbeat()) { + core.debug('[Heartbeat] stopHeartbeat called') + } + if (pingInterval) { + clearInterval(pingInterval) + pingInterval = null + } + if (pongTimeout) { + clearTimeout(pongTimeout) + pongTimeout = null + } + } + + const resetPongTimeout = (): void => { + if (shouldLogHeartbeat()) { + core.debug('[Heartbeat] resetPongTimeout called') + } + if (pongTimeout) { + clearTimeout(pongTimeout) + pongTimeout = null + } + pongTimeout = setTimeout(() => { + core.warning( + `[Heartbeat] No pong received in ${PING_READ_DEADLINE_MS}ms, connection may be stale` + ) + }, PING_READ_DEADLINE_MS) + } + + const startHeartbeat = (ws: any): void => { + core.debug( + `[Heartbeat] Starting with period=${PING_PERIOD_MS}ms, deadline=${PING_READ_DEADLINE_MS}ms` + ) + lastHeartbeatLog = Date.now() // Initialize timer + + // Handle pong responses + ws.on('pong', () => { + if (shouldLogHeartbeat()) { + core.debug('[Heartbeat] Pong received') + } + resetPongTimeout() + }) + + // Handle errors + ws.on('error', (err: Error) => { + core.error(`[Heartbeat] WebSocket error: ${err.message}`) + stopHeartbeat() + }) + + // Cleanup on close + ws.on('close', () => { + core.debug('[Heartbeat] WebSocket closed, stopping heartbeat') + stopHeartbeat() + }) + + // Set initial pong timeout + resetPongTimeout() + + // Start ping loop + pingInterval = setInterval(() => { + // WebSocket readyState: 0 = CONNECTING, 1 = OPEN, 2 = CLOSING, 3 = CLOSED + if (shouldLogHeartbeat()) { + core.debug(`[Heartbeat] Ping loop, ws.readyState=${ws.readyState}`) + } + if (ws.readyState === 1) { + try { + ws.ping() + if (shouldLogHeartbeat()) { + core.debug('[Heartbeat] Ping sent') + } + } catch (err) { + core.error(`[Heartbeat] Ping failed: ${err}`) + stopHeartbeat() + } + } else { + if (shouldLogHeartbeat()) { + core.debug( + `[Heartbeat] WebSocket not open (readyState=${ws.readyState}), stopping` + ) + } + stopHeartbeat() + } + }, PING_PERIOD_MS) + } + + return new Promise((resolve, reject) => { + core.debug('[execPodStep] About to call exec.exec') + let ws: any | null = null + exec .exec( namespace(), @@ -263,22 +384,83 @@ export async function execPodStep( process.stderr, stdin ?? null, false /* tty */, - resp => { - core.debug(`execPodStep response: ${JSON.stringify(resp)}`) + async resp => { + core.debug( + `[execPodStep] execPodStep response: ${JSON.stringify(resp)}` + ) + + // Stop heartbeat immediately + stopHeartbeat() + + // Close WebSocket and wait for it before resolving/rejecting + const closeWebSocket = async (): Promise => { + if (ws && (ws.readyState === 1 || ws.readyState === 0)) { + return new Promise(closeResolve => { + // Set a timeout to ensure we don't hang forever + const closeTimeout = setTimeout(() => { + core.warning( + '[execPodStep] WebSocket close timeout, forcing cleanup' + ) + closeResolve() + }, 5000) + + ws.once('close', () => { + clearTimeout(closeTimeout) + core.debug('[execPodStep] WebSocket closed cleanly') + closeResolve() + }) + ws.close() + }) + } + } + if (resp.status === 'Success') { + core.debug(`[execPodStep] Success, code: ${resp.code}`) + await closeWebSocket() resolve(resp.code || 0) } else { core.debug( - JSON.stringify({ - message: resp?.message, - details: resp?.details - }) + `[execPodStep] Failure: ${JSON.stringify({ message: resp?.message, details: resp?.details })}` ) + await closeWebSocket() reject(new Error(resp?.message || 'execPodStep failed')) } } ) - .catch(e => reject(e)) + .then(websocket => { + core.debug('[execPodStep] exec.exec resolved, ws object received') + ws = websocket + // Start heartbeat once WebSocket is connected + if (ws) { + startHeartbeat(ws) + } else { + core.warning('[Heartbeat] WebSocket is null, heartbeat not started') + } + }) + .catch(async e => { + stopHeartbeat() + core.error(`[execPodStep] exec.exec threw error: ${e}`) + + // Close WebSocket before rejecting with timeout protection + if (ws && (ws.readyState === 1 || ws.readyState === 0)) { + await new Promise(closeResolve => { + const closeTimeout = setTimeout(() => { + core.warning( + '[execPodStep] WebSocket close timeout in error handler' + ) + closeResolve() + }, 5000) + + ws.once('close', () => { + clearTimeout(closeTimeout) + closeResolve() + }) + ws.close() + }) + } + + reject(e) + }) }) }