Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions js/.changeset/detached-docker-running-status.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'start-command': patch
---

Fix detached docker `--status`/`--list` reporting a terminal status (`executed`) with the `-1` sentinel while the container is still running (or not visible yet on a slow Docker-in-Docker host). `isDetachedSessionAlive()` now treats a failed `docker inspect` as "unknown" (returns `null`) instead of "stopped", so a session whose container has not appeared yet stays `executing` rather than being marked finished. When a container has genuinely stopped, `enrichDetachedStatus()` resolves the real exit code from the `Exit Code:` log footer and then `docker inspect .State.ExitCode`, only falling back to `-1` when no real code can be obtained.
102 changes: 90 additions & 12 deletions js/src/lib/status-formatter.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,66 @@
* - Text: Human-readable text format
*/

const { execSync } = require('child_process');
const { execSync, spawnSync } = require('child_process');
const fs = require('fs');
const {
escapeForLinksNotation,
formatAsNestedLinksNotation,
} = require('./output-blocks');
const { collectProcessIds } = require('./execution-control');

/**
* Inspect the live state of a detached docker container by name.
*
* Distinguishes three outcomes that matter for status reporting:
* - the container exists and is running,
* - the container exists but has stopped (with a real exit code), and
* - the container cannot be inspected at all.
*
* The last case is crucial on slow Docker-in-Docker hosts (issue #136): right
* after `docker run -d` returns, `docker inspect <name>` can transiently fail
* because the container is not visible yet. A failed inspect must NOT be read
* as "stopped"; it means "unknown", so callers can keep the session running
* instead of fabricating a terminal `-1` result.
*
* @param {string} sessionName - Container name
* @returns {{running: boolean, exitCode: number|null}|null} State, or null when
* the container cannot be inspected (not found yet, removed, or docker error)
*/
function inspectDockerState(sessionName) {
const result = spawnSync(
'docker',
['inspect', '-f', '{{.State.Running}} {{.State.ExitCode}}', sessionName],
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
);
if (result.error || result.status !== 0 || !result.stdout) {
return null;
}
const [runningRaw, exitRaw] = result.stdout.trim().split(/\s+/);
const exitCode = Number.parseInt(exitRaw, 10);
return {
running: runningRaw === 'true',
exitCode: Number.isFinite(exitCode) ? exitCode : null,
};
}

/**
* Best-effort terminal exit code reported by the isolation backend itself
* (currently docker via `docker inspect .State.ExitCode`). Returns null when
* the backend cannot provide a real code, so callers never surface the `-1`
* sentinel for a session whose real exit code is simply not available yet.
* @param {Object} record - Execution record
* @returns {number|null}
*/
function readBackendExitCode(record) {
const opts = record.options || {};
if (opts.isolated !== 'docker' || !opts.sessionName) {
return null;
}
const state = inspectDockerState(opts.sessionName);
return state && !state.running ? state.exitCode : null;
}

/**
* Check if a detached isolation session is still running
* @param {Object} record - Execution record
Expand Down Expand Up @@ -46,11 +98,11 @@ function isDetachedSessionAlive(record) {
return true;
}
case 'docker': {
const output = execSync(
`docker inspect -f "{{.State.Running}}" ${sessionName}`,
{ encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
);
return output.trim() === 'true';
// A failed inspect means the container is not visible yet (still being
// created on a slow DinD host) or already removed — not "stopped".
// Return null (unknown) so the session is not falsely marked finished.
const state = inspectDockerState(sessionName);
return state === null ? null : state.running;
}
case 'ssh': {
// For SSH, check if the PID is still running on remote would require
Expand Down Expand Up @@ -100,13 +152,37 @@ function readExitCodeFromLog(logPath) {
*/
function enrichDetachedStatus(record) {
const alive = isDetachedSessionAlive(record);
const footerExit = readExitCodeFromLog(record.logPath);

// Create a shallow copy to avoid mutating the original
const cloneRecord = () => {
const enriched = Object.create(Object.getPrototypeOf(record));
Object.assign(enriched, record);
return enriched;
};

if (alive === null) {
// Liveness is unknown: the backend could not be probed (e.g. a detached
// docker container that is not visible yet on a slow Docker-in-Docker host,
// or one that has already been removed). Honor a terminal `Exit Code:`
// footer if the command wrote one; otherwise leave the record untouched
// (still executing) rather than fabricating a `-1` terminal result that
// orchestrators misread as a finished/failed run (issue #136).
const isDetached =
record.options && record.options.isolationMode === 'detached';
if (isDetached && record.status === 'executing' && footerExit !== null) {
const enriched = cloneRecord();
enriched.status = 'executed';
enriched.exitCode = footerExit;
if (!enriched.endTime) {
enriched.endTime = new Date().toISOString();
}
return enriched;
}
return record;
}

// Create a shallow copy to avoid mutating the original
const enriched = Object.create(Object.getPrototypeOf(record));
Object.assign(enriched, record);
const enriched = cloneRecord();

if (alive && enriched.status === 'executed') {
// A live `screen -ls` (or `tmux`/`docker`) session does NOT mean the command
Expand All @@ -115,7 +191,6 @@ function enrichDetachedStatus(record) {
// window after `start` already wrote the terminal footer). The footer/recorded
// exit code is authoritative. Only flip back to 'executing' when there is NO
// recorded terminal exit code AND no `Exit Code:` footer in the log.
const footerExit = readExitCodeFromLog(enriched.logPath);
const hasRecordedExit =
enriched.exitCode !== null && enriched.exitCode !== undefined;
if (!hasRecordedExit && footerExit === null) {
Expand All @@ -126,10 +201,13 @@ function enrichDetachedStatus(record) {
}
// Otherwise keep the recorded/footer exit code - the command has finished.
} else if (!alive && enriched.status === 'executing') {
// Session ended but record says executing - correct it
// Session ended but record says executing - correct it. Resolve a real exit
// code: prefer the log footer, then the backend's own record (e.g.
// `docker inspect .State.ExitCode`), and only fall back to the `-1` sentinel
// as a last resort when no real code can be obtained (issue #136).
enriched.status = 'executed';
if (enriched.exitCode === null || enriched.exitCode === undefined) {
enriched.exitCode = readExitCodeFromLog(enriched.logPath) ?? -1;
enriched.exitCode = footerExit ?? readBackendExitCode(enriched) ?? -1;
}
if (!enriched.endTime) {
enriched.endTime = new Date().toISOString();
Expand Down
123 changes: 123 additions & 0 deletions js/test/session-name-status.js
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,129 @@ describe('Issue #101: Detached status enrichment', () => {
});
});

// Issue #136: a detached docker session must not be reported with a terminal
// status (`executed`) and the `-1` sentinel while its container is still
// running (or not visible yet on a slow Docker-in-Docker host).
describe('Issue #136: detached docker session liveness', () => {
// Use the repo's own probe: `docker` may be installed yet unable to run Linux
// images (e.g. Windows runners in Windows-containers mode, where `alpine`
// never starts). In that case `docker inspect` fails and liveness is `null`
// (unknown) rather than `false`, which would break the stopped-container
// assertions below.
const { canRunLinuxDockerImages } = require('../src/lib/isolation');
const dockerAvailable = canRunLinuxDockerImages();

// Whether the container actually exists (was created) per `docker inspect`.
function dockerContainerExists(name) {
const probe = spawnSync('docker', ['inspect', name], { stdio: 'ignore' });
return probe.status === 0;
}

function makeDockerRecord(sessionName, extra = {}) {
return new ExecutionRecord({
command: 'sleep 120; echo done',
options: {
sessionName,
isolated: 'docker',
isolationMode: 'detached',
},
...extra,
});
}

function dockerRm(name) {
spawnSync('docker', ['rm', '-f', name], { stdio: 'ignore' });
}

it('reports unknown (null) — not false — when the container is not visible yet', () => {
if (!dockerAvailable) {
return;
}
const record = makeDockerRecord('issue136-container-does-not-exist-yet');
expect(isDetachedSessionAlive(record)).toBeNull();
});

it('keeps the record executing while the container is not visible yet', () => {
if (!dockerAvailable) {
return;
}
const record = makeDockerRecord('issue136-container-not-visible');
// Defaults to executing with a null exit code.
const enriched = enrichDetachedStatus(record);
expect(enriched.status).toBe('executing');
expect(enriched.exitCode).toBeNull();
expect(enriched.endTime).toBeNull();
});

it('keeps a running container executing', () => {
if (!dockerAvailable) {
return;
}
const name = `issue136-running-${process.pid}`;
dockerRm(name);
const started = spawnSync('docker', [
'run',
'-d',
'--name',
name,
'alpine',
'sh',
'-c',
'sleep 30',
]);
if (started.status !== 0) {
dockerRm(name);
return;
}
try {
const record = makeDockerRecord(name);
expect(isDetachedSessionAlive(record)).toBe(true);
const enriched = enrichDetachedStatus(record);
expect(enriched.status).toBe('executing');
expect(enriched.exitCode).toBeNull();
} finally {
dockerRm(name);
}
});

it('resolves a stopped container to its real exit code, never the -1 sentinel', () => {
if (!dockerAvailable) {
return;
}
const name = `issue136-stopped-${process.pid}`;
dockerRm(name);
const ran = spawnSync('docker', [
'run',
'--name',
name,
'alpine',
'sh',
'-c',
'exit 1',
]);
// `docker run` exits with the container's code (1 here); treat spawn errors
// (no daemon) or a container that never materialized (e.g. the Linux image
// could not be pulled) as a skip — there is nothing stopped to inspect.
if (ran.error || !dockerContainerExists(name)) {
dockerRm(name);
return;
}
try {
// No log footer: force exit-code resolution through `docker inspect`.
const record = makeDockerRecord(name, {
logPath: '/nonexistent-issue136.log',
});
expect(isDetachedSessionAlive(record)).toBe(false);
const enriched = enrichDetachedStatus(record);
expect(enriched.status).toBe('executed');
expect(enriched.exitCode).toBe(1);
expect(enriched.endTime).not.toBeNull();
} finally {
dockerRm(name);
}
});
});

describe('Issue #105: attachCurrentTime for executing status', () => {
it('should add currentTime to serialization when status is executing', () => {
const record = new ExecutionRecord({
Expand Down
5 changes: 5 additions & 0 deletions rust/changelog.d/136.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
bump: patch
---

Fixed detached docker `--status`/`--list` reporting a terminal status (`executed`) with the `-1` sentinel while the container is still running (or not visible yet on a slow Docker-in-Docker host). `is_detached_session_alive` now treats a failed `docker inspect` as "unknown" (`None`) instead of "stopped", so a session whose container has not appeared yet stays `executing` rather than being marked finished. When a container has genuinely stopped, `enrich_detached_status` resolves the real exit code from the `Exit Code:` log footer and then `docker inspect .State.ExitCode`, only falling back to `-1` when no real code can be obtained.
Loading
Loading