diff --git a/.gitignore b/.gitignore
index 6dcea69..a4dcdca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,9 +19,11 @@ venv/
.mypy_cache/
.pyright/
.openclaw-state.json
-.entrabot-state.json
-.entrabot-state.json.bak.*
+.entrabot-state*.json
+.entrabot-state*.json.bak.*
.mcp.json
+.mcp.json.bak.*
+.mcp.*.json
a365.config.json
a365.generated.config.json
ToolingManifest.json
@@ -51,3 +53,8 @@ pip.ini
docs/runbooks/session-*.md
docs/superpowers/
.entraclaw-state.json
+.mxc-build/mxc-src/
+# MXC Windows SDK fetched on demand by scripts/setup_sandbox.ps1 (large; the
+# pinned SHA256 in src/entrabot/sandbox/binary.py is the trust anchor, not the
+# committed bytes).
+.mxc-build/npm/
diff --git a/.mxc-build/target/release/.mxc-exec-mac.mock b/.mxc-build/target/release/.mxc-exec-mac.mock
new file mode 100755
index 0000000..09eb4d1
--- /dev/null
+++ b/.mxc-build/target/release/.mxc-exec-mac.mock
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Test MXC mock - executes process.commandLine from the stdin config with NO
+# sandboxing, for testing entrabot plumbing. Mimics MXC 0.6.0-alpha schema.
+
+# Read MXC config from stdin (JSON); a terminal on stdin means no config.
+if [ -t 0 ]; then
+ CONFIG="{}"
+else
+ CONFIG=$(cat)
+fi
+
+# Extract command from MXC schema: process.commandLine (null/absent -> empty)
+COMMAND=$(echo "$CONFIG" | python3 -c "
+import json, sys
+try:
+ c = json.load(sys.stdin)
+ print(c.get('process', {}).get('commandLine') or '')
+except Exception:
+ print('')
+" 2>/dev/null || echo "")
+
+if [ -z "$COMMAND" ]; then
+ echo '{"error": "No command specified in process.commandLine"}' >&2
+ exit 1
+fi
+# Cap at 30s when timeout(1) exists; stock macOS ships without it, so fall
+# back to an uncapped run instead of failing every command with exit 127.
+LIMIT=()
+command -v timeout >/dev/null 2>&1 && LIMIT=(timeout 30s)
+"${LIMIT[@]}" bash -c "$COMMAND" 2>&1
+exit $?
diff --git a/.mxc-build/target/release/mxc-exec-mac b/.mxc-build/target/release/mxc-exec-mac
new file mode 100755
index 0000000..d228002
Binary files /dev/null and b/.mxc-build/target/release/mxc-exec-mac differ
diff --git a/README.md b/README.md
index 6df8f07..c13e4f7 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Entrabot: Identity Research for Microsoft 365 Agents
+# Entrabot: Identity Research for Microsoft 365 Agents using Autopilot
Entrabot is a Python MCP server that gives a device-local agent its own Entra **Agent ID** and an **Agent User** that has all the capabilities of a human user in a Microsoft tenant. It can have a Teams presence and be invited to meetings to chat with your colleagues 1:1, a mailbox it can monitor and respond to, create and edit Word documents, make PowerPoint presentations, and allows you to access your CLI. The agent signs in autonomously, sends Teams messages from its own account, and writes audit events against its own object ID. It runs on macOS, Linux, and Windows, and works with Claude Code, Copilot CLI, or any MCP-speaking client.
@@ -35,6 +35,7 @@ entrabot is the device-side glue for a set of platform primitives Microsoft ship
- **Entra Agent ID** — the four-object hierarchy: Agent Identity Blueprint → BlueprintPrincipal → Agent Identity → Agent User. Confidential clients only; no public-client flows; tokens carry `idtyp=user` for the Agent User leaf. ([platform learning](docs/platform-learnings/agent-id-blueprints-and-users.md))
- **Microsoft Agent 365** — the control plane: admin-center inventory, OTel observability, Work IQ MCP servers (Mail, Calendar, Teams, SharePoint, OneDrive, Word, User, Copilot, Dataverse), AI-teammate lifecycle. GA 2026-05-01. ([platform learning](docs/platform-learnings/microsoft-agent-365.md))
+- **MXC Sandbox** — OS-enforced containment for local code execution. Process-level isolation with positive-allowlist filesystem access, network blocking, and operator-set capability ceilings. Opt-in `run_code` tool (disabled by default). Phase 1 ships macOS Seatbelt and the Windows `processcontainer` backend (Win11 24H2+); Linux seccomp-bpf is still to come. Phase 2 will bind sessions to Agent User identity for M365 audit attribution. ([setup guide](docs/guides/mxc-sandbox.md) · [ADR-007](docs/decisions/007-mxc-sandbox-integration.md))
- **Conditional Access for agents** — GA. Apply CA policies to Agent Identity sign-ins the same way you apply them to users.
- **ID Protection for agents** — GA. Risk scoring and remediation against the agent's own object.
- **FastMCP** — the Python MCP server framework. entrabot registers every Teams, Outlook, Files, Word, audit, and identity tool through it.
@@ -96,7 +97,7 @@ source .venv/bin/activate
claude --dangerously-load-development-channels server:entrabot
```
-`setup.sh` is idempotent. It provisions the Blueprint, BlueprintPrincipal, Agent Identity, and Agent User; assigns a Teams-capable license; uploads a self-signed certificate to Entra; and writes `.env` plus `.mcp.json` with no secrets on disk. Full walkthrough — including Windows, cloud memory, cross-tenant group chats, and the Work IQ Word setup — is in [`docs/getting-started/quickstart.md`](docs/getting-started/quickstart.md) and [`INSTALL.md`](INSTALL.md).
+`setup.sh` is idempotent. It provisions the Blueprint, BlueprintPrincipal, Agent Identity, and Agent User; assigns a Teams-capable license; uploads a self-signed certificate to Entra; and writes `.env` plus `.mcp.json` with no secrets on disk. Add `--use-cloud-memory` to enable Azure Blob storage for operational state, or `--enable-sandbox` to provision the MXC sandbox for contained local code execution (opt-in, disabled by default; see the [MXC sandbox setup guide](docs/guides/mxc-sandbox.md)). The full walkthrough — including Windows, cloud memory, cross-tenant group chats, and the Work IQ Word setup — is in [`docs/getting-started/quickstart.md`](docs/getting-started/quickstart.md) and [`INSTALL.md`](INSTALL.md).
### Launching the agent
@@ -146,6 +147,7 @@ The full doc site: ****
Direct pointers:
- [Quickstart](docs/getting-started/quickstart.md) — five minutes from clone to first Teams message
+- [MXC sandbox setup](docs/guides/mxc-sandbox.md) — enable contained local code execution (`run_code`): build the binary, set the operator ceiling, verify kernel enforcement
- [MCP tool reference](docs/reference/mcp-tools.md) — every tool, every parameter
- [Setup script reference](docs/reference/setup-script.md) — every `setup.sh` flag
- [Script reference](docs/reference/scripts/operations.md) — status, health, DM, email, setup, teardown, and diagnostic scripts
diff --git a/TODOS.md b/TODOS.md
index 53832e1..34e3f61 100644
--- a/TODOS.md
+++ b/TODOS.md
@@ -98,11 +98,11 @@ Two bugs, both observed at 2026-04-17T17:00:00 PDT (= 00:00:01 UTC 2026-04-18):
### ~~Token auto-refresh in teams_send~~ ✅ DONE
Implemented as `_with_token_retry()` in `mcp_server.py` and `_ensure_valid_token()` (proactive refresh at 55 min). All tools use it.
-### AppContainer sandbox production implementation
-Tonight's spike proves feasibility. Production version needs: filesystem allowlist, network filtering (Graph API only), process spawn restrictions, MCP server integration. May require Win32 C extension from Python.
-- **Effort:** L (CC: ~1-2 days)
-- **Depends on:** AppContainer spike results
+### ~~AppContainer sandbox production implementation~~ ✅ DONE (MXC sandbox integration)
+**Shipped as MXC sandbox integration (Issue #84, ADR-007).** Phase 1 complete: process-level containment via MXC 0.6.0-alpha (macOS Seatbelt). Positive-allowlist filesystem, network blocking, operator ceiling enforcement, binary SHA256 verification, opt-in `run_code` tool. The macOS mock binary was retired on 2026-06-18 and replaced with a real `mxc-exec-mac` built from `microsoft/mxc` v0.6.1 plus the repo-local stdin compatibility patch in `scripts/mxc-mac-stdin-compat.patch`. Phase 2 stub (session-bound Entra identity attribution) ready for future APIs. Windows `processcontainer` support shipped as a follow-on (ADR-007, Phase 3); Linux seccomp-bpf remains deferred to T10.
+- **Status:** Phase 1 shipped (1605 tests passing), Phase 2 stub in place
- **Source:** CEO review, refined premise (sandbox co-equal with identity)
+- **See:** `docs/decisions/007-mxc-sandbox-integration.md`, `docs/architecture/DESIGN-mxc-sandbox.md`
## P2
diff --git a/docs/decisions/007-mxc-sandbox-integration.md b/docs/decisions/007-mxc-sandbox-integration.md
new file mode 100644
index 0000000..89026ea
--- /dev/null
+++ b/docs/decisions/007-mxc-sandbox-integration.md
@@ -0,0 +1,380 @@
+# ADR-007: MXC Sandbox Integration for Contained Local Code Execution
+
+**Status:** Accepted
+**Date:** 2026-06-13
+**Updated:** 2026-06-17
+**Deciders:** @brandwe, Claude Code
+
+## Context
+
+Entrabot enables AI agents to operate autonomously on local devices (Mac/Linux/Windows) with Microsoft Entra identity. The agent needs to execute code locally for capabilities like:
+- File system access (read user documents, write reports)
+- Process execution (run scripts, build code, analyze logs)
+- System interaction (check disk usage, query installed packages)
+
+**The problem:** Without sandboxing, agents operate with full user permissions — a compromised or malicious agent can access secrets, exfiltrate data, or damage the system.
+
+**Prior state:** No local execution capability. Agent could only call cloud APIs (Teams, Files, Email). Users requested local file access for document analysis and report generation.
+
+## Decision
+
+Integrate Microsoft Execution Containers (MXC) for OS-enforced sandboxing of local code execution, pairing Entra identity attribution with containment.
+
+### Phase 1: Process-Level Containment (IMPLEMENTED)
+
+Ship macOS/Windows process-level sandboxing via MXC 0.6.0-alpha:
+- **Backend.PROCESS**: Single-process containment without session isolation
+- **Positive-allowlist only**: Specify what's accessible (readonly/readwrite paths)
+- **Operator ceiling**: Human sets maximum capabilities, LLM can only narrow
+- **Audit-first**: Fail-closed if audit recording fails
+- **Binary verification**: SHA256 check before execution, refuse tampered binaries
+- **Opt-in**: Disabled by default (`ENTRABOT_ENABLE_RUN_CODE=1` required)
+
+### Phase 2: Session-Bound Identity Attribution (STUB ONLY)
+
+Future work when Entra/Intune APIs GA:
+- **Backend.SESSION**: Per-conversation session isolation
+- **Identity binding**: MXC sessions bound to Entra Agent User
+- **Governance**: Intune policies control agent capabilities
+- **M365 audit logs**: "Agent did X" vs "Human did X" attribution
+
+**Gating:**
+- MXC session API (not in 0.6.0-alpha schema)
+- Entra identity binding surface (availability unclear)
+- Intune agent governance APIs (not exposed as of 2026-06)
+
+Phase 2 stub shipped in `src/entrabot/sandbox/session.py` with `NotImplementedError` to enable future integration without breaking changes.
+
+## Implementation
+
+### Architecture
+
+```
+┌─────────────────────────────────────────────┐
+│ EntraBot MCP Server (mcp_server.py) │
+│ ├─ run_code() tool (opt-in) │
+│ └─ write_local_file() tool (demo only) │
+└──────────────────┬──────────────────────────┘
+ │
+ ▼
+┌─────────────────────────────────────────────┐
+│ Sandbox Layer (src/entrabot/sandbox/) │
+│ ├─ base.py: SandboxRunner protocol │
+│ ├─ policy.py: Policy builder + clamping │
+│ ├─ binary.py: Binary resolution + verify │
+│ ├─ mac.py: macOS Seatbelt runner │
+│ ├─ windows.py: Windows processcontainer │
+│ ├─ linux.py: seccomp-bpf runner (TODO) │
+│ └─ session.py: Phase 2 stub │
+└──────────────────┬──────────────────────────┘
+ │
+ ▼
+┌─────────────────────────────────────────────┐
+│ MXC Binary (mxc-exec-mac / mxc-exec-win) │
+│ - Reads policy JSON from stdin │
+│ - Enforces containment at OS level │
+│ - Returns stdout/stderr/exit_code │
+└─────────────────────────────────────────────┘
+```
+
+### Security Model
+
+**Learning #54 Enforcement:**
+```python
+operator_ceiling = load_operator_ceiling_from_env() # Human-set limits
+effective_policy = clamp_to_ceiling(agent_policy, operator_ceiling)
+# Result: the agent can only NARROW its access, never WIDEN it beyond the ceiling
+```
+
+**Fail-Closed:**
+- Binary tampering detected → refuse to run
+- Audit logging fails → refuse to run
+- Policy requests unenforceable primitive → refuse to run
+
+**No Secrets in Sandbox:**
+- `keychainAccess=false` hardcoded (not overridable by LLM)
+- Prevents access to user's passwords, certificates, tokens
+
+**Audit-First:**
+```python
+audit_success = audit.emit("run_code", "pending", policy) # BEFORE execution
+if not audit_success:
+ raise SandboxError("Audit failed - refusing to run")
+result = runner.run(policy) # AFTER audit confirmed
+audit.emit("run_code", "success" if result.exit_code == 0 else "failure", result)
+```
+
+### MXC Policy Schema (0.6.0-alpha)
+
+```json
+{
+ "version": "0.6.0-alpha",
+ "containment": "process",
+ "process": {
+ "commandLine": "echo hello",
+ "timeout": 30000
+ },
+ "filesystem": {
+ "readonlyPaths": ["/tmp", "/Users/you/Documents"],
+ "readwritePaths": ["/tmp"]
+ },
+ "network": {
+ "defaultPolicy": "block"
+ },
+ "keychainAccess": false
+}
+```
+
+### Code Structure
+
+| Module | Purpose | Tests |
+|--------|---------|-------|
+| `sandbox/base.py` | SandboxRunner protocol, dataclasses, errors | 19 |
+| `sandbox/policy.py` | Policy builder, ceiling clamping | 12 |
+| `sandbox/binary.py` | Binary resolution, SHA256 verification | 13 |
+| `sandbox/mac.py` | macOS Seatbelt runner | 9 |
+| `sandbox/session.py` | Phase 2 stub (identity binding) | 10 |
+| `tests/test_mcp_run_code.py` | run_code() MCP tool | 10 |
+| `tests/test_write_local_file.py` | Demo tool (unsafe) | 8 |
+
+**Total:** 81 new tests, all passing
+
+### Platform Coverage
+
+| Platform | Backend | Status | Notes |
+|----------|---------|--------|-------|
+| macOS | Seatbelt | ✅ SHIPPED | Requires `--experimental` flag |
+| Windows | processcontainer | ✅ SHIPPED | AppContainer/BaseContainer; default (non-experimental) backend, Win11 24H2+ (build 26100+). Verified against `@microsoft/mxc-sdk` v0.7.0 `wxc-exec.exe`. |
+| Linux | seccomp-bpf | ⏳ TODO (T10) | Optional, lower priority |
+
+### Demonstration Tool
+
+`write_local_file()` — DELIBERATELY UNSAFE tool for security demonstration:
+- No path validation
+- No ceiling enforcement
+- Can write anywhere with user permissions
+- Contrasts with sandboxed `run_code()` to show value
+
+**Demo scenario:**
+```
+UNSAFE: write_local_file(path="~/Desktop/hack.txt", content="pwned")
+ → ✅ Succeeds (DANGEROUS!)
+
+SAFE: run_code(argv=["sh", "-c", "echo pwned > ~/Desktop/hack.txt"],
+ readwrite_paths=["~/Desktop"])
+ → ❌ Blocked (Desktop not in operator ceiling)
+```
+
+## Consequences
+
+### Positive
+
+✅ **Least-privilege execution** — Agents can't access more than operator allows
+✅ **Fail-closed security** — Violations logged and blocked, not silently allowed
+✅ **Platform-enforced** — OS kernel enforces policy, not just Python checks
+✅ **Audit trail** — Every execution logged (pending/success/failure)
+✅ **Future-ready** — Phase 2 stub enables Entra identity binding without refactor
+✅ **Opt-in** — Disabled by default, explicit flag required
+
+### Negative
+
+⚠️ **MXC binary required** — Users must install/build MXC (setup.sh automates)
+⚠️ **No Linux runner yet** — macOS and Windows ship; Linux seccomp-bpf is deferred (T10)
+⚠️ **Local stdin-compat patch on macOS** — Entrabot streams config on stdin, so the
+darwin build uses `scripts/mxc-mac-stdin-compat.patch` on top of upstream
+MXC v0.6.1 until upstream exposes a native stdin config path
+⚠️ **Phase 2 unvalidated** — Identity binding assumptions need verification when APIs GA
+⚠️ **Performance overhead** — Subprocess spawning + policy enforcement adds latency
+
+### Trade-offs
+
+**Chosen:** Positive-allowlist only (no deniedPaths)
+**Rejected:** Deny-list approach (Windows doesn't support deniedPaths)
+**Rationale:** Portable security model across platforms
+
+**Chosen:** Operator ceiling, LLM can only narrow
+**Rejected:** LLM-controlled policy (too dangerous)
+**Rationale:** Learning #54 — LLMs will try to widen access if allowed
+
+**Chosen:** Subprocess execution via MXC binary
+**Rejected:** In-process sandboxing (seccomp in Python)
+**Rationale:** MXC provides cross-platform API, better isolation
+
+**Chosen:** Phase 1 process-level, Phase 2 session-level
+**Rejected:** Wait for session APIs before shipping
+**Rationale:** Ship value now, add identity attribution later
+
+## Alternatives Considered
+
+### 1. No Sandboxing (Status Quo)
+
+**Approach:** Don't add local execution, keep agent cloud-only
+**Pros:** No security risk, simple
+**Cons:** Can't access local files, limits agent utility
+**Rejected:** Users need local file access (document analysis, report generation)
+
+### 2. Python-Only Sandboxing (subprocess, chroot)
+
+**Approach:** Use Python `subprocess` with OS-specific sandbox flags
+**Pros:** No external binary, faster iteration
+**Cons:** Platform-specific code, easy to get wrong, incomplete isolation
+**Rejected:** MXC provides vetted cross-platform sandbox API
+
+### 3. VM/Container Per Execution
+
+**Approach:** Docker container or lightweight VM per `run_code()` call
+**Pros:** Strongest isolation
+**Cons:** Slow (seconds per invocation), heavyweight, complex setup
+**Rejected:** Too slow for interactive agent UX
+
+### 4. WebAssembly Sandbox
+
+**Approach:** Compile Python to WASM, run in sandboxed runtime
+**Pros:** Strong isolation, fast
+**Cons:** Limited syscall access, can't read user files directly
+**Rejected:** User scenarios need native file system access
+
+### 5. Wait for MXC Session API (Phase 2 First)
+
+**Approach:** Block Phase 1 until Entra/MXC session APIs are GA
+**Pros:** Ship complete solution once
+**Cons:** Delays value delivery, APIs may not GA for months
+**Rejected:** Phase 1 process-level sandboxing provides immediate value
+
+## Implementation Plan (T1–T8 COMPLETED; T9–T10 open)
+
+- [x] **T1**: Base protocol and dataclasses (19 tests)
+- [x] **T2**: Policy building and clamping (12 tests)
+- [x] **T3**: Binary resolution and verification (13 tests)
+- [x] **T4**: macOS Seatbelt runner (9 tests)
+- [x] **T5**: run_code MCP tool (10 tests)
+- [x] **T6**: setup_sandbox.sh script (idempotent, non-fatal)
+- [x] **T6.5**: write_local_file demo tool (8 tests)
+- [x] **T7**: Phase 2 session stub (10 tests)
+- [x] **T8**: Documentation (this ADR)
+- [ ] **T9**: Adversarial integration tests (opt-in)
+- [ ] **T10**: Linux seccomp-bpf runner (optional)
+
+**Test suite:** 1605 passing (81 new for MXC)
+
+## Validation
+
+### Functional Testing
+
+✅ Binary resolution works (MXC_BIN_DIR, npm global, fallback)
+✅ SHA256 verification blocks tampered binaries
+✅ Policy clamping enforces operator ceiling (LLM can't widen)
+✅ macOS runner executes commands and returns results
+✅ run_code tool registers when ENTRABOT_ENABLE_RUN_CODE=1
+✅ Audit logging records pending/success/failure
+✅ Demo tool contrasts unsafe vs safe execution
+
+### Security Testing (T9 - In Progress)
+
+⏳ Symlink escape blocked
+⏳ Path traversal blocked
+⏳ Keychain access denied (keychainAccess=false enforced)
+⏳ Network isolation enforced (defaultPolicy=block)
+⏳ Timeout kills process tree
+⏳ Binary tampering detected and blocked
+
+### User Scenario Testing
+
+**Scenario:** Agent in Teams chat tries to read/write local files
+
+**Setup:**
+```bash
+export ENTRABOT_SANDBOX_READONLY_PATHS=/Users/you/Documents:/tmp
+export ENTRABOT_SANDBOX_READWRITE_PATHS=/tmp
+export ENTRABOT_ENABLE_RUN_CODE=1
+```
+
+**Test cases:**
+| User Request | Agent Tool Call | Outcome |
+|--------------|----------------|---------|
+| "Write file to Documents" | `run_code(..., readwrite_paths=["~/Documents"])` | ❌ BLOCKED (not in ceiling) |
+| "Read file from Documents" | `run_code(..., readonly_paths=["~/Documents"])` | ✅ ALLOWED (in readonly ceiling) |
+| "Write file to /tmp" | `run_code(..., readwrite_paths=["/tmp"])` | ✅ ALLOWED (in readwrite ceiling) |
+
+✅ Demonstrates **least-privilege enforcement** — agent can read Documents but not write
+
+## References
+
+- **Design spec:** `docs/architecture/DESIGN-mxc-sandbox.md`
+- **Platform research:** `docs/platform-learnings/mxc-windows-sandbox.md`
+- **Learning #54:** "Operator sets ceiling, LLM can only narrow" (`docs/runbooks/hard-won-learnings.md`)
+- **Issue #84:** MXC Sandbox Integration (GitHub)
+- **MXC OSS repo:** `github.com/microsoft/mxc` (the macOS binary is built from v0.6.1 with `scripts/mxc-mac-stdin-compat.patch` applied)
+- **Build 2026 announcement:** Windows Developer Blog, *Windows platform security for AI agents* (2026-06-02)
+
+## Supersedes
+
+- TODOS.md "AppContainer sandbox production implementation" item (now tracked in Issue #84 and this ADR)
+
+## Future Work
+
+### Phase 2: Entra-Bound Session Isolation
+
+**When APIs GA:**
+1. Implement `identity_binding()` (currently raises NotImplementedError)
+2. Bind MXC sessions to Entra Agent User via token
+3. Add per-conversation session isolation (Backend.SESSION)
+4. Integrate Intune governance (policy-controlled capabilities)
+5. Surface M365 audit attribution (agent vs human actions)
+
+**Gating questions to resolve:**
+- Is entrabot's Entra Agent User the same identity MXC attributes to?
+- Can MXC sessions reference external identity providers (Entra)?
+- Does Intune expose agent governance APIs for non-human principals?
+
+### Phase 3: Windows Support (T4) — SHIPPED
+
+- [x] Implement `sandbox/windows.py` with `ProcessContainerRunner`
+- [x] Wire `get_sandbox_runner()` to return it on `win32`
+- [x] Pin real `wxc-exec.exe` SHA256 (`win32-arm64`, `win32-x64`) from `@microsoft/mxc-sdk` v0.7.0
+- [x] Normalize Windows `platform.machine()` (`AMD64`/`ARM64`) for binary lookup + hash key
+- [x] Fix `os.pathsep` ceiling parsing (drive-letter colon no longer shreds paths)
+- [x] Windows-path + arch tests; `setup_sandbox.ps1` provisioning script
+- [x] Preview learnings: `docs/platform-learnings/mxc-windows-sandbox-preview.md`
+
+Differences from macOS captured during the port: config is delivered via
+`--config-base64` (no stdin), `--experimental` is NOT needed (processcontainer is
+a default backend), the parser rejects unknown top-level fields (the stray
+`keychainAccess` field was removed from `build_policy`), `commandLine` runs via
+`CreateProcessW` with no implicit shell (use `cmd /c ...`), and host-based network
+filtering (`allowedHosts`) is unenforced — reflected in `get_capabilities` so
+fail-closed logic refuses it.
+
+### Phase 4: Linux Support (T10)
+
+- Implement `sandbox/linux.py` with SeccompRunner
+- Add Linux-specific tests (seccomp-bpf policy validation)
+- Update setup_sandbox.sh for Linux binary resolution
+
+### Adversarial Testing (T9)
+
+- Symlink escape attempts (e.g., `/tmp/link -> ~/Desktop`)
+- Path traversal (`../../.ssh/id_rsa`)
+- Fork bombs (process limit enforcement)
+- Timing attacks (timeout enforcement)
+- Binary tampering (SHA256 mismatch handling)
+
+## Decision Log
+
+| Date | Decision | Rationale |
+|------|----------|-----------|
+| 2026-06-13 | Integrate MXC for sandboxing | Vetted cross-platform API, OS-enforced isolation |
+| 2026-06-13 | Phase 1 process-level, Phase 2 session-level | Ship value now, add identity later |
+| 2026-06-13 | Positive-allowlist only | Windows doesn't support deniedPaths, portable model |
+| 2026-06-13 | Operator ceiling, LLM narrows only | Learning #54 — prevent LLM from widening access |
+| 2026-06-13 | Disabled by default (opt-in) | Conservative security posture |
+| 2026-06-13 | SHA256 verification mandatory | Prevent tampered binary execution |
+| 2026-06-13 | Audit-first fail-closed | Security over availability |
+| 2026-06-17 | Add demo tool (write_local_file) | Show security value via concrete contrast |
+| 2026-06-17 | Ship Phase 2 stub now | Enable future integration without breaking changes |
+
+---
+
+**Status:** Accepted and implemented (Phase 1 complete, Phase 2 stub shipped)
+**Reviewers:** @brandwe (human operator)
+**Last Updated:** 2026-06-17 by Claude Code
diff --git a/docs/engineering-status.md b/docs/engineering-status.md
index 1356da3..ba0febd 100644
--- a/docs/engineering-status.md
+++ b/docs/engineering-status.md
@@ -1,7 +1,7 @@
# Engineering Status
-**Last updated:** 2026-06-13
-**Status:** v1 released. Two auth modes (Agent User / Delegated) running locally on macOS, Linux, and ARM64 Windows 11. **1,400 passing tests** across the suite (1 skipped), ruff clean. Body-first prompt architecture loads at boot; persona-sati MCP wires personality and memory when configured. ADR-005 cloud-memory Phases 1, 2, 5, 6a shipped — blob storage is opt-in via `setup.sh --use-cloud-memory`. Work IQ Word migration landed (PR #75) and now emits fail-closed audit events for every Work IQ MCP tool call. The `send_teams_message` auto-wait pattern is host-gated and deterministic. Confused-deputy authorization fix in `add_teams_member` / `share_file` shipped via active-sponsor-channel binding (Gate 3) on 2026-06-04. The Teams Bot Gateway mode was removed on 2026-06-08 (ADR-006) — it bypassed Agent Identity and was superseded by Microsoft Agent 365's managed AI teammate. README, docs site, and GitHub Pages auto-deploy refreshed 2026-05-21.
+**Last updated:** 2026-06-18
+**Status:** v1 released. Two auth modes (Agent User / Delegated) running locally on macOS, Linux, and ARM64 Windows 11. **1,605 passing tests** across the suite (16 skipped; 10 known warnings), with one pre-existing ruff failure in `test_demo_simple.py`. Body-first prompt architecture loads at boot; persona-sati MCP wires personality and memory when configured. ADR-005 cloud-memory Phases 1, 2, 5, 6a shipped — blob storage is opt-in via `setup.sh --use-cloud-memory`. Work IQ Word migration landed (PR #75) and now emits fail-closed audit events for every Work IQ MCP tool call. The `send_teams_message` auto-wait pattern is host-gated and deterministic. Confused-deputy authorization fix in `add_teams_member` / `share_file` shipped via active-sponsor-channel binding (Gate 3) on 2026-06-04. The Teams Bot Gateway mode was removed on 2026-06-08 (ADR-006) — it bypassed Agent Identity and was superseded by Microsoft Agent 365's managed AI teammate. README, docs site, and GitHub Pages auto-deploy refreshed 2026-05-21.
---
@@ -13,6 +13,7 @@ Source of truth for detail: `TODOS.md` in the repository root. One line each bel
- **Follow-up: `read_file` content spotlighting** — broader prompt-injection mitigation than the Gate 3 fix. See `TODOS.md` P1.
- **Script-toolkit docs closeout** — `./status.sh` is the canonical entry; finish the remaining script-reference polish and smoke verification. See `TODOS.md` P1.
- **Test isolation: blob env leakage** — `tmp_data_dir` fixture in `tests/tools/test_interaction_log.py` doesn't clear `ENTRABOT_BLOB_ENDPOINT`; 10 tests fail on any machine with blob env configured. Partially addressed: `test_interaction_log.py`, `test_daily_summary.py`, and `test_email_poll.py` fixtures now unset blob env; session-scoped autouse fixture still open.
+- **Windows sandbox local-file commands — needs live validation** (branch `feat/mxc-sandbox-integration`) — `read_local_file`/`write_local_file` built POSIX `cat`/`printf` commands with no platform branch; `wxc-exec.exe` runs `process.commandLine` via `CreateProcessW` with no implicit shell, so `cat` was not found (`0x80070002`). Fixed: Windows read uses `cmd /c type "<path>"`; Windows write uses an inline Python base64 writer (`<python> -c ... <args>` via `subprocess.list2cmdline`) for byte-exact, injection-safe writes; `mcp_server` now distinguishes a helper spawn-failure from a real policy denial. Unit-tested (command construction + error discrimination, `tests/sandbox/test_local_files.py`, `tests/test_local_file_tools.py`). OPEN: the write path assumes `python.exe`+stdlib load inside the processcontainer (preview only documents a cmd.exe/system-DLL baseline) — needs end-to-end validation against the real binary via `scripts/demo_sandbox.ps1`; fallback if Python is unavailable is a `certutil -decode` cmd approach.
- **MCP server orphans on Claude Code exit** — background poll tasks sit outside FastMCP's lifespan cancel scope; new sessions spawn a second server, both poll Graph independently.
- **Daily summary scheduler — wrong day + double-fire** — UTC-based `target_day` summarizes the brand-new UTC day at 5pm PDT; scheduler fired twice at the same second on 2026-04-17.
@@ -20,8 +21,11 @@ Source of truth for detail: `TODOS.md` in the repository root. One line each bel
Last ~30 days. Full diff: `git log --since="2026-05-04"`.
+- **Shared-Blueprint test-agent provisioning** (2026-06-18, branch `feat/mxc-sandbox-integration`) — `scripts/setup.sh` now supports `--new --use-blueprint=<blueprint-app-id>` to create a fresh Agent Identity + Agent User under an existing Blueprint instead of forcing a second Blueprint. Added `--state-file` and `--env-file` so production and E2E test chains can live side by side (for example `.entrabot-state-mxc-test.json` + `.env.mxc-test`) without overwriting the primary setup. `scripts/create_entra_agent_ids.py` now honors a pinned Blueprint App ID for this flow, and `scripts/entra_provisioning.py` can read/write an override state path via `ENTRABOT_STATE_FILE`. +25 targeted tests across `tests/scripts/test_a365_setup_prereqs.py`, `tests/scripts/test_create_entra_agent_ids.py`, and `tests/scripts/test_entra_provisioning.py`.
- **A365 Work IQ audit attribution** (2026-06-13, branch `security/a365-audit-attribution`) — `WorkIqProvider.call_tool` now logs pending/success/failure audit events around every Work IQ MCP call before touching customer SharePoint/OneDrive/Word resources. Audit metadata records only `{server, tool}` — never argument keys or values — and audit failure prevents the MCP call. Resource handle is a stable `a365.{server}.{tool}` string; operators correlate by action+timestamp+agent_id and walk over to Graph server-side logs for document-level detail. +6 tests in `tests/a365/test_provider.py`.
+- **Real MXC macOS Seatbelt binary built from source** (2026-06-18, branch `feat/mxc-sandbox-integration`) — replaced the 703-byte mock at `.mxc-build/target/release/mxc-exec-mac` with a 1.6 MB `mxc-exec-mac` built from `microsoft/mxc` v0.6.1 (commit `161598fd08a4fdd030f461de19af23ce4a310b41`). Added `scripts/mxc-mac-stdin-compat.patch` so Entrabot's existing stdin-driven `SeatbeltRunner` works against the real Seatbelt backend, updated `scripts/setup_sandbox.sh`, and pinned the new darwin-arm64 SHA256 in `src/entrabot/sandbox/binary.py`.
- **Teams chat poll cursor persistence (issue #17)** (2026-06-09) — per-chat poll cursor (`last_ts`, `seen_ids_tail`, `bootstrapped`) now persists through `MemoryBackend` at `chat_cursors/.json`. Fixes the "11-day-old replay flood" symptom — every MCP restart used to re-bootstrap from "newest message at boot" and silently drop messages that arrived during a server-down window. 24-hour staleness cap on `last_ts` re-baselines genuinely-old chats instead of surfacing stale messages as live. Debounced 1s async save coalesces bursts; graceful shutdown flushes dirty cursors. New module `src/entrabot/tools/chat_cursors.py`. +35 tests across `tests/tools/test_chat_cursors.py` and `tests/test_mcp_server_chat_cursors.py`.
+- **Cursor staleness keyed off write-time, not message-time (issue #17 follow-up)** (2026-06-26, branch `fix/cursor-staleness-uses-write-time`) — `chat_cursors.is_stale()` measured staleness from `last_ts` (the newest-*message* watermark) instead of `last_written_at` (when the cursor was persisted). Any chat idle >24h was therefore judged permanently stale and re-bootstrapped on every MCP restart — and `_bootstrap_chat` deliberately leaves the newest message unseen, so that weeks-old message got re-pushed as if it were live. With ~50 idle chats and frequent restarts (amplified by the open MCP-disconnect issue) this produced a flood of stale replays. Fix: `is_stale` now takes `last_written_at`; both call sites (`mcp_server._register_watched_chat`, `body_bootstrap._cursor_freshness`) pass the write timestamp. The 24h cap still re-baselines after a genuine long downtime. +1 regression test (`test_idle_chat_recent_write_rehydrates_despite_old_last_ts`); two tests that encoded the old behavior corrected. Full suite green (1527 passed).
- **Confused-deputy fix: active-sponsor-channel binding (Gate 3)** (2026-06-04, branch `fix/msrc-active-sponsor-channel-binding`) — closes Chain A in `add_teams_member` and `share_file`. New `ActiveChannelBindings` store keyed by Graph `user_id`, TTL on `graph_sent_at` (not server-observed time) to defend bootstrap-replay, updated only after `write_stream.send()` succeeds. `share_file` refactored to audit-first so gate failures emit audit events. Audit metadata records both `supplied_chat_id` and `bound_chat_id`. +50 tests across `tests/identity/test_active_channel.py`, `tests/test_mcp_push_channel_binding.py`, `tests/tools/test_add_member_channel_binding.py`, `tests/tools/test_share_file_channel_binding.py`. Hard-won learning #67. Follow-up: two-phase confirmation for Chain B (tracked in TODOS P1).
- **`read_email` MCP tool** (2026-05-27) — fetches the full body + all recipient lists + headers of an inbound mail by `message_id`. Fixes the gap where the 60s email-poll channel push truncates the preview of long forwarded mails. Same three-hop Agent User token + `Mail.Read` scope as the poll. +7 tests.
- **Email cursor sub-second precision** (2026-05-27) — `advance_cursor()` bumps the poll watermark by 1 ms so Graph's `gt` filter does not re-fetch messages at the cursor's exact second after a server restart.
diff --git a/docs/guides/mxc-sandbox-demo-windows.md b/docs/guides/mxc-sandbox-demo-windows.md
new file mode 100644
index 0000000..724465d
--- /dev/null
+++ b/docs/guides/mxc-sandbox-demo-windows.md
@@ -0,0 +1,243 @@
+# Demo Walkthrough — EntraBot × MXC Sandbox on Windows
+
+> The Windows run-of-show for demonstrating OS-enforced, least-privilege local
+> execution — the counterpart to the macOS Seatbelt demo. Everything below was
+> verified against the **real** `wxc-exec.exe` (`@microsoft/mxc-sdk` v0.7.0) on
+> Windows 11 24H2+ with the `processcontainer` backend.
+
+**The one-line story** (say this at the top and the bottom):
+
+> *"The agent has its own Entra identity and can read what you allow — but the
+> **OS**, not the agent's good behavior, stops it from writing where it
+> shouldn't. Least privilege, enforced by the kernel, attributed to the agent,
+> audited before every action."*
+
+---
+
+## 0. What the audience will see (and why it lands)
+
+Three layers of proof, from "always works" to "BUILD-stage flashy":
+
+| Layer | What it shows | Needs admin? |
+|---|---|---|
+| **A. The harness** (`demo_sandbox.ps1`) | The clamp dropping out-of-ceiling paths to `[]`, then `BLOCKED by the Windows kernel — Access is denied` inline. The money-shot. | No |
+| **B. `wxc-exec --debug`** | The *resolved policy* the kernel enforces (`readwrite_paths`, `denied_paths`, `containment: processcontainer`, `selected isolation tier`). | No |
+| **C. `mxc-diagnostic-console` (elevated)** | The **live ETW event stream** from the MXC OS provider as each sandbox runs — the Build-2026-stage "watch the kernel" view. | **Yes** |
+
+Run **A** for everyone; drop to **B** when a developer asks "what does the
+policy actually look like?"; run **C** in a second elevated window for the full
+effect.
+
+---
+
+## 1. Prerequisites (one-time)
+
+```powershell
+# From the repo root, in PowerShell:
+
+# 1. Provision the MXC binary + pin its SHA256 + write .env defaults.
+.\scripts\setup_sandbox.ps1
+
+# 2. (Recommended) Stabilize the processcontainer tier. On boxes where MXC falls
+# back to the AppContainer+DACL tier, the sandbox can't read C:\ root
+# metadata, so cmd.exe/pwsh.exe startup can intermittently fail. This grants
+# the minimal metadata ACEs and makes the demo rock-solid. Run ELEVATED:
+# (Right-click PowerShell -> Run as administrator)
+& "$env:MXC_BIN_DIR\arm64\wxc-host-prep.exe" prepare-system-drive # or \x64\ on Intel
+
+# 3. Confirm the operator ceiling in .env. On Windows, paths are ';'-separated:
+# ENTRABOT_ENABLE_RUN_CODE=1
+# MXC_BIN_DIR=...\.mxc-build\npm\node_modules\@microsoft\mxc-sdk\bin
+# ENTRABOT_SANDBOX_READONLY_PATHS=C:\Users\you\Documents;%TEMP%
+# ENTRABOT_SANDBOX_READWRITE_PATHS=%TEMP%;C:\Users\you\Downloads
+# ENTRABOT_SANDBOX_NETWORK=block
+```
+
+> **Check the tier:** `& "$env:MXC_BIN_DIR\arm64\wxc-exec.exe" --probe` prints the
+> selected isolation tier and `uiCapabilities` as JSON. `processcontainer` is the
+> default, non-experimental backend on Windows 11 24H2+ (build 26100+); no
+> `--experimental` flag is needed.
+
+---
+
+## 2. Part 1 — Local proof harness (screen-share)
+
+This drives the real binary through the **exact** `run_code` enforcement chain
+the MCP server uses (operator ceiling → clamp → canonicalize → MXC) and narrates
+each beat.
+
+```powershell
+.\scripts\demo_sandbox.ps1 # press Enter between beats (live)
+.\scripts\demo_sandbox.ps1 -NoPause # straight through (recording / CI)
+.\scripts\demo_sandbox.ps1 -ConfigOnly # just show the operator ceiling + backend
+```
+
+**What to say as it runs:**
+
+1. *"The operator sets a ceiling in `.env`. The agent can only narrow it, never
+ widen it."*
+2. **READ Documents** → *"The agent can read your files for analysis."* ✅
+3. **WRITE Documents** → *"It tries to tamper — watch the clamp drop the path to
+ `[]`, and the kernel says no."* ⛔ (`Access is denied.`)
+4. **WRITE %TEMP% + Downloads** → *"Scoped output dirs the operator allowed."* ✅
+5. **WRITE C:\Windows** → *"It can't reach the OS itself — dropped and blocked."* ⛔
+
+The harness prints, per scenario, the **clamp decision** (`dropped WRITE
+C:\Users\you\Documents (outside operator ceiling)`), the **exact policy sent to
+MXC**, and the **kernel verdict** (`[x] BLOCKED by the Windows kernel exit=1
+reason: Access is denied.`).
+
+---
+
+## 3. Part 2 — Show the enforcement internals (developer beat)
+
+When someone asks "but what is actually enforced?", run the real binary with
+`--debug` on a blocked write and point at the resolved policy:
+
+```powershell
+$cfg = '{"version":"0.6.0-alpha","containment":"process","process":{"commandLine":"cmd /c echo HACK > \"C:\\Users\\you\\Documents\\hack.txt\"","timeout":15000},"filesystem":{"readonlyPaths":[],"readwritePaths":["%TEMP%"]},"network":{"defaultPolicy":"block"}}'
+$b64 = [Convert]::ToBase64String([Text.Encoding]::UTF8.GetBytes($cfg))
+& "$env:MXC_BIN_DIR\arm64\wxc-exec.exe" --debug --config-base64 $b64
+```
+
+It prints the full resolved `ExecutionRequest`, including:
+
+```
+ "containment": "processcontainer",
+ "readwrite_paths": [ ... only what the operator allowed ... ],
+ "readonly_paths": [],
+ "denied_paths": [],
+ "default_network_policy": "block",
+selected isolation tier: appcontainer-dacl
+```
+
+> ⚠️ `--debug` wraps the process and returns **exit 0** for the diagnostic run —
+> do **not** use `--debug` to judge allow/block. Without it, a blocked write
+> returns **exit 1 + `Access is denied.`** (this is what the harness relies on).
+
+---
+
+## 4. Part 3 — The live "watch the kernel" view (elevated, Build-stage)
+
+The Windows analog to macOS's `log stream` is **`mxc-diagnostic-console.exe`**,
+which streams the **MXC OS-provider ETW events** plus pipe log messages from
+`wxc-exec`. It **requires Administrator** for two reasons we verified:
+
+- ETW capture (`StartTraceW`) needs admin.
+- `wxc-exec` refuses to send diagnostics to a console running below **High
+ integrity** (i.e. a non-elevated console) as a security measure.
+
+**Window A — the live console (Run as administrator):**
+
+```powershell
+$env:MXC_DIAG_CONSOLE = "1"
+& "$env:MXC_BIN_DIR\arm64\mxc-diagnostic-console.exe" --verbose
+# add --collect to also zip a timestamped capture into %TEMP% on Ctrl+C
+```
+
+**Window B — also elevated, same session, so `wxc-exec` talks to the console:**
+
+```powershell
+$env:MXC_DIAG_CONSOLE = "1"
+.\scripts\demo_sandbox.ps1 -NoPause
+```
+
+As each scenario runs, the console shows `wxc-exec` connect/disconnect and the
+OS-provider events for the allowed vs. denied file operations in real time. Pause
+on the **WRITE Documents** beat so the audience watches the denial appear live.
+
+> **No-admin fallback:** if you can't elevate, the harness (Part 1) and `--debug`
+> (Part 2) already prove containment without ETW. You can also run **Process
+> Monitor** (Sysinternals `procmon`) filtered to `Result is ACCESS DENIED` — the
+> classic Windows "kernel said no" red rows — as a familiar alternative.
+
+---
+
+## 5. Part 4 — Do it live in Teams (the identity beat)
+
+This is where EntraBot's half of the thesis shows: the contained execution is
+attributed to the **agent's own Entra identity**, not yours. Chat with the agent
+(`entrabot-agent@werner.ac`) in Teams, in plain language. It calls
+`run_code` / `read_local_file` / `write_local_file` under the hood.
+
+**Agent first-person script** (what the agent says back, mirroring the Mac demo):
+
+> 👋 Hi — I'm **entrabot-agent**, an agent with my own Entra identity. I'm going
+> to try to touch files on this Windows PC. Everything I run is contained by the
+> MXC sandbox (Windows `processcontainer`), and the limits were set by you, the
+> operator — I can only narrow them, never widen them.
+>
+> **Operator ceiling**
+> 📖 read-only: `C:\Users\you\Documents`, `%TEMP%`
+> ✏️ read-write: `%TEMP%`, `C:\Users\you\Downloads`
+>
+> **1) Read your Documents.** Documents is in my read-only ceiling, so this is
+> allowed.
+> ✅ I read it: *"SECRET: quarterly numbers the agent may read but must not alter."*
+>
+> **2) Write to your Documents.** Documents is not in my read-write ceiling — the
+> policy clamps it to `[]`, and the OS kernel blocks the write.
+> ⛔ Blocked — *"Access is denied."*
+>
+> **3) Write a scratch report to %TEMP%.** TEMP is in my read-write ceiling.
+> ✅ Wrote it: *scratch report.*
+>
+> **4) Write an export to your Downloads.** Downloads is in my read-write ceiling.
+> ✅ Wrote it: *export data.*
+>
+> 🔒 **Recap:** I could read your Documents but could not write to them — even
+> though I asked. I could write only where you allowed (`%TEMP%`, Downloads). The
+> boundary is enforced by the OS, attributed to my identity, and logged before
+> each action. That's least-privilege for agentic work on your device.
+
+**Suggested prompts to type in Teams:**
+
+1. *"Read my file at `~\Documents\entrabot-secret.txt` and tell me what it says."*
+ → Agent reads it. Point out: Documents is read-only in the ceiling.
+2. *"Now save the text `hello` to `~\Documents\note.txt`."*
+ → Blocked. The agent reports it can't write there. Show the audit log.
+3. *"Write a short summary to `~\Downloads\summary.txt` instead."*
+ → Works. Downloads is in the read-write ceiling.
+
+> **Make `run_code` the agent's only path to the disk.** MXC contains code run
+> *through the entrabot tools* — not your host's built-in `Bash`/`Write`/`Edit`.
+> For an honest demo, disable the host's built-in file/shell tools (Copilot CLI:
+> `--deny-tool`/`--available-tools`; Claude Code: `--disallowedTools "..."`). See
+> [the sandbox guide](mxc-sandbox.md#critical-the-sandbox-contains-run_code-not-the-agent).
+
+---
+
+## 6. The honest caveat (say it — it builds trust)
+
+MXC is an **early preview** and Microsoft is explicit that *"no MXC profiles
+should be treated as security boundaries currently."* In this demo MXC is
+**defense-in-depth** layered *under* EntraBot's existing identity, attribution,
+and audit gates — it never relaxes one. The filesystem enforcement you're
+watching is real and kernel-backed; the maturity bar for "trusted boundary" is
+still ahead (micro-VM / session isolation tiers on the roadmap).
+
+---
+
+## 7. Troubleshooting
+
+| Symptom | Cause / Fix |
+|---|---|
+| `Sandbox unavailable` / binary not found | Run `.\scripts\setup_sandbox.ps1`; confirm `MXC_BIN_DIR`. |
+| `Untrusted binary` (SHA mismatch) | The binary changed but `PINNED_HASHES` wasn't updated. Re-run `setup_sandbox.ps1` (it re-pins). |
+| An **allowed** write intermittently fails (exit 1) | AppContainer+DACL tier can't stat `C:\` root, so `cmd.exe` startup flakes. Run `wxc-host-prep prepare-system-drive` **elevated** (Prereqs step 2). |
+| Ceiling paths look shredded (`C` and `\Users\...`) | Old colon-split bug; ensure you're on this branch (ceiling is parsed with `os.pathsep` = `;` on Windows). |
+| Diagnostic console shows no events | Not elevated. ETW + the High-integrity pipe both require **Run as administrator**, and set `MXC_DIAG_CONSOLE=1` in **both** windows. |
+| `&&` errors running a command | `wxc-exec` runs `commandLine` via `CreateProcessW` (no shell). Wrap shell syntax in `cmd /c "..."`. |
+| Read shows stray `ï»¿` bytes at the start of the text | A UTF-8 BOM in the fixture file; write fixtures as ASCII / UTF-8-no-BOM. |
+
+---
+
+## 8. Reference
+
+- Harness: [`scripts/demo_sandbox.ps1`](../../scripts/demo_sandbox.ps1) ·
+ engine: [`scripts/demo_sandbox_run.py`](../../scripts/demo_sandbox_run.py)
+- Setup: [`scripts/setup_sandbox.ps1`](../../scripts/setup_sandbox.ps1)
+- Sandbox guide: [`mxc-sandbox.md`](mxc-sandbox.md)
+- What the Windows preview actually exposes:
+ [`mxc-windows-sandbox-preview.md`](../platform-learnings/mxc-windows-sandbox-preview.md)
+- Decision record: [ADR-007](../decisions/007-mxc-sandbox-integration.md)
diff --git a/docs/guides/mxc-sandbox.md b/docs/guides/mxc-sandbox.md
new file mode 100644
index 0000000..d8615a0
--- /dev/null
+++ b/docs/guides/mxc-sandbox.md
@@ -0,0 +1,335 @@
+# MXC Sandbox — Contained Local Code Execution
+
+Give your agent the ability to run code on the local machine **without** giving it
+the run of your filesystem. entrabot integrates
+[Microsoft Execution Containers (MXC)](https://github.com/microsoft/mxc) so a
+`run_code` tool executes inside an OS-enforced sandbox (Apple **Seatbelt** on macOS, `processcontainer` on Windows).
+You — the operator — set a capability ceiling in plain config; the agent can only
+ever *narrow* it, and the OS kernel enforces the result.
+
+- **Opt-in.** Disabled by default; you enable it explicitly.
+- **Positive allow-list.** The agent gets nothing it isn't granted (no network, no
+ filesystem, no Keychain by default).
+- **The model can't widen its box.** Requests are clamped to the operator ceiling.
+- **Fail-closed + audited.** Every call is audit-logged before it runs; if audit
+ can't record, the action doesn't proceed.
+
+> Decision record: [ADR-007](../decisions/007-mxc-sandbox-integration.md) ·
+> Platform research: [`mxc-windows-sandbox.md`](../platform-learnings/mxc-windows-sandbox.md)
+
+Phase 1 ships **macOS (Seatbelt)** and **Windows (`processcontainer`)**. Linux
+seccomp-bpf is on the roadmap. The Windows path is documented inline below where it
+differs; see also
+[`mxc-windows-sandbox-preview.md`](../platform-learnings/mxc-windows-sandbox-preview.md)
+for what the Windows preview build actually exposes, and run
+[`scripts/setup_sandbox.ps1`](../../scripts/setup_sandbox.ps1) (the PowerShell
+counterpart to `setup_sandbox.sh`) to provision `wxc-exec.exe` and pin its hash.
+
+> **Windows notes.** Ceiling lists are **`;`-separated** (`os.pathsep`), not
+> colon-separated. `wxc-exec.exe` runs commands via `CreateProcessW` with **no
+> implicit shell**, so invoke builtins/redirection as `cmd /c ...`. The
+> `processcontainer` backend is default (no `--experimental`) on Win11 24H2+.
+
+---
+
+## How it works
+
+```
+ Operator config (.env) ┌──────────────────────────────┐
+ ENTRABOT_SANDBOX_READONLY_PATHS ─────► │ run_code tool (mcp_server.py)│
+ ENTRABOT_SANDBOX_READWRITE_PATHS ─────► │ reads the ceiling from env │
+ └───────────────┬──────────────┘
+ Agent's request (paths it wants) ──────────────────────►│
+ ┌───────────────▼──────────────┐
+ │ clamp_to_ceiling (policy.py) │
+ │ request ∩ ceiling → narrower │ ← agent can only narrow
+ └───────────────┬──────────────┘
+ ┌───────────────▼──────────────┐
+ │ mxc-exec-mac (SHA256-pinned) │
+ │ → Seatbelt profile │
+ └───────────────┬──────────────┘
+ ┌───────────────▼──────────────┐
+ │ macOS kernel enforces; denies │
+ │ logged: deny(1) file-write-… │
+ └──────────────────────────────┘
+```
+
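+The rules are read **on every call** from the server's environment — never from the model. (The `.env` file itself is loaded at server boot, so restart the server after editing it.)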
+
+---
+
+## HOWTO: enable the sandbox
+
+### Prerequisites
+
+- **macOS** for the steps below (Phase 1 also ships Windows — use `scripts/setup_sandbox.ps1` there). Apple Silicon or Intel.
+- A working entrabot agent (`./scripts/setup.sh` already run). See the
+ [Quickstart](../getting-started/quickstart.md).
+- To **build** the MXC binary from source: **Rust 1.93+** (`https://rustup.rs/`).
+ (If you already have a prebuilt `mxc-exec-mac` on `MXC_BIN_DIR` or via npm, the
+ build step is skipped.)
+
+### Step 1 — Build and configure the sandbox
+
+```bash
+./scripts/setup_sandbox.sh
+```
+
+This script is idempotent and does five things:
+
+1. **Finds or builds** the MXC binary. If not already present, it clones
+ [`microsoft/mxc`](https://github.com/microsoft/mxc) at the pinned tag
+ (`v0.6.1`, commit `161598f…`), applies the bundled
+ [stdin-compat patch](../../scripts/mxc-mac-stdin-compat.patch), and `cargo`-builds
+ `mxc-exec-mac` into `.mxc-build/target/release/`.
+2. **Code-signs** the binary (ad-hoc) so macOS will run it.
+3. **Pins its SHA256** into
+ [`src/entrabot/sandbox/binary.py`](../../src/entrabot/sandbox/binary.py)
+ (`PINNED_HASHES`). At runtime the binary is verified against this hash and refused
+ if it doesn't match — a tampered enforcer can't be swapped in.
+4. **Writes the sandbox config** into `.env` (see Step 2).
+5. Prints a summary (binary path, hash, env).
+
+> Flags: `--force-build` rebuilds even if a binary exists; `--skip-sign` skips
+> code-signing. Run `./scripts/setup_sandbox.sh --help` for details.
+
+### Step 2 — Set your operator ceiling
+
+`setup_sandbox.sh` writes safe defaults to `.env` (everything scoped to `/tmp`). Edit
+these to grant exactly what your agent needs — **directories**, colon-separated (`;`-separated on Windows):
+
+```dotenv
+# Turn the sandboxed run_code tool on
+ENTRABOT_ENABLE_RUN_CODE=1
+
+# Where the verified binary lives (written for you)
+MXC_BIN_DIR=/absolute/path/to/.mxc-build/target/release
+
+# The ceiling — the MOST the agent may ever touch. The agent can only narrow this.
+ENTRABOT_SANDBOX_READONLY_PATHS=/Users/you/Documents:/tmp # may READ
+ENTRABOT_SANDBOX_READWRITE_PATHS=/tmp:/Users/you/Downloads # may WRITE
+
+# Guardrails
+ENTRABOT_SANDBOX_TIMEOUT_MS=30000 # max wall-clock per execution
+ENTRABOT_SANDBOX_NETWORK=block # block | allow (default block)
+```
+
+Guidance:
+
+- **Grant the least you can.** Prefer a scratch output dir in `READWRITE_PATHS` and a
+ read-only project tree in `READONLY_PATHS`.
+- Use **absolute paths**. `~` and symlinks are resolved (canonicalized) before the
+ containment check, so a request can't escape a granted directory via a symlink.
+- Leaving a list **empty** means *no* access of that kind. There is no implicit
+ default — default-deny is total.
+- **Keychain access is hard-disabled** and not overridable by the agent or config.
+
+### Step 3 — Restart the MCP server
+
+Config is read at server boot. Restart your host (e.g. Claude Code / Copilot CLI) so
+the `entrabot` MCP server picks up the new `.env`. Confirm the tool is registered:
+
+```bash
+# The run_code tool only appears when ENTRABOT_ENABLE_RUN_CODE=1
+claude mcp list # entrabot server should show ✓ Connected
+```
+
+> ### ⚠️ Critical: the sandbox contains `run_code`, not "the agent"
+>
+> MXC sandboxes code executed **through the `run_code` tool**. It does **not**
+> contain your *host* (Claude Code, Copilot CLI, Codex, …), which ships its own
+> built-in `Bash`/`Edit`/`Write`/`Read` tools with full, unsandboxed disk access.
+> If those remain enabled, the agent will simply use them and bypass the sandbox
+> entirely — `run_code` is then just *one* door in an open house.
+>
+> **For the containment to be real, make `run_code` the agent's only path to the
+> filesystem** by disabling the host's built-in file/shell tools.
+>
+> **Claude Code** (verified): deny the built-ins — do **not** use `--tools ""`,
+> which removes the *MCP* tools (including `run_code`) and leaves the built-ins:
+>
+> ```bash
+> claude --dangerously-load-development-channels server:entrabot \
+> --disallowedTools "Bash Write Edit NotebookEdit Read Glob Grep WebFetch WebSearch Task"
+> ```
+>
+> With this, `run_code` still works but a direct `Write` returns
+> *"No such tool available"* and the file is never created.
+>
+> **Copilot CLI**: use `--available-tools` (allow-list) or `--deny-tool` to the
+> same effect.
+>
+> **This is a real trade-off, not a tweak.** Stripping the built-ins makes the
+> agent MCP-only — it keeps every entrabot tool (Teams, email, Files-via-Graph,
+> `run_code`) but loses general local coding (arbitrary file edits, shell). Run
+> the *contained* configuration in a **dedicated session**; keep your everyday
+> agent fully tooled. Whole-agent containment that *keeps* the powerful tools is a
+> separate model (a dedicated OS user / VM the agent runs as) — see
+> [ADR-007](../decisions/007-mxc-sandbox-integration.md) Phase 2.
+>
+> As a defense-in-depth backstop, entrabot's own deliberately-unsafe
+> `unsafe_write_local_file` tool is **off by default** and only registered when
+> `ENTRABOT_ENABLE_UNSAFE_WRITE=1`. Leave it unset.
+
+### Step 4 — Verify it works
+
+Show the active configuration (operator's view):
+
+```bash
+./scripts/demo_sandbox.py --config-only
+```
+
+Run the full enforcement check against the **real** binary (narrated, no agent
+required):
+
+```bash
+./scripts/demo_sandbox.py # interactive, pauses between beats
+./scripts/demo_sandbox.py --no-pause # straight through
+```
+
+It exercises: read an allowed dir ✅, write a *disallowed* dir ⛔ (blocked by the
+kernel), write allowed dirs ✅, and a symlink-escape attempt ⛔.
+
+To watch the kernel enforce in real time, stream Seatbelt denials in another window:
+
+```bash
+log stream --predicate 'eventMessage CONTAINS "deny(" AND eventMessage CONTAINS "file-write"' --style compact
+```
+
+A blocked write prints instantly:
+
+```
+kernel (Sandbox) Sandbox: bash(NNNNN) deny(1) file-write-create /Users/you/Documents/note.txt
+```
+
+---
+
+## Using it
+
+Enabling the sandbox registers three tools, all gated behind
+`ENTRABOT_ENABLE_RUN_CODE` and all enforced by the same operator ceiling:
+
+- **`read_local_file(path)`** — read a file on the user's local disk.
+- **`write_local_file(path, content)`** — write/save a file on the local disk.
+- **`run_code(argv, …)`** — run an arbitrary command/script in the sandbox.
+
+The two purpose-named file tools exist because models select tools by intent:
+they reliably reach for `read_local_file` / `write_local_file` when asked to
+"read" or "save" a local file, whereas a single generic `run_code` got skipped
+for writes (the model routed "save a file" to the cloud OneDrive tools). All
+three share the identical clamp → realpath → Seatbelt machinery.
+
+In practice you just ask the agent, e.g. in Teams:
+
+- *"Read `~/Documents/report.md` and summarize it."* → `read_local_file`; allowed
+ if `~/Documents` is in `READONLY_PATHS`.
+- *"Save the summary to `~/Documents/summary.md`."* → `write_local_file`;
+ **blocked** unless `~/Documents` is in `READWRITE_PATHS` (the kernel returns
+ `Operation not permitted` and nothing is written).
+- *"Write it to `~/Downloads/summary.md` instead."* → `write_local_file`; allowed
+ if `~/Downloads` is in `READWRITE_PATHS`.
+
+`run_code` takes a structured `argv` (no shell string) plus optional
+`readonly_paths` / `readwrite_paths` (to *narrow* the ceiling) and `timeout_ms`.
+The file tools just take a `path` (and `content` for writes). See the
+[MCP tool reference](../reference/mcp-tools.md).
+
+> A deliberately-**unsafe** contrast tool, `unsafe_write_local_file`, bypasses the
+> sandbox and writes anywhere. It is off by default and only registered when
+> `ENTRABOT_ENABLE_UNSAFE_WRITE=1`; leave it unset outside teaching demos.
+
+---
+
+## Configuration reference
+
+| Variable | Default | Meaning |
+|----------|---------|---------|
+| `ENTRABOT_ENABLE_RUN_CODE` | *(unset = off)* | `1` registers the sandboxed tools (`run_code`, `read_local_file`, `write_local_file`). Off by default. |
+| `MXC_BIN_DIR` | *(written by setup)* | Directory containing the verified `mxc-exec-mac`. |
+| `ENTRABOT_SANDBOX_READONLY_PATHS` | `/tmp` | Dirs the agent may read, separated by `os.pathsep` (`:` on macOS, `;` on Windows). |
+| `ENTRABOT_SANDBOX_READWRITE_PATHS` | `/tmp` | Dirs the agent may read **and** write, separated by `os.pathsep` (`:` on macOS, `;` on Windows). |
+| `ENTRABOT_SANDBOX_TIMEOUT_MS` | `30000` | Max wall-clock per execution (ms). |
+| `ENTRABOT_SANDBOX_NETWORK` | `block` | `block` (no egress) or `allow`. |
+| *Keychain* | *off* | Hard-disabled in code; not configurable. |
+
+---
+
+## The security model (why you can trust it)
+
+- **Operator ceiling, model narrows.**
+ [`clamp_to_ceiling`](../../src/entrabot/sandbox/policy.py) intersects the agent's
+ requested paths with your ceiling. The worst the agent can do is ask for *less*.
+- **Canonicalize-then-contain.** Paths are `realpath`-resolved before the containment
+ check, so a symlink inside a granted directory can't point outside it. (Details and
+ an upstream note for the MXC team:
+ [macOS symlink canonicalization](../platform-learnings/mxc-upstream-feedback-macos-symlinks.md).)
+- **Tamper-evident binary.** The runner verifies the binary's SHA256 against
+ `PINNED_HASHES` in [`binary.py`](../../src/entrabot/sandbox/binary.py) and refuses a
+ mismatch.
+- **Fail-closed.** If the policy needs a primitive the backend can't enforce, or audit
+ can't record, the execution is refused — not silently allowed.
+- **Kernel-enforced.** The deny is a real syscall denial in the macOS unified log, not
+ a Python check.
+
+---
+
+## Advanced: a throwaway test agent
+
+To exercise the sandbox without touching your production agent's Teams presence, run a
+second, isolated agent that shares the Blueprint but has its own Agent User and data
+dir:
+
+1. Provision a fresh Agent Identity + Agent User under the existing Blueprint:
+ ```bash
+ ./scripts/setup.sh --new --use-blueprint= \
+ --agent-user-upn=entrabot-test@yourtenant.com \
+ --state-file=.entrabot-state-test.json \
+ --env-file=.env.test
+ ```
+ (See the [setup-script reference](../reference/setup-script.md).)
+2. In `.env.test`, add the sandbox vars from Step 2 **plus** an isolated data dir so
+ it won't collide with production's singleton lock or local memory:
+ ```dotenv
+ ENTRABOT_KEEP_MEMORY_LOCAL=true
+ ENTRABOT_DATA_DIR=/Users/you/.entrabot-test
+ ```
+3. Point the MCP server at it via `ENTRABOT_ENV_FILE`. The runtime honors this
+ override (falling back to `./.env`):
+ ```jsonc
+ // .mcp.json
+ { "mcpServers": { "entrabot-test": {
+ "type": "stdio",
+ "command": "/abs/path/.venv/bin/entrabot-mcp",
+ "env": { "ENTRABOT_ENV_FILE": "/abs/path/.env.test" }
+ }}}
+ ```
+ Verify with `claude mcp list` (expect `✓ Connected`).
+
+---
+
+## Troubleshooting
+
+| Symptom | Cause / Fix |
+|---------|-------------|
+| `run_code` tool missing | `ENTRABOT_ENABLE_RUN_CODE` isn't `1`, or the server wasn't restarted after editing `.env`. |
+| Agent ignored the sandbox / wrote anyway | The host's built-in `Edit`/`Bash`/`Write` tools were enabled and the agent used those instead of `run_code`. Disable them (see *Critical: the sandbox contains run_code, not "the agent"* above). |
+| `run_code` disappeared after adding `--tools ""` | `--tools ""` disables **MCP** tools (incl. `run_code`) and is the wrong flag. Use `--disallowedTools "Bash Write Edit NotebookEdit Read Glob Grep WebFetch WebSearch Task"` instead. |
+| `Sandbox unavailable` / binary not found | `MXC_BIN_DIR` is unset/wrong, or the binary wasn't built. Re-run `./scripts/setup_sandbox.sh`. |
+| `Untrusted binary` (SHA mismatch) | The binary changed but `PINNED_HASHES` wasn't updated. Re-run `setup_sandbox.sh` (it re-pins), or rebuild from the pinned commit. |
+| A write to `/tmp` is denied in raw policy JSON | macOS `/tmp`→`/private/tmp` symlink. The `run_code` chain canonicalizes paths, so this only bites hand-written policy JSON. See the [upstream note](../platform-learnings/mxc-upstream-feedback-macos-symlinks.md). |
+| `cargo not found` during build | Install Rust 1.93+ from `https://rustup.rs/`. |
+| `entrabot` shows `✗ Failed to connect` in `claude mcp list` | Another entrabot instance (same `ENTRABOT_DATA_DIR`) already holds the singleton lock. Stop it, or give the second agent its own data dir (see *Advanced*). |
+| `TypeError: unsupported operand type … '\|'` running a script | A script ran under the system `python3` (3.9). entrabot needs 3.12+; run from the repo so the script re-execs into `.venv/bin/python3`. |
+
+---
+
+## Reference
+
+- [ADR-007 — MXC sandbox integration](../decisions/007-mxc-sandbox-integration.md)
+- [MXC platform research](../platform-learnings/mxc-windows-sandbox.md)
+- [Upstream note: macOS symlink canonicalization](../platform-learnings/mxc-upstream-feedback-macos-symlinks.md)
+- Code: [`src/entrabot/sandbox/`](../../src/entrabot/sandbox/) — `policy.py` (clamp +
+ canonicalization), `mac.py` (Seatbelt runner), `binary.py` (SHA256 pin),
+ `mcp_server.py` (`run_code` tool)
+- Helper: [`scripts/setup_sandbox.sh`](../../scripts/setup_sandbox.sh) ·
+ [`scripts/demo_sandbox.py`](../../scripts/demo_sandbox.py)
diff --git a/docs/platform-learnings/mxc-upstream-feedback-macos-symlinks.md b/docs/platform-learnings/mxc-upstream-feedback-macos-symlinks.md
new file mode 100644
index 0000000..68dd992
--- /dev/null
+++ b/docs/platform-learnings/mxc-upstream-feedback-macos-symlinks.md
@@ -0,0 +1,169 @@
+# Upstream feedback for `microsoft/mxc` — macOS symlink canonicalization at the policy boundary
+
+**Audience:** the MXC maintainers (`github.com/microsoft/mxc`).
+**From:** the entrabot project (a third-party integrator embedding MXC for OS-enforced
+local code execution behind an Entra Agent identity).
+**Binary under test:** `mxc-exec-mac`, built from MXC **v0.6.1** (commit
+`161598fd08a4fdd030f461de19af23ce4a310b41`), macOS **arm64**, Seatbelt backend,
+invoked with `--experimental`, policy schema `0.6.0-alpha`, config piped on stdin.
+**Date:** 2026-06-20.
+
+This note is intentionally self-contained so it can be forwarded as-is. It reports one
+concrete, reproducible behavior (Issue 1) and one design/security observation that
+follows from it (Issue 2).
+
+---
+
+## TL;DR
+
+1. **`mxc-exec-mac` enforces filesystem rules on the kernel-resolved (realpath) path, but
+ builds the Seatbelt profile from the *literal* policy path.** On macOS, `/tmp`, `/var`,
+ and `/etc` are symlinks into `/private`. A policy that grants `readwritePaths: ["/tmp"]`
+ therefore **silently denies** all writes under `/tmp`, because the kernel resolves
+ `/tmp/foo` → `/private/tmp/foo` at syscall time and the `(subpath "/tmp")` rule never
+ matches. The failure is a generic `Operation not permitted` with no hint that symlink
+ resolution is the cause.
+
+2. **Consider canonicalizing policy paths (realpath) during profile generation — and note
+ that the *order* of canonicalization vs. containment is security-relevant**, especially
+ for `deniedPaths`. We hit the mirror-image of this in our own clamp layer and it would
+ apply to MXC's allow/deny matching too.
+
+---
+
+## Issue 1 — Filesystem rules don't match symlinked allowlist paths (macOS)
+
+### Reproduction
+
+```bash
+BIN=./mxc-exec-mac
+
+# (A) Grant /tmp, write under /tmp → DENIED (unexpected)
+echo '{
+ "version":"0.6.0-alpha","containment":"process",
+ "process":{"commandLine":"echo ok > /tmp/mxc-probe.txt","timeout":5000},
+ "filesystem":{"readonlyPaths":["/tmp"],"readwritePaths":["/tmp"]},
+ "network":{"defaultPolicy":"block"},"keychainAccess":false
+}' | "$BIN" --experimental
+# -> /bin/sh: /tmp/mxc-probe.txt: Operation not permitted (exit 1)
+
+# (B) Grant /private/tmp (the realpath), write under /tmp → ALLOWED
+echo '{
+ "version":"0.6.0-alpha","containment":"process",
+ "process":{"commandLine":"echo ok > /tmp/mxc-probe.txt","timeout":5000},
+ "filesystem":{"readonlyPaths":["/private/tmp"],"readwritePaths":["/private/tmp"]},
+ "network":{"defaultPolicy":"block"},"keychainAccess":false
+}' | "$BIN" --experimental
+# -> (exit 0), file written
+```
+
+The only difference between (A) and (B) is `/tmp` vs `/private/tmp` in the policy. The
+command and the file it touches are identical.
+
+### Root cause
+
+macOS keeps several top-level directories as symlinks into `/private`:
+
+```
+/tmp -> /private/tmp
+/var -> /private/var # note: the real $TMPDIR lives under /var/folders/...
+/etc -> /private/etc
+```
+
+Seatbelt rules such as `(allow file-write* (subpath "/tmp"))` are matched by the kernel
+against the **canonical** path of the file being accessed. Because the profile carries the
+literal `/tmp` rather than the resolved `/private/tmp`, the rule does not fire for
+`/private/tmp/...`, and the access is denied.
+
+### Why this is a sharp edge for integrators
+
+- **It's silent and non-obvious.** The error is a generic `Operation not permitted`. Nothing
+ in the output points at symlink resolution. We only diagnosed it via differential testing
+ of the binary (granting `/private/tmp` vs `/tmp`).
+- **`/tmp` is the most obvious thing to grant.** It's the canonical "scratch space" an agent
+ needs for outputs. The first policy a developer writes is the one that fails.
+- **`$TMPDIR` is also affected.** The real per-user temp dir on macOS is
+ `/var/folders/<...>/T/`, i.e. under the `/var → /private/var` symlink, so the same trap
+ applies to anything using `tempfile`/`mkstemp` defaults.
+- **The discovery helpers may paper over or expose this depending on what they return.**
+ If `getTemporaryFilesPolicy()` returns `/tmp` (literal) it would inherit the bug; if it
+ returns the realpath it would mask it. Either way the literal-path contract is implicit.
+
+### Suggested fixes (any one would help; not mutually exclusive)
+
+1. **Canonicalize `readonlyPaths` / `readwritePaths` (and `deniedPaths`) during profile
+ generation** — resolve symlinks to realpaths before emitting Seatbelt rules. This makes
+ the obvious policy "just work".
+2. **Or emit rules for both the link and its target** when a granted path is (or traverses)
+ a symlink.
+3. **Or, at minimum, document the contract explicitly** ("policy filesystem paths must be
+ realpaths on macOS; `/tmp`, `/var`, `/etc` are symlinks") and **fail loudly** — e.g.
+ `--dry-run`/validation could warn when a policy path differs from its realpath.
+
+A one-line `realpath()` normalization in the macOS profile builder would have saved us a
+half-day of binary-level debugging, and will bite every macOS integrator who grants `/tmp`.
+
+---
+
+## Issue 2 — Canonicalization order is security-relevant (allow *and* deny matching)
+
+This is a design note rather than a bug report; we raise it because we hit the exact mirror
+of it in our own ceiling-clamp layer and the same reasoning applies to MXC's policy matching.
+
+When you move to canonicalizing policy paths (Issue 1, fix #1), the **order** of operations
+matters:
+
+- **Canonicalize first, then match.** Resolve the realpath of both the policy path and the
+ accessed path, *then* test containment/equality. This is safe.
+- **Match on un-resolved strings (e.g. prefix check), then canonicalize.** This is unsafe:
+ a symlink located *inside* a granted directory can point *outside* it, and a naive
+ string-prefix test admits it.
+
+Concretely, with a grant of `readwritePaths: ["/work/granted"]` and a symlink
+`/work/granted/evil -> /work/secret`:
+
+- A string-prefix check sees `/work/granted/evil` starts with `/work/granted/` → **admit**
+ (escape: writes land in `/work/secret`).
+- A realpath-first check resolves to `/work/secret`, which is **not** under `/work/granted`
+ → **deny** (correct).
+
+For `deniedPaths` the failure is inverted but equally bad: if a denied path is given as a
+symlink and only the literal is matched, the *real* target remains reachable (a deny that
+doesn't deny). Since MXC's own README currently cautions that profiles "should not be
+treated as security boundaries yet," symlink handling at the profile-generation boundary is
+concrete, actionable hardening in exactly that area.
+
+**Recommendation:** when canonicalizing (Issue 1), do it as **realpath-first, then
+allow/deny matching**, for both allow and deny lists, and treat `deniedPaths` resolution as
+load-bearing.
+
+---
+
+## How we worked around it downstream (for reference)
+
+In entrabot we don't rely on MXC to canonicalize. Our policy layer:
+
+1. Resolves the operator-set "ceiling" and the agent-requested paths to realpaths
+ (`expanduser` + `realpath`) and admits a request only if it equals or is a descendant of
+ a ceiling entry — **canonicalize-first, then containment** (so the symlink-escape in
+ Issue 2 is closed on our side).
+2. Passes the resolved realpaths to MXC, which is what makes `/tmp` writes actually work
+ (Issue 1 workaround — we hand MXC `/private/tmp`).
+
+This works, but every integrator will independently rediscover both points. Pushing the
+realpath normalization (and the realpath-first ordering) into MXC would make the obvious
+policy correct by default and remove a silent, security-relevant footgun.
+
+---
+
+## Environment
+
+| Field | Value |
+|---|---|
+| MXC version | v0.6.1 (commit `161598fd08a4fdd030f461de19af23ce4a310b41`) |
+| Binary | `mxc-exec-mac`, Seatbelt backend, `--experimental` |
+| Policy schema | `0.6.0-alpha` |
+| OS | macOS, arm64 (Apple Silicon) |
+| Delivery | config JSON piped on stdin |
+
+Happy to provide the full differential-test harness or pair on a repro if useful.
diff --git a/docs/platform-learnings/mxc-windows-sandbox-preview.md b/docs/platform-learnings/mxc-windows-sandbox-preview.md
new file mode 100644
index 0000000..7ed2471
--- /dev/null
+++ b/docs/platform-learnings/mxc-windows-sandbox-preview.md
@@ -0,0 +1,131 @@
+# MXC Windows Preview — What the `processcontainer` Build Actually Exposes
+
+**Date:** 2026-06-25
+**Author:** Windows port (entrabot PR #86, `feat/mxc-sandbox-integration`)
+**Status:** Verified against a real Windows preview build — not announcement-only.
+**Companion to:** [`mxc-windows-sandbox.md`](mxc-windows-sandbox.md) (the pre-build
+research) and [ADR-007](../decisions/007-mxc-sandbox-integration.md).
+
+This note records what the **shipped** MXC Windows binary actually does, measured
+on a real machine, versus what the earlier platform research inferred from the
+Build-2026 announcement and the SDK README. The macOS instance literally could
+not test any of this; everything below was run against the binary.
+
+## Environment under test
+
+- **Host:** Windows 11, build **28120** (26H1), **ARM64**.
+- **Binary:** `wxc-exec.exe` from `@microsoft/mxc-sdk` **v0.7.0** (npm,
+ Microsoft-published, 41.7 MB). Ships **both** `bin/arm64/` and `bin/x64/`
+ `wxc-exec.exe` (plus `lxc-exec`, `mxc-exec-mac`, and the experimental
+ `wxc-windows-sandbox-*`, `winhttp-proxy-shim`, `mxc-diagnostic-console`).
+- **Python:** 3.13, `sys.platform == "win32"`, `platform.machine() == "ARM64"`.
+
+Pinned SHA256 (taken from the published package, not a local build):
+
+| Arch | `wxc-exec.exe` SHA256 |
+|---|---|
+| arm64 | `e430d0e4f44f616e91db684f8d825a6dc93e06a1262b8d00bcaac7522a317aab` |
+| x64 | `db0a3422be9e1b396cc1b2547c70ff16b27412438a31c10a45abf370cac86ae2` |
+
+## What matched the research
+
+- **`processcontainer` is a default, non-experimental backend.** `run_code`
+ works **without** `--experimental` once the binary is wired (confirmed by a
+ real run, exit 0). The minimum build is 26100 (24H2); this host (28120) is well
+ above it.
+- **The abstract `process` intent resolves to `processcontainer`.** A config with
+ `"containment": "process"` is rewritten by the binary to
+ `"containment": "processcontainer"` (visible in `--dry-run` output).
+- **Filesystem allow-listing is real and kernel-enforced.** With only a scratch
+ dir in `readwritePaths`, a write **into** it succeeds (exit 0, file created); a
+ write **outside** it fails with `Access is denied.` (exit 1, no file). This is
+ the §2 demo matrix, reproduced on Windows through the exact entrabot
+ ceiling→clamp→canonicalize→MXC chain.
+- **Network host filtering is NOT enforced on Windows.** The README states it
+ outright (`network.allowedHosts` / `blockedHosts` have no effect; only
+ `network.defaultPolicy` and `network.proxy` constrain egress). `get_capabilities`
+ therefore reports `network_host_filtering=False`, and `clamp_to_ceiling` fails
+ closed if a policy ever asks for an allow-list.
+
+## What the research got wrong / didn't know (load-bearing)
+
+1. **No stdin config path. Use a file or `--config-base64`.** The macOS runner
+ streams policy JSON on stdin (via a local patch). `wxc-exec.exe` does **not**
+ read stdin: the CLI is `wxc-exec.exe [CONFIG_PATH] [--config <PATH>]
+ [--config-base64 <BASE64>] [-- <ARGS>...]`. The Windows runner uses
+ `--config-base64` (no temp file to create/secure/clean up).
+
+2. **The parser strictly rejects unknown top-level fields.** entrabot's
+ `build_policy()` emitted a top-level `"keychainAccess": false`. The real
+ v0.7.0 binary rejects it:
+ `Unknown top-level field(s) in config: keychainAccess`. This was a
+ **cross-platform latent bug** — the macOS v0.6.1 build tolerated it, the
+ Windows v0.7.0 build does not. Fix: stop emitting the field entirely. No MXC
+ schema version defines a top-level `keychainAccess`; on macOS it lives under
+ `experimental.seatbelt.keychainAccess`. `keychain_access` stays denied by
+ default-deny regardless, so omitting it is safe, not a relaxation.
+
+3. **`process.commandLine` runs via `CreateProcessW` — there is no implicit
+ shell.** `"echo hi"` fails (`CreateProcessW failed: cannot find the file`,
+ because `echo` is a `cmd` builtin); `"whoami"` failed with
+ `STATUS_DLL_INIT_FAILED`. Commands needing shell builtins, redirection, pipes,
+ or PATH resolution must be invoked as `cmd /c ...`. The agent (caller) owns
+ this; the runner passes `commandLine` through verbatim. Redirection like
+ `cmd /c echo X > file` works and is enforced against the filesystem ceiling.
+
+4. **`processcontainer` auto-grants the system DLL baseline.** A `cmd /c echo`
+ succeeded even with `readonlyPaths: []` — the backend supplies the baseline
+ needed to load `cmd.exe` + system DLLs (analogous to Seatbelt's `/usr/lib`
+ baseline). Operators do not need to add `C:\Windows` to `readonlyPaths` for every command.
+
+5. **`platform.machine()` is upper-case on Windows (`AMD64` / `ARM64`).** This
+ broke two assumptions: the `MXC_BIN_DIR/<arch>/` lookup and the
+ `PINNED_HASHES` key. The npm package uses lower-case `bin/arm64` and `bin/x64`.
+ entrabot now normalizes arch (`AMD64`→`x64`, `ARM64`→`arm64`) for both the
+ lookup and the hash key (`normalize_arch` in `binary.py`).
+
+6. **The `os.pathsep` ceiling bug was fatal on Windows.** The operator ceiling
+ was parsed with `.split(":")`. On Windows a single `C:\Users\me` ceiling entry
+ split into `["C", "\\Users\\me"]`, shredding every path at the drive-letter
+ colon and making the ceiling unusable. Now parsed with `os.pathsep`
+ (`;` on Windows). Operator ceiling lists are **`;`-separated on Windows**.
+
+## Schema version
+
+- Current **stable** schema is **`0.7.0-alpha`** (the README says "pick 0.7.0-alpha
+ for new code"). entrabot still emits **`0.6.0-alpha`**, which the v0.7.0 binary
+ accepts without complaint (validated by `--dry-run`, exit 0). Both are "Stable".
+ Policy-building stays isolated in `policy.py`, so a bump to `0.7.0-alpha` is a
+ one-line change when we choose to make it.
+- Experimental backends, the `experimental.*` block, and the state-aware
+ lifecycle live in the `0.8.0-dev` schema. The parser accepts them only with
+ `--experimental`. **Schema choice affects editor validation, not runtime.**
+
+## Phase 2 reconnaissance — session isolation + Entra binding
+
+The Phase 2 thesis (container activity attributed to the entrabot Agent User) hinges
+on the **`isolation_session`** backend. Findings from this preview:
+
+- `isolation_session` is present in the SDK's backend table but marked
+ **experimental**, "concrete-only" (no abstract intent maps to it), and requires
+ a **higher minimum build — 26300.8553 (Insider Preview)** than this host
+ (28120). It is the only backend with a state-aware
+ provision→start→exec→stop→deprovision lifecycle.
+- **No Entra-binding surface is exposed in the OSS binary or SDK.** The CLI has no
+ `--session`, no identity, no tenant flag; the only session-shaped surfaces are
+ `--delete`/`--containername` (profile cleanup) and the WSLC/Hyperlight setup
+ flags. The "attribute the container to an Entra identity" story remains in the
+ **Windows + Agent 365 vision/governance layer** (Intune), not in the shipping
+ `wxc-exec.exe`.
+- **Conclusion:** Phase 2 stays a stub (`session.py`, `identity_binding()` →
+ `NotImplementedError`). The preview does **not** yet expose the APIs needed to
+ bind a container to the entrabot Agent User. Re-check when (a) the host can run
+ `isolation_session` (build ≥ 26300.8553) and (b) an identity-binding surface
+ appears in the SDK/CLI or a documented Intune/Entra API.
+
+## Defense-in-depth caveat (unchanged)
+
+MXC still self-describes as **not a security boundary yet** ("profiles are overly
+permissive"). The filesystem enforcement demonstrated here is real, but MXC remains
+**defense-in-depth** layered under entrabot's existing identity/attribution/audit
+gates — it must never relax one. (See `mxc-windows-sandbox.md` §7 Q4.)
diff --git a/docs/platform-learnings/mxc-windows-sandbox.md b/docs/platform-learnings/mxc-windows-sandbox.md
index 909ee12..65ec079 100644
--- a/docs/platform-learnings/mxc-windows-sandbox.md
+++ b/docs/platform-learnings/mxc-windows-sandbox.md
@@ -110,6 +110,48 @@ macOS support is **experimental**, requires schema **`0.6.0-alpha`+** and the `-
---
+## 5.1 Entrabot macOS build/install notes (2026-06-18)
+
+For Entrabot's macOS E2E work we build the native Seatbelt runner from source
+and install it at `.mxc-build/target/release/mxc-exec-mac`.
+
+- **Upstream source:** `https://github.com/microsoft/mxc`
+- **Pinned version:** `v0.6.1`
+- **Pinned commit:** `161598fd08a4fdd030f461de19af23ce4a310b41`
+- **Local compatibility patch:** `scripts/mxc-mac-stdin-compat.patch`
+ - Why: Entrabot's `SeatbeltRunner` streams policy JSON on stdin.
+ - Upstream `mxc-exec-mac` v0.6.1 accepts file/base64 config but not stdin.
+ - The patch adds: "if no config arg is present, read JSON from stdin and
+ feed it through the existing base64 parse path."
+- **Installed binary SHA256 (darwin-arm64):**
+ `700e9e7120c78fe9ecdb8c99309ba6df0ea467ac5b581b803b73d655bbccff36`
+
+Rebuild recipe:
+
+```bash
+git clone --depth 1 --branch v0.6.1 https://github.com/microsoft/mxc.git .mxc-build/mxc-src
+git -C .mxc-build/mxc-src fetch --depth 1 origin 161598fd08a4fdd030f461de19af23ce4a310b41
+git -C .mxc-build/mxc-src checkout --force 161598fd08a4fdd030f461de19af23ce4a310b41
+git -C .mxc-build/mxc-src apply "$(pwd)/scripts/mxc-mac-stdin-compat.patch"
+cd .mxc-build/mxc-src && ./build-mac.sh --rust-only
+cp src/target/aarch64-apple-darwin/release/mxc-exec-mac ../target/release/mxc-exec-mac
+shasum -a 256 ../target/release/mxc-exec-mac
+```
+
+Smoke checks:
+
+```bash
+# File-based config (upstream interface)
+.mxc-build/target/release/mxc-exec-mac --experimental .mxc-build/smoke-config.json
+
+# Entrabot compatibility path (stdin)
+cat .mxc-build/smoke-config.json | .mxc-build/target/release/mxc-exec-mac --experimental
+```
+
+Both should print the configured command output and exit 0.
+
+---
+
## 6. Where MXC fits entrabot
entrabot and MXC are **two halves of the same security thesis**, and they don't overlap — they compose:
diff --git a/docs/reference/scripts/setup.md b/docs/reference/scripts/setup.md
index d4600b4..aa757a4 100644
--- a/docs/reference/scripts/setup.md
+++ b/docs/reference/scripts/setup.md
@@ -32,14 +32,14 @@ Run `./scripts/setup.sh --help` for the full flag matrix.
- Calls `entra_provisioning.py` to mint or reuse the dedicated Provisioner app (cert-auth).
- Calls `create_entra_agent_ids.py` to create Blueprint + Agent Identity + Agent User.
- Generates a Blueprint cert, stores the private key in the OS keystore, uploads the public cert to the Blueprint app.
-- Writes `.env` with the resulting IDs and thumbprints.
+- Writes `.env` (or the file given by `--env-file`) with the resulting IDs and thumbprints.
- Optionally provisions Azure Blob Storage when `--use-cloud-memory` is passed (see `provision_blob_storage.py`).
- Registers `entrabot` in `.mcp.json` and `~/.copilot/mcp-config.json` via `mcp_config.py`.
- With `--status`, skips provisioning and delegates to `./status.sh`, forwarding status arguments such as `--json`, `--health-only`, and `--strict`.
### Idempotency
-Re-runs reuse the existing chain unless `--new` is passed. Each step short-circuits when its target already exists; cert verification (`verify_blueprint_cert.py`) decides whether to keep or rotate the cert.
+Re-runs reuse the existing chain unless `--new` is passed. Pairing `--new` with `--use-blueprint=<app-id>` creates a fresh Agent Identity/User under an existing Blueprint instead of provisioning a second Blueprint. Each step short-circuits when its target already exists; cert verification (`verify_blueprint_cert.py`) decides whether to keep or rotate the cert.
See `docs/reference/setup-script.md` for the long form. ADR-003 covers the cert-auth choice. ADR-005 covers cloud memory.
diff --git a/docs/reference/setup-script.md b/docs/reference/setup-script.md
index 2b02635..5df559f 100644
--- a/docs/reference/setup-script.md
+++ b/docs/reference/setup-script.md
@@ -15,10 +15,12 @@ The `./scripts/setup.sh` script provisions and configures an EntraBot agent end
| Flag | Purpose |
|------|---------|
| *(none)* | Reuse existing Blueprint / Agent Identity / Agent User from `.entrabot-state.json`. This is the common case on a machine that's already been set up. |
-| `--new` | Provision a brand-new identity chain (Blueprint + Agent Identity + Agent User). Does not touch the existing chain; the current `.env` is backed up. Must be paired with `--with-upn-suffix` or you'll be prompted. |
-| `--use-blueprint=` | Attach to an existing Blueprint from a different machine. Generates a new cert locally and uploads its public key to the Blueprint. Reuses the existing Agent Identity and Agent User. Also handles the "switch this machine to a different Blueprint" case — stale Agent Identity / User / cert thumbprint are wiped from local state. |
+| `--new` | Provision a fresh Agent Identity + Agent User. By default this also provisions a fresh Blueprint. Does not touch the existing chain; the current env/state files are backed up if they are the targets for this run. Must be paired with `--with-upn-suffix` or `--agent-user-upn`, otherwise you'll be prompted. |
+| `--use-blueprint=<app-id>` | Attach to an existing Blueprint from a different machine. Generates a new cert locally and uploads its public key to the Blueprint. Without `--new`, reuses the existing Agent Identity and Agent User. With `--new`, creates a fresh Agent Identity + Agent User under the existing Blueprint. Also handles the "switch this machine to a different Blueprint" case — stale Agent Identity / User / cert thumbprint are wiped from local state. |
| `--with-upn-suffix=` | Required with `--new`; also supported with `--use-blueprint` to select an existing suffixed Agent User under the Blueprint. Example: `--with-upn-suffix=sati-agent` produces or selects `entrabot-agent-sati-agent@yourdomain.com`. |
| `--agent-user-upn=` | Explicit Agent User UPN. With `--use-blueprint`, selects an existing Agent User to reuse, e.g. `entrabot-agent-sati-agent@yourtenant.onmicrosoft.com`. With `--new`, creates exactly that UPN, e.g. `entrabot-agent@yourtenant.onmicrosoft.com`. |
+| `--state-file=<path>` | Write provisioning state to a custom file instead of `./.entrabot-state.json`. Useful for keeping production and test Agent Identity chains side by side. |
+| `--env-file=<path>` | Write generated env config to a custom file instead of `./.env`. Useful for parallel prod/test setups and E2E fixtures. |
### User identity
@@ -82,6 +84,19 @@ If the Blueprint has a suffixed Agent User, pin that chain explicitly:
If the local OS keystore already has the matching Blueprint private key, setup recovers the registered cert thumbprint and does not prompt to rotate the Blueprint cert.
+### Create a fresh test Agent Identity under the existing Blueprint
+
+```bash
+./scripts/setup.sh \
+ --new \
+ --use-blueprint=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \
+ --agent-user-upn=entrabot-mxc-test@yourtenant.com \
+ --state-file=.entrabot-state-mxc-test.json \
+ --env-file=.env.mxc-test
+```
+
+This reuses the existing Blueprint, creates a new Agent Identity + Agent User under it, and keeps the test chain's state/config separate from production.
+
### Configure Work IQ Word for an existing Agent User
```bash
@@ -111,7 +126,7 @@ Auto-detects the external UPN, resolves their home tenant, and creates a federat
## Environment outcomes
-After a successful run, `.env` will have the following entries (at minimum):
+After a successful run, the target env file (`.env` by default, or `--env-file`) will have the following entries (at minimum):
```
ENTRABOT_TENANT_ID=...
diff --git a/docs/runbooks/hard-won-learnings.md b/docs/runbooks/hard-won-learnings.md
index 6aea7ad..ac93462 100644
--- a/docs/runbooks/hard-won-learnings.md
+++ b/docs/runbooks/hard-won-learnings.md
@@ -911,6 +911,26 @@ After this, `setup.sh --diagnose` passed all 7 checks including the three-hop to
---
+### Learning #69: Eager Synchronous Boot Auth Stalled the MCP Handshake — copilot Engine Launch Timed Out Where Claude Code Tolerated It
+
+**Date:** 2026-06-29
+**Status:** **CONFIRMED — fixed by offloading boot auth to a worker thread (`asyncio.to_thread`). Test `TestInitAuthDoesNotBlockEventLoop`.**
+**Context:** Launching entrabot under GitHub Copilot CLI (`copilot`, v1.0.65). Copilot was started as an engine from the trusted folder `C:\Development\entrabot`, so it auto-loaded the workspace `.mcp.json` and tried to boot the `entrabot` MCP server during launch.
+**Problem:** Host reported `execution failed: launch_engine - …\copilot.exe exited with non-zero status (exit code: 1)`. copilot.exe was healthy in isolation — `--version`, `-p "say hi"`, and `--acp` all exited 0. The failure was the `entrabot` MCP server: copilot's log showed `Failed to start MCP client for entrabot: McpError: MCP error -32001: Request timed out` after ~63s. A raw `initialize` sent directly to `entrabot-mcp.exe` sat with **no response for >60s**.
+**Root cause:** `mcp_server._run_stdio_with_write_stream` kicks off `_eager_init()` via `asyncio.create_task` (eager boot so Teams/email observation starts immediately, not lazily on first tool call — that design choice landed at the `entraclaw → entrabot` rename, `2e22527`). But `_init_auth` called the **synchronous, blocking** `acquire_agent_user_token` (several blocking HTTPS token POSTs, ~60s for the three-hop flow) and the MSAL `auth.authenticate()` **directly on the event loop**. `create_task` looks concurrent but a sync blocking call inside an async task still freezes the single asyncio loop — so the MCP stdio read loop could not service the client's `initialize` request until auth finished. Claude Code tolerates a slow/late MCP server (keeps the session, connects whenever it's ready); copilot's stdio/ACP engine launch enforces a startup readiness deadline and treats the stalled handshake as a fatal launch failure → exit 1.
+**Fix:** Wrap both blocking calls in `await asyncio.to_thread(...)` in `_init_auth`, so auth runs on a worker thread and the loop stays free to answer `initialize` immediately. Post-fix the handshake returns in ~1.8s (was >60s). Eager observation is preserved — auth still starts at boot, it just no longer monopolizes the loop.
+**Prevention:**
+
+- **This is a code fix, not a config fix.** Nothing was wrong with `.mcp.json` or `scripts/mcp_config.py` (it only writes a standard `command`/`args`/`type` `mcpServers` entry — there is no per-server startup-timeout knob to tune). A slow handshake must be fixed in the server's boot path, and the fix benefits every host.
+- **Never run synchronous blocking I/O directly on the asyncio loop in a server's boot/lifespan path.** `asyncio.create_task(coro)` does not make the *body* of `coro` non-blocking — only its `await` points yield. Any sync network/crypto/file call inside must go through `asyncio.to_thread` (or an async client), or it starves every other task including the protocol handshake.
+- **Test the property, not the path:** assert the loop stays responsive (a concurrent heartbeat coroutine fires promptly) while a deliberately slow (`time.sleep`) blocking dependency runs — don't just assert the token was acquired.
+- **Host tolerance differs.** "Works in Claude Code" does not mean the MCP server boots cleanly — Claude Code masks slow/failed handshakes that stricter stdio/ACP engine hosts (copilot, Zed-style ACP clients) reject. When validating an MCP server, probe the raw `initialize` latency directly.
+- Related to the open `docs/runbooks/mcp-disconnect-investigation.md` slow-boot dossier — same eager-boot weight, different symptom (here: launch-time handshake timeout rather than mid-session drop).
+
+**Evidence/references:** Live session 2026-06-29. copilot log `~/.copilot/logs/process-1782754836854-3540.log:153`. Boot path: `src/entrabot/mcp_server.py` `_run_stdio_with_write_stream` → `_eager_init` → `_init_auth` (the two `asyncio.to_thread` wraps). Blocking dependency: `src/entrabot/tools/teams.py:126` `def acquire_agent_user_token` (synchronous). Test: `tests/test_mcp_server_integration.py::TestInitAuthDoesNotBlockEventLoop`.
+
+---
+
### [HISTORICAL] Learning #4: OBO Requires Matching Token Audience
**Date:** 2026-04-06
diff --git a/mkdocs.yml b/mkdocs.yml
index 4ec9c9a..3ef5616 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -30,6 +30,7 @@ nav:
- Guides:
- Customizing the body prompt: guides/customizing-the-body-prompt.md
- Storage configuration: guides/storage-configuration.md
+ - MXC sandbox setup: guides/mxc-sandbox.md
- Architecture:
- System Overview: architecture/system-overview.md
- Enforcement Flow: architecture/enforcement-flow.md
diff --git a/prompts/anatomy/identity-and-tools.md b/prompts/anatomy/identity-and-tools.md
index 935b8e1..f95a3e9 100644
--- a/prompts/anatomy/identity-and-tools.md
+++ b/prompts/anatomy/identity-and-tools.md
@@ -72,6 +72,47 @@ budget is small even when several sends happen in one turn. Scope
is intentionally narrow: outbound publishing only. Reads, list calls,
and audit entries do not need a pre-call observe.
+### Local files vs cloud files
+
+"Files" can mean two different places, and you must not conflate them:
+
+- **Cloud files** — OneDrive / SharePoint, reached via the Graph file
+ tools (`read_file`, `write_text_file`, `upload_file`, `share_file`,
+ etc.). These live in Microsoft 365, attributed to your Agent Identity.
+- **Local files** — the user's actual computer (`~/Documents`,
+ `~/Downloads`, `/tmp`, any path on disk). These are reachable through
+ the `read_local_file` / `write_local_file` tools (and `run_code` for
+ running commands), when they are available.
+
+When the user refers to a file "on my machine", "in my Documents /
+Downloads folder", a path like `/Users/.../...`, or anything on their
+local disk:
+
+- To **read/open/show** it → use `read_local_file`.
+- To **write/save/create** it → use `write_local_file`.
+- To run a script or command on it → use `run_code`.
+
+Use these for local/on-disk requests — NOT the OneDrive tools. Do not
+assume "Documents folder" means OneDrive; default to the local disk when
+they say "my machine" or give a filesystem path. Never substitute a
+OneDrive write for a requested local write and report it as if it were
+local.
+
+These run inside an OS-enforced sandbox (Apple Seatbelt): the operator
+pre-authorizes which directories you may read and write. It is
+**permission-based on the user's REAL filesystem — not an isolated or
+throwaway container.** Files you read are the user's actual files; files
+you write to allowed paths persist on the user's real disk. If a path is
+outside the operator's allowed paths, the kernel blocks it.
+
+**Attempt the operation; let the sandbox decide.** Don't pre-judge that a
+path is off-limits and refuse — try it. If it's blocked, tell the user the
+path is outside the sandbox's allowed read/write paths (the operator's
+ceiling), not that the file is missing, that you have no local-file tool,
+or that the write went somewhere isolated. If these tools are not in your
+toolset at all, then local-file access simply isn't enabled in this
+deployment — say so plainly.
+
### Files (SharePoint / OneDrive) authorization
When sharing a file via `share_file`:
diff --git a/scripts/create_entra_agent_ids.py b/scripts/create_entra_agent_ids.py
index 6f08421..a935f73 100644
--- a/scripts/create_entra_agent_ids.py
+++ b/scripts/create_entra_agent_ids.py
@@ -29,6 +29,8 @@
# When ENTRABOT_NEW_CHAIN=1, skip all find_existing_* lookups and create fresh.
# Set by setup.sh --new to force a new identity chain.
_FORCE_NEW = os.environ.get("ENTRABOT_NEW_CHAIN") == "1"
+_REUSE_BLUEPRINT = os.environ.get("ENTRABOT_REUSE_BLUEPRINT") == "1"
+_PINNED_BLUEPRINT_APP_ID = os.environ.get("ENTRABOT_PIN_BLUEPRINT_APP_ID", "").strip()
_ASSIGN_TEAMS_LICENSE = os.environ.get("ENTRABOT_ASSIGN_TEAMS_LICENSE", "1") == "1"
_ASSIGN_WORK_IQ_LICENSE = os.environ.get("ENTRABOT_ASSIGN_WORK_IQ_LICENSE") == "1"
@@ -95,6 +97,21 @@ def find_existing_blueprint(token: str) -> dict | None:
return None
+def find_blueprint_by_app_id(token: str, app_id: str) -> dict | None:
+ """Find a Blueprint by exact appId, without any display-name fallback."""
+ resp = graph_request(
+ "GET",
+ f"/applications?$filter=appId eq '{odata_escape(app_id)}'",
+ token,
+ )
+ if resp.status_code != 200:
+ return None
+ values = resp.json().get("value", [])
+ if values:
+ return values[0]
+ return None
+
+
def ensure_blueprint_principal(token: str, app_id: str) -> None:
"""Ensure the BlueprintPrincipal (SP) exists — it is NOT auto-created."""
resp = graph_request(
@@ -134,6 +151,27 @@ def create_blueprint(token: str) -> tuple[str, str]:
"""Create or find the Agent Identity Blueprint. Returns (app_id, object_id)."""
print("\n--- Creating Agent Identity Blueprint ---\n")
+ if _PINNED_BLUEPRINT_APP_ID:
+ existing = find_blueprint_by_app_id(token, _PINNED_BLUEPRINT_APP_ID)
+ if not existing:
+ print(
+ " ERROR: Requested Blueprint was not found: "
+ f"{_PINNED_BLUEPRINT_APP_ID}"
+ )
+ print(" Re-run setup with the correct --use-blueprint value.")
+ sys.exit(1)
+
+ app_id = existing["appId"]
+ obj_id = existing["id"]
+ mode = "[--new --use-blueprint]" if _FORCE_NEW and _REUSE_BLUEPRINT else "[use-blueprint]"
+ print(f" {mode} Reusing Blueprint: {existing.get('displayName', BLUEPRINT_DISPLAY_NAME)}")
+ print(f" App ID: {app_id}")
+ print(f" Object ID: {obj_id}")
+ set_state("BLUEPRINT_APP_ID", app_id)
+ set_state("BLUEPRINT_OBJECT_ID", obj_id)
+ ensure_blueprint_principal(token, app_id)
+ return app_id, obj_id
+
if _FORCE_NEW:
print(" [--new] Skipping existing Blueprint lookup — creating fresh")
existing = None
diff --git a/scripts/demo_sandbox.ps1 b/scripts/demo_sandbox.ps1
new file mode 100644
index 0000000..9e4cbd7
--- /dev/null
+++ b/scripts/demo_sandbox.ps1
@@ -0,0 +1,229 @@
+<#
+.SYNOPSIS
+ EntraBot x MXC - least-privilege local-execution demo (Windows).
+
+.DESCRIPTION
+ The Windows counterpart to scripts/demo_sandbox.py. Drives the REAL,
+ SHA256-pinned wxc-exec.exe through the exact run_code enforcement chain the
+ MCP server uses (operator ceiling -> clamp -> canonicalize -> MXC
+ processcontainer) and narrates each beat so an audience can watch the
+ Windows kernel - not Python, not the agent's good behavior - enforce the
+ boundary.
+
+ Pair it with an ELEVATED mxc-diagnostic-console.exe in a second window to
+ show the live event stream (see docs/guides/mxc-sandbox-demo-windows.md).
+
+.PARAMETER NoPause
+ Run straight through without pausing between beats (for recording / CI).
+
+.PARAMETER ConfigOnly
+ Print the operator ceiling + backend and exit (the operator's view).
+
+.NOTES
+ Requires:
+ - ENTRABOT_ENABLE_RUN_CODE=1 and the MXC sandbox vars in .env
+ - The real wxc-exec.exe resolvable via MXC_BIN_DIR (run setup_sandbox.ps1)
+ - The repo venv at .venv\Scripts\python.exe
+#>
+[CmdletBinding()]
+param(
+ [switch]$NoPause,
+ [switch]$ConfigOnly
+)
+
+$ErrorActionPreference = "Stop"
+$RepoRoot = (Resolve-Path (Join-Path $PSScriptRoot "..")).Path
+$Py = Join-Path $RepoRoot ".venv\Scripts\python.exe"
+$Runner = Join-Path $PSScriptRoot "demo_sandbox_run.py"
+
+$Docs = Join-Path $HOME "Documents"
+$Downloads = Join-Path $HOME "Downloads"
+$Temp = $env:TEMP
+
+function Banner($text) {
+ $line = "=" * 64
+ Write-Host ""
+ Write-Host $line -ForegroundColor Cyan
+ Write-Host " $text" -ForegroundColor Cyan
+ Write-Host $line -ForegroundColor Cyan
+}
+
+function Beat($text) {
+ if ($NoPause) { Write-Host "`n -> $text" -ForegroundColor DarkGray }
+ else { Read-Host "`n [Enter] $text" | Out-Null }
+}
+
+function Invoke-Scenario {
+ param(
+ [string]$Title, [string]$Say, [string]$Cmd,
+ [string[]]$Ro = @(), [string[]]$Rw = @(), [bool]$ExpectAllow,
+ [string]$ReadBack = $null
+ )
+ Write-Host ""
+ Write-Host " $Title" -ForegroundColor Blue
+ Write-Host " $Say" -ForegroundColor Gray
+ Write-Host " agent runs : $Cmd" -ForegroundColor DarkGray
+ $reqRo = if ($Ro.Count) { $Ro -join ', ' } else { '[]' }
+ $reqRw = if ($Rw.Count) { $Rw -join ', ' } else { '[]' }
+ Write-Host " agent asks for: read=$reqRo write=$reqRw" -ForegroundColor DarkGray
+
+ $argList = @($Runner, "--cmd", $Cmd)
+ foreach ($p in $Ro) { $argList += @("--ro", $p) }
+ foreach ($p in $Rw) { $argList += @("--rw", $p) }
+ $json = & $Py @argList | Select-Object -Last 1
+ $r = $json | ConvertFrom-Json
+
+ if ($r.error) {
+ Write-Host " HARNESS ERROR: $($r.error)" -ForegroundColor Red
+ return $false
+ }
+
+ if ($r.dropped_rw -and $r.dropped_rw.Count) {
+ Write-Host " clamp : dropped WRITE $($r.dropped_rw -join ', ') (outside operator ceiling)" -ForegroundColor Yellow
+ }
+ if ($r.dropped_ro -and $r.dropped_ro.Count) {
+ Write-Host " clamp : dropped READ $($r.dropped_ro -join ', ') (outside operator ceiling)" -ForegroundColor Yellow
+ }
+ $sentRo = if ($r.clamped_ro.Count) { $r.clamped_ro -join ', ' } else { '[]' }
+ $sentRw = if ($r.clamped_rw.Count) { $r.clamped_rw -join ', ' } else { '[]' }
+ Write-Host " policy -> MXC: read=$sentRo write=$sentRw" -ForegroundColor DarkGray
+
+ if ($r.allowed) {
+ $detail = if ($r.stdout) { $r.stdout } else { "(no output)" }
+ # For write scenarios the write goes to a file (no stdout); read it back to prove it landed.
+ if ($ReadBack -and (Test-Path $ReadBack)) { $detail = (Get-Content $ReadBack -Raw).Trim() }
+ Write-Host " [+] ALLOWED exit=$($r.exit_code) output: $detail" -ForegroundColor Green
+ } else {
+ $detail = if ($r.stderr) { $r.stderr } else { "(blocked)" }
+ Write-Host " [x] BLOCKED by the Windows kernel exit=$($r.exit_code) reason: $detail" -ForegroundColor Red
+ }
+
+ $correct = ($r.allowed -eq $ExpectAllow)
+ $expect = if ($ExpectAllow) { "ALLOW" } else { "BLOCK" }
+ if ($correct) { Write-Host " expected $expect -> as designed" -ForegroundColor Green }
+ else { Write-Host " expected $expect -> UNEXPECTED" -ForegroundColor Red }
+ return $correct
+}
+
+# -- Preconditions -----------------------------------------------------------
+if (-not (Test-Path $Py)) { Write-Host "venv not found at $Py. Run: python -m venv .venv; .venv\Scripts\pip install -e .[dev]" -ForegroundColor Red; exit 1 }
+if ($env:ENTRABOT_ENABLE_RUN_CODE -ne "1") {
+ # .env may set it; the Python runner loads .env, so just warn.
+ Write-Host "(note: ENTRABOT_ENABLE_RUN_CODE not set in this shell; .env value will be used by the runner)" -ForegroundColor DarkYellow
+}
+
+Banner "EntraBot x MXC - Least-Privilege Local Execution (Windows)"
+Write-Host @"
+
+ An AI agent with its own Entra identity wants to run code on this PC.
+ The OPERATOR decides what it may touch. The agent can only NARROW that -
+ never widen it. Containment is enforced by Windows' processcontainer
+ (AppContainer) via Microsoft Execution Containers (MXC).
+"@
+
+# Show ceiling + backend by running a trivial probe through the real chain.
+$probe = (& $Py $Runner --cmd "cmd /c echo ." | Select-Object -Last 1) | ConvertFrom-Json
+if ($probe.error) { Write-Host "`nMXC unavailable: $($probe.error)" -ForegroundColor Red; Write-Host "Run scripts\setup_sandbox.ps1 first." -ForegroundColor DarkGray; exit 1 }
+Write-Host "`n Operator ceiling (the human-set maximum):" -ForegroundColor White
+Write-Host " read-only : $($probe.ceiling_ro -join ', ')" -ForegroundColor Green
+Write-Host " read-write: $($probe.ceiling_rw -join ', ')" -ForegroundColor Green
+Write-Host " keychain : hard-disabled (not overridable by the agent)" -ForegroundColor DarkGray
+Write-Host "`n Backend: $($probe.backend) (real binary, SHA256-verified)" -ForegroundColor White
+$agent = if ($env:ENTRABOT_AGENT_USER_UPN) { $env:ENTRABOT_AGENT_USER_UPN } else {
+ # Not in the shell env; the runner reads .env, so surface it here too.
+ $envFile = Join-Path $RepoRoot ".env"
+ $val = "(unset)"
+ if (Test-Path $envFile) {
+ $m = Select-String -Path $envFile -Pattern '^\s*ENTRABOT_AGENT_USER_UPN=(.+)$' | Select-Object -First 1
+ if ($m) { $val = $m.Matches[0].Groups[1].Value.Trim() }
+ }
+ $val
+}
+Write-Host " Agent identity: $agent (its own Entra Agent User)" -ForegroundColor White
+
+if ($ConfigOnly) {
+ Write-Host "`n This is the operator-set configuration. The agent can only narrow it." -ForegroundColor DarkGray
+ Write-Host " Run without -ConfigOnly to see it enforced.`n"
+ exit 0
+}
+
+# Fixture: a 'confidential' file in Documents the agent may READ but not WRITE.
+New-Item -ItemType Directory -Force $Docs | Out-Null
+$Secret = Join-Path $Docs "entrabot-secret.txt"
+if (-not (Test-Path $Secret)) {
+ # ASCII (no BOM) so `cmd /c type` doesn't show stray BOM bytes in the demo.
+ Set-Content -Path $Secret -Value "SECRET: quarterly numbers the agent may read but must not alter" -Encoding ascii
+}
+New-Item -ItemType Directory -Force $Downloads | Out-Null
+Write-Host "`n Fixture ready: $Secret" -ForegroundColor DarkGray
+
+$results = @()
+
+Banner "Act 1 - The agent reads what you allow"
+Beat "Scenario 1 - read your Documents (legitimate analysis)"
+$results += Invoke-Scenario -Title '"Read my confidential file in Documents."' `
+ -Say "Documents is in my read-only ceiling, so this is allowed." `
+ -Cmd ('cmd /c type "' + $Secret + '"') -Ro @($Docs) -ExpectAllow $true
+
+Banner "Act 2 - The agent cannot tamper"
+$hackFile = Join-Path $Docs "entrabot-hack.txt"
+Beat "Scenario 2 - try to WRITE to your Documents (tampering)"
+$results += Invoke-Scenario -Title '"Overwrite a file in Documents."' `
+ -Say "Documents is NOT in my read-write ceiling. The clamp drops it to [] and the kernel blocks the write." `
+ -Cmd ('cmd /c echo TAMPERED > "' + $hackFile + '"') -Rw @($Docs) -ExpectAllow $false
+
+Banner "Act 3 - The agent writes only where you allow"
+$reportFile = Join-Path $Temp "entrabot-report.txt"
+Beat "Scenario 3 - write a scratch report to %TEMP%"
+$results += Invoke-Scenario -Title '"Save a scratch report to my temp folder."' `
+ -Say "TEMP is in my read-write ceiling." `
+ -Cmd ('cmd /c echo scratch report > "' + $reportFile + '"') -Rw @($Temp) -ExpectAllow $true -ReadBack $reportFile
+
+$exportFile = Join-Path $Downloads "entrabot-export.txt"
+Beat "Scenario 4 - write an export to your Downloads"
+$results += Invoke-Scenario -Title '"Drop the export in my Downloads folder."' `
+ -Say "Downloads is in my read-write ceiling." `
+ -Cmd ('cmd /c echo export data > "' + $exportFile + '"') -Rw @($Downloads) -ExpectAllow $true -ReadBack $exportFile
+
+Banner "Act 4 - The agent can't reach the OS"
+Beat "Scenario 5 - try to write into C:\Windows (system tampering)"
+$results += Invoke-Scenario -Title '"Write into the Windows system directory."' `
+ -Say "C:\Windows isn't in any ceiling. The clamp drops it and the kernel blocks it." `
+ -Cmd 'cmd /c echo OWNED > C:\Windows\entrabot-owned.txt' -Rw @("C:\Windows") -ExpectAllow $false
+
+# Cleanup
+foreach ($p in @($hackFile, $reportFile, $exportFile, "C:\Windows\entrabot-owned.txt")) {
+ Remove-Item $p -ErrorAction SilentlyContinue
+}
+
+Banner "Recap"
+$passed = @($results | Where-Object { $_ }).Count  # @() keeps .Count well-defined for 0 or 1 matches
+$total = $results.Count
+Write-Host ""
+Write-Host " READ Documents allowed - WRITE Documents blocked - WRITE TEMP + Downloads allowed - WRITE C:\Windows blocked" -ForegroundColor White
+Write-Host ""
+Write-Host " Every action is audit-first: logged before it runs, and if audit cannot" -ForegroundColor Gray
+Write-Host " record, the action does not proceed. Fail-closed, and attributed to the" -ForegroundColor Gray
+Write-Host " agent's own Entra identity - not yours." -ForegroundColor Gray
+$color = if ($passed -eq $total) { "Green" } else { "Red" }  # red when any scenario deviated from its expectation
+Write-Host "`n $passed/$total scenarios behaved exactly as designed." -ForegroundColor $color
+
+# -- Agent first-person Teams talk-track -------------------------------------
+Banner "Now do it live - Teams talk-track"
+Write-Host ""
+Write-Host " Chat with the agent ($agent) in Teams and ask, in plain language."
+Write-Host " The agent calls run_code / read_local_file / write_local_file under the hood."
+Write-Host ""
+Write-Host ' 1) "Read my file at ~\Documents\entrabot-secret.txt and tell me what it says."' -ForegroundColor Green
+Write-Host " -> Agent reads it. Point out: Documents is read-only in the ceiling." -ForegroundColor DarkGray
+Write-Host ""
+Write-Host ' 2) "Now save the text hello to ~\Documents\note.txt."' -ForegroundColor Red
+Write-Host " -> Blocked. The agent reports it cannot write there. Show the audit log." -ForegroundColor DarkGray
+Write-Host ""
+Write-Host ' 3) "Write a short summary to ~\Downloads\summary.txt instead."' -ForegroundColor Green
+Write-Host " -> Works. Downloads is in the read-write ceiling." -ForegroundColor DarkGray
+Write-Host ""
+Write-Host " The agent never sees the ceiling as something it can change - you, the" -ForegroundColor DarkGray
+Write-Host " operator, set it in .env, and the OS enforces it. The model can only narrow." -ForegroundColor DarkGray
+
+exit $(if ($passed -eq $total) { 0 } else { 1 })
diff --git a/scripts/demo_sandbox.py b/scripts/demo_sandbox.py
new file mode 100755
index 0000000..281e242
--- /dev/null
+++ b/scripts/demo_sandbox.py
@@ -0,0 +1,357 @@
+#!/usr/bin/env python3
+"""
+demo_sandbox.py — Presentation demo for the EntraBot MXC sandbox.
+
+Runs the REAL ``mxc-exec-mac`` (Seatbelt) binary through the exact same
+``run_code`` enforcement chain the MCP server uses (operator ceiling →
+clamp → canonicalize → MXC), and narrates each step so an audience can
+see least-privilege containment enforced by the OS kernel — not by Python.
+
+This is the "proof harness" you run alongside the live Teams chat: it
+demonstrates that when the agent says "write to your Documents," the
+kernel says no.
+
+Usage:
+ ./scripts/demo_sandbox.py # interactive (pauses between beats)
+ ./scripts/demo_sandbox.py --no-pause # run straight through (CI/recording)
+
+Requires:
+ - ENTRABOT_ENABLE_RUN_CODE=1 and the MXC sandbox env vars in .env
+ - The real mxc-exec-mac binary resolvable via MXC_BIN_DIR
+"""
+
+from __future__ import annotations
+
+# ruff: noqa: I001 — import order is deliberate (venv re-exec + sys.path insert +
+# .env side-effect load must precede the entrabot.sandbox imports).
+
+import os
+import sys
+from pathlib import Path
+
+# Re-exec under the repo's venv interpreter if we're not already running it.
+# The entrabot package needs Python 3.12+; running ``./scripts/demo_sandbox.py``
+# directly would otherwise pick up the system python3 (often 3.9) and crash on
+# modern type syntax. Uses only stdlib so it's safe on any Python 3.x.
+_VENV_PY = Path(__file__).resolve().parent.parent / ".venv" / "bin" / "python3"
+if _VENV_PY.exists() and os.path.realpath(sys.executable) != os.path.realpath(_VENV_PY):
+ os.execv(str(_VENV_PY), [str(_VENV_PY), *sys.argv])
+
+import contextlib # noqa: E402
+
+# Make the entrabot package importable and load .env (handles spaces in paths).
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src"))
+import entrabot.config # noqa: E402, F401 (import side-effect: loads .env)
+
+from entrabot.sandbox import get_sandbox_runner # noqa: E402
+from entrabot.sandbox.base import SandboxPolicy # noqa: E402
+from entrabot.sandbox.policy import ( # noqa: E402
+ canonicalize_paths,
+ clamp_to_ceiling,
+)
+
+# ── ANSI styling ────────────────────────────────────────────────────────────
+BOLD = "\033[1m"
+DIM = "\033[2m"
+RED = "\033[31m"
+GREEN = "\033[32m"
+YELLOW = "\033[33m"
+BLUE = "\033[34m"
+CYAN = "\033[36m"
+NC = "\033[0m"
+
+PAUSE = "--no-pause" not in sys.argv
+CONFIG_ONLY = "--config-only" in sys.argv
+HOME = os.path.expanduser("~")
+
+
+def banner(text: str) -> None:
+ line = "═" * 62
+ print(f"\n{BOLD}{CYAN}╔{line}╗{NC}")
+ print(f"{BOLD}{CYAN}║{NC} {BOLD}{text}{NC}")
+ print(f"{BOLD}{CYAN}╚{line}╝{NC}")
+
+
+def beat(text: str) -> None:
+ if PAUSE:
+ try:
+ input(f"\n{DIM} ↵ {text}{NC}")
+ except (EOFError, KeyboardInterrupt):
+ print()
+ sys.exit(0)
+ else:
+ print(f"\n{DIM} → {text}{NC}")
+
+
+def load_ceiling() -> tuple[list[str], list[str]]:
+ ro = [p for p in os.environ.get("ENTRABOT_SANDBOX_READONLY_PATHS", "").split(":") if p]
+ rw = [p for p in os.environ.get("ENTRABOT_SANDBOX_READWRITE_PATHS", "").split(":") if p]
+ return ro, rw
+
+
+def run_scenario(
+ runner,
+ caps,
+ ceiling_ro: list[str],
+ ceiling_rw: list[str],
+ *,
+ title: str,
+ cmd: str,
+ req_ro: list[str],
+ req_rw: list[str],
+ expect_allow: bool,
+) -> bool:
+ """Run one scenario through the real run_code chain and narrate it."""
+ print(f"\n{BOLD}{BLUE}▎{title}{NC}")
+ print(f" {DIM}agent runs:{NC} {cmd}")
+ print(f" {DIM}agent requests:{NC} readonly={req_ro or '[]'} readwrite={req_rw or '[]'}")
+
+ ceiling = SandboxPolicy(
+ backend="process",
+ command_line="",
+ readonly_paths=ceiling_ro,
+ readwrite_paths=ceiling_rw,
+ timeout_ms=30000,
+ network_default_policy="block",
+ keychain_access=False,
+ )
+ requested = SandboxPolicy(
+ backend="process",
+ command_line=cmd,
+ readonly_paths=req_ro,
+ readwrite_paths=req_rw,
+ timeout_ms=30000,
+ network_default_policy="block",
+ keychain_access=False,
+ )
+
+ clamped = clamp_to_ceiling(requested, ceiling, caps)
+ if clamped.readonly_paths:
+ clamped.readonly_paths = canonicalize_paths(clamped.readonly_paths)
+ if clamped.readwrite_paths:
+ clamped.readwrite_paths = canonicalize_paths(clamped.readwrite_paths)
+
+ # Show the clamp decision — the security money-shot.
+ dropped_rw = [p for p in req_rw if not _kept(p, clamped.readwrite_paths)]
+ if dropped_rw:
+ print(
+ f" {YELLOW}clamp:{NC} dropped write paths "
+ f"{dropped_rw} {DIM}(not within operator ceiling){NC}"
+ )
+ print(
+ f" {DIM}policy sent to MXC:{NC} "
+ f"readonlyPaths={clamped.readonly_paths or '[]'} "
+ f"readwritePaths={clamped.readwrite_paths or '[]'}"
+ )
+
+ result = runner.run(clamped)
+ allowed = result.exit_code == 0
+ correct = allowed == expect_allow
+
+ if allowed:
+ verdict = f"{GREEN}✅ ALLOWED{NC}"
+ detail = result.stdout.strip() or "(no output)"
+ print(f" {verdict} exit={result.exit_code} output: {detail[:80]!r}")
+ else:
+ verdict = f"{RED}⛔ BLOCKED by the OS kernel{NC}"
+ detail = (result.stderr.strip() or result.stdout.strip() or "").splitlines()
+ msg = detail[-1] if detail else "(no message)"
+ print(f" {verdict} exit={result.exit_code} reason: {msg[:80]!r}")
+
+ expectation = "ALLOW" if expect_allow else "BLOCK"
+ mark = f"{GREEN}as designed{NC}" if correct else f"{RED}UNEXPECTED{NC}"
+ print(f" {DIM}expected {expectation} →{NC} {mark}")
+ return correct
+
+
+def _kept(requested_path: str, kept_canonical: list[str]) -> bool:
+ real = os.path.realpath(os.path.expanduser(requested_path))
+ return real in kept_canonical
+
+
+def main() -> int:
+ banner("EntraBot × MXC — Least-Privilege Local Execution Demo")
+ print(
+ f"\n An AI agent with its own Entra identity wants to run code on this Mac.\n"
+ f" {BOLD}The operator{NC} decides what it may touch. {BOLD}The agent can only\n"
+ f" narrow that — never widen it.{NC} Containment is enforced by Apple's\n"
+ f" Seatbelt kernel sandbox via Microsoft Execution Containers (MXC)."
+ )
+
+ # Preconditions
+ if os.environ.get("ENTRABOT_ENABLE_RUN_CODE") != "1":
+ print(f"\n{RED}run_code is disabled. Set ENTRABOT_ENABLE_RUN_CODE=1 in .env.{NC}")
+ return 1
+
+ ceiling_ro, ceiling_rw = load_ceiling()
+ print(f"\n{BOLD}Operator ceiling (the human-set maximum):{NC}")
+ print(f" {GREEN}read-only :{NC} {ceiling_ro}")
+ print(f" {GREEN}read-write:{NC} {ceiling_rw}")
+ print(f" {DIM}keychain access: hard-disabled (not overridable by the agent){NC}")
+
+ try:
+ runner = get_sandbox_runner()
+ except Exception as exc: # noqa: BLE001
+ print(f"\n{RED}MXC binary unavailable: {exc}{NC}")
+ print(f"{DIM}Build it or set MXC_BIN_DIR. See scripts/setup_sandbox.sh.{NC}")
+ return 1
+ caps = runner.get_capabilities()
+ print(f"\n{BOLD}Backend:{NC} {caps['backend']} {DIM}(real binary, SHA256-verified){NC}")
+
+ # Agent identity (who is constrained, and on whose behalf).
+ agent_upn = os.environ.get("ENTRABOT_AGENT_USER_UPN", "(unset)")
+ run_code_on = os.environ.get("ENTRABOT_ENABLE_RUN_CODE") == "1"
+ net = os.environ.get("ENTRABOT_SANDBOX_NETWORK", "block")
+ print(f"\n{BOLD}Agent identity:{NC} {agent_upn} {DIM}(its own Entra Agent User){NC}")
+ print(f"{BOLD}run_code tool:{NC} {'enabled' if run_code_on else 'DISABLED'} "
+ f"{DIM}· network: {net} · keychain: disabled{NC}")
+
+ if CONFIG_ONLY:
+ print(
+ f"\n {DIM}This is the operator-set configuration. The agent can only "
+ f"narrow it.\n Run without --config-only to see it enforced.{NC}\n"
+ )
+ return 0
+
+ # Fixture: a "confidential" file in Documents the agent may READ but not WRITE.
+ secret = Path(HOME) / "Documents" / "entrabot-secret.txt"
+ secret.parent.mkdir(parents=True, exist_ok=True)
+ if not secret.exists():
+ secret.write_text("SECRET: quarterly numbers the agent may read but must not alter\n")
+ print(f"\n{DIM}Fixture ready: {secret}{NC}")
+
+ # ── Act 1: the threat ────────────────────────────────────────────────
+ banner("Act 1 — Why containment matters")
+ print(
+ f"\n EntraBot ships a deliberately-unsafe tool, {BOLD}write_local_file{NC},\n"
+ f" to show the baseline: an unsandboxed agent can write {BOLD}anywhere{NC}.\n"
+ f" That's the risk a compromised or over-eager agent poses to your machine."
+ )
+ print(f" {DIM}(We don't run it here — the point of the rest of the demo is the cure.){NC}")
+ beat("Press enter to see the sandbox in action…")
+
+ # ── Act 2: the protection ────────────────────────────────────────────
+ banner("Act 2 — run_code: the sandboxed path")
+ results: list[bool] = []
+
+ beat("Scenario 1 — the agent reads your Documents (legitimate analysis)")
+ results.append(run_scenario(
+ runner, caps, ceiling_ro, ceiling_rw,
+ title="“Read my confidential file in Documents.”",
+ cmd=f"cat {HOME}/Documents/entrabot-secret.txt",
+ req_ro=[f"{HOME}/Documents"], req_rw=[],
+ expect_allow=True,
+ ))
+
+ beat("Scenario 2 — the agent tries to WRITE to your Documents (tampering)")
+ results.append(run_scenario(
+ runner, caps, ceiling_ro, ceiling_rw,
+ title="“Overwrite that file in Documents.”",
+ cmd=f"echo TAMPERED > {HOME}/Documents/entrabot-hack.txt",
+ req_ro=[], req_rw=[f"{HOME}/Documents"],
+ expect_allow=False,
+ ))
+ print(
+ f" {DIM}Documents is in the read-only ceiling, not read-write. The agent's\n"
+ f" attempt to widen is clamped to nothing, and the kernel blocks the write.{NC}"
+ )
+
+ beat("Scenario 3 — the agent writes a report to /tmp (allowed output)")
+ results.append(run_scenario(
+ runner, caps, ceiling_ro, ceiling_rw,
+ title="“Save a scratch report to /tmp.”",
+ cmd="echo 'report' > /tmp/entrabot-report.txt && cat /tmp/entrabot-report.txt",
+ req_ro=[], req_rw=["/tmp"],
+ expect_allow=True,
+ ))
+
+ beat("Scenario 4 — the agent writes to ~/Downloads (allowed output)")
+ results.append(run_scenario(
+ runner, caps, ceiling_ro, ceiling_rw,
+ title="“Drop the export in my Downloads folder.”",
+ cmd=(
+ f"echo 'export' > {HOME}/Downloads/entrabot-export.txt "
+ f"&& cat {HOME}/Downloads/entrabot-export.txt"
+ ),
+ req_ro=[], req_rw=[f"{HOME}/Downloads"],
+ expect_allow=True,
+ ))
+
+ # ── Act 3: the hardening ─────────────────────────────────────────────
+ banner("Act 3 — The agent can't cheat the boundary")
+ beat("Scenario 5 — a symlink inside an allowed dir pointing OUT is rejected")
+ import tempfile
+ with tempfile.TemporaryDirectory() as d:
+ d = os.path.realpath(d)
+ granted = os.path.join(d, "granted")
+ secret_dir = os.path.join(d, "secret")
+ os.mkdir(granted)
+ os.mkdir(secret_dir)
+ Path(secret_dir, "creds.txt").write_text("top secret\n")
+ evil = os.path.join(granted, "escape")
+ os.symlink(secret_dir, evil) # granted/escape -> ../secret
+ results.append(run_scenario(
+ runner, caps, [granted], [granted],
+ title="agent grants itself the 'granted' dir, then follows a symlink out",
+ cmd=f"cat {evil}/creds.txt",
+ req_ro=[evil], req_rw=[],
+ expect_allow=False,
+ ))
+ print(
+ f" {DIM}Paths are canonicalized (realpath) BEFORE the containment check, so a\n"
+ f" symlink can't smuggle access to a target outside the ceiling.{NC}"
+ )
+
+ # Cleanup
+ for p in (
+ Path(HOME, "Documents", "entrabot-hack.txt"),
+ Path("/tmp/entrabot-report.txt"),
+ Path(HOME, "Downloads", "entrabot-export.txt"),
+ ):
+ with contextlib.suppress(FileNotFoundError):
+ p.unlink()
+
+ # ── Curtain ──────────────────────────────────────────────────────────
+ banner("Recap")
+ passed = sum(results)
+ total = len(results)
+ print(
+ f"\n {GREEN}READ Documents{NC} allowed · {RED}WRITE Documents{NC} blocked · "
+ f"{GREEN}WRITE /tmp + Downloads{NC} allowed · {RED}symlink escape{NC} blocked"
+ )
+ print(
+ f"\n Every action is {BOLD}audit-first{NC} (logged before it runs; if audit\n"
+ f" fails, the action doesn't), {BOLD}fail-closed{NC}, and attributed to the\n"
+ f" agent's own Entra identity — not yours."
+ )
+ color = GREEN if passed == total else RED
+ print(f"\n {BOLD}{color}{passed}/{total} scenarios behaved exactly as designed.{NC}")
+
+ print_teams_talktrack()
+ return 0 if passed == total else 1
+
+
+def print_teams_talktrack() -> None:
+ banner("Now do it live — Teams talk-track")
+ print(
+ f"""
+ Chat with the agent ({BOLD}entrabot-mxc-test@werner.ac{NC}) in Teams and ask,
+ in plain language. The agent will call run_code under the hood.
+
+ {GREEN}1){NC} "Read my file at ~/Documents/entrabot-secret.txt and tell me what it says."
+ {DIM}→ Agent reads it. Point out: Documents is read-only in the ceiling.{NC}
+
+ {RED}2){NC} "Now save the text 'hello' to ~/Documents/note.txt."
+ {DIM}→ Blocked. The agent reports it can't write there. Show the audit log.{NC}
+
+ {GREEN}3){NC} "Write a short summary to ~/Downloads/summary.txt instead."
+ {DIM}→ Works. Downloads is in the read-write ceiling.{NC}
+
+ {DIM}The agent never sees the ceiling as something it can change — it's set by
+ you, the operator, in .env, and enforced by the OS. The model can only narrow.{NC}
+"""
+ )
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/scripts/demo_sandbox_run.py b/scripts/demo_sandbox_run.py
new file mode 100644
index 0000000..5501cdd
--- /dev/null
+++ b/scripts/demo_sandbox_run.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+"""demo_sandbox_run.py — run ONE sandbox scenario through the real run_code chain.
+
+This is the enforcement engine behind ``scripts/demo_sandbox.ps1`` (Windows) and
+is intentionally narration-free: it takes a single command + the paths the agent
+*requests*, pushes them through the exact operator-ceiling -> clamp ->
+canonicalize -> MXC chain the MCP server's ``run_code`` tool uses, runs the real
+SHA256-pinned MXC binary, and prints a JSON result on stdout.
+
+The operator ceiling is read from the environment (``ENTRABOT_SANDBOX_*`` vars)
+via :func:`entrabot.sandbox.local_files.ceiling_from_env`, so the demo proves the
+*real* configured ceiling — including the ``os.pathsep`` parsing that lets
+Windows drive-letter paths (``C:\\Users\\...``) survive.
+
+Usage:
+ python demo_sandbox_run.py --cmd "<command line>" \
+ [--ro <path> ...] [--rw <path> ...]
+
+Output (stdout): a single JSON object. Exit code 0 if the JSON was produced
+(regardless of whether the sandboxed command was allowed or blocked); non-zero
+only on harness/setup errors (e.g. binary unavailable).
+"""
+
+from __future__ import annotations
+
+# ruff: noqa: I001 — import order is deliberate (sys.path insert + .env
+# side-effect load must precede the entrabot.sandbox imports).
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+
+# Make the entrabot package importable and load .env (ceiling + MXC_BIN_DIR).
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src"))
+import entrabot.config # noqa: E402, F401 (import side-effect: loads .env)
+
+from entrabot.sandbox import get_sandbox_runner # noqa: E402
+from entrabot.sandbox.base import ( # noqa: E402
+ SandboxBackendUnsupportedError,
+ SandboxPolicy,
+ SandboxPolicyError,
+ SandboxTimeoutError,
+ SandboxUnavailableError,
+ SandboxUntrustedBinaryError,
+)
+from entrabot.sandbox.local_files import ceiling_from_env # noqa: E402
+from entrabot.sandbox.policy import canonicalize_paths, clamp_to_ceiling # noqa: E402
+
+
+def _real(p: str) -> str:
+ return os.path.realpath(os.path.expanduser(p))
+
+
+def main(argv: list[str] | None = None) -> int:
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument("--cmd", required=True, help="commandLine to run in the sandbox")
+ parser.add_argument("--ro", action="append", default=[], help="path requested READ access")
+ parser.add_argument("--rw", action="append", default=[], help="path requested WRITE access")
+ args = parser.parse_args(argv)
+
+ result: dict = {
+ "requested_ro": args.ro,
+ "requested_rw": args.rw,
+ }
+
+ try:
+ ceiling = ceiling_from_env()
+ result["ceiling_ro"] = ceiling.readonly_paths
+ result["ceiling_rw"] = ceiling.readwrite_paths
+
+ runner = get_sandbox_runner()
+ caps = runner.get_capabilities()
+ result["backend"] = caps["backend"]
+
+ requested = SandboxPolicy(
+ backend="process",
+ command_line=args.cmd,
+ readonly_paths=args.ro,
+ readwrite_paths=args.rw,
+ timeout_ms=ceiling.timeout_ms,
+ network_default_policy="block",
+ keychain_access=False,
+ )
+
+ clamped = clamp_to_ceiling(requested, ceiling, caps)
+ # The clamp money-shot: which requested paths were dropped because they
+ # were NOT within the operator ceiling (the agent tried to widen).
+ kept_rw = {_real(p) for p in clamped.readwrite_paths}
+ kept_ro = {_real(p) for p in clamped.readonly_paths}
+ result["dropped_rw"] = [p for p in args.rw if _real(p) not in kept_rw]
+ result["dropped_ro"] = [p for p in args.ro if _real(p) not in kept_ro]
+
+ if clamped.readonly_paths:
+ clamped.readonly_paths = canonicalize_paths(clamped.readonly_paths)
+ if clamped.readwrite_paths:
+ clamped.readwrite_paths = canonicalize_paths(clamped.readwrite_paths)
+ result["clamped_ro"] = clamped.readonly_paths
+ result["clamped_rw"] = clamped.readwrite_paths
+
+ run = runner.run(clamped)
+ result["exit_code"] = run.exit_code
+ result["allowed"] = run.exit_code == 0
+ result["stdout"] = run.stdout.strip()
+ result["stderr"] = run.stderr.strip()
+ result["timed_out"] = run.timed_out
+
+ except (
+ SandboxUnavailableError,
+ SandboxUntrustedBinaryError,
+ SandboxBackendUnsupportedError,
+ SandboxPolicyError,
+ SandboxTimeoutError,
+ ) as exc:
+ result["error"] = f"{type(exc).__name__}: {exc}"
+ result["allowed"] = False
+ except Exception as exc: # noqa: BLE001 — surface anything else as a harness error
+ result["error"] = f"{type(exc).__name__}: {exc}"
+ result["allowed"] = False
+
+ print(json.dumps(result))
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/scripts/entra_provisioning.py b/scripts/entra_provisioning.py
index c7d0ccb..6c706bd 100644
--- a/scripts/entra_provisioning.py
+++ b/scripts/entra_provisioning.py
@@ -325,21 +325,33 @@ def _remove_legacy_password_credentials(app_id: str) -> int:
# State persistence
# ---------------------------------------------------------------------------
-_STATE_FILE = Path(__file__).resolve().parent.parent / ".entrabot-state.json"
+_DEFAULT_STATE_FILE = Path(__file__).resolve().parent.parent / ".entrabot-state.json"
+
+
+def _state_file() -> Path:
+ override = os.environ.get("ENTRABOT_STATE_FILE", "").strip()
+ if not override:
+ return _DEFAULT_STATE_FILE
+ path = Path(override).expanduser()
+ if not path.is_absolute():
+ path = _DEFAULT_STATE_FILE.parent / path
+ return path
def _load_state() -> dict:
- if _STATE_FILE.is_file():
+ state_file = _state_file()
+ if state_file.is_file():
try:
- return json.loads(_STATE_FILE.read_text())
+ return json.loads(state_file.read_text())
except (json.JSONDecodeError, OSError):
return {}
return {}
def _save_state(state: dict) -> None:
- _STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
- _STATE_FILE.write_text(json.dumps(state, indent=2) + "\n")
+ state_file = _state_file()
+ state_file.parent.mkdir(parents=True, exist_ok=True)
+ state_file.write_text(json.dumps(state, indent=2) + "\n")
def get_state(key: str) -> str | None:
diff --git a/scripts/mxc-mac-stdin-compat.patch b/scripts/mxc-mac-stdin-compat.patch
new file mode 100644
index 0000000..dc80ca1
--- /dev/null
+++ b/scripts/mxc-mac-stdin-compat.patch
@@ -0,0 +1,52 @@
+diff --git a/src/core/mxc_darwin/src/main.rs b/src/core/mxc_darwin/src/main.rs
+index b0dea97..ed77214 100644
+--- a/src/core/mxc_darwin/src/main.rs
++++ b/src/core/mxc_darwin/src/main.rs
+@@ -10,10 +10,12 @@
+ //! use still requires macOS.
+
+ use std::fmt::Write;
++use std::io::Read;
+ use std::process;
+
+ use clap::Parser;
+ use wxc_common::config_parser::load_request;
++use wxc_common::encoding::base64_encode;
+ use wxc_common::logger::{Logger, Mode};
+ use wxc_common::models::{ContainmentBackend, ExecutionRequest};
+
+@@ -57,6 +59,17 @@ struct Cli {
+ log_file: Option,
+ }
+
++fn read_config_from_stdin() -> Result