From ad19684b7495c40478b8898c78419c7d0ae235bf Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Fri, 19 Jun 2026 15:30:09 -0700
Subject: [PATCH 1/5] Pin spec v0.63.1 for proposal 0072

Advance the spec submodule pin v0.62.0 -> v0.63.1 to absorb accepted
proposal 0072 (per-fetch cache_ttl_seconds prompt-cache control) at
v0.63.0 plus the v0.63.1 patch (pipeline-utilities coverage fixtures
070/071). Updates __spec_version__, the pyproject spec_version, the
smoke-test assertion, and regenerates the bundled AGENTS.md.

conformance.toml records 0072 as implemented; the v0.63.1 fixtures add
no proposal entry (coverage for the already-implemented 0069 / 0070).
---
 conformance.toml             | 7 +++++++
 openarmature-spec            | 2 +-
 pyproject.toml               | 2 +-
 src/openarmature/AGENTS.md   | 4 ++--
 src/openarmature/__init__.py | 2 +-
 tests/test_smoke.py          | 2 +-
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/conformance.toml b/conformance.toml
index 5e10814..f9579f5 100644
--- a/conformance.toml
+++ b/conformance.toml
@@ -705,3 +705,10 @@ note = "The OTel observer synthesizes an openarmature.invocation span at the roo
 status = "partial"
 since = "0.15.0"
 note = "The Langfuse observer promotes a recognized userId caller-metadata key to the first-class trace.userId (additive: the key also stays in trace.metadata.userId), and sets trace.sessionId from openarmature.session_id when present. trace.userId is LIVE (sourced from 0034 caller metadata): fixture 084 cases 2/3/4 (not-session-bound, userId present additive, userId absent) pass. partial because trace.sessionId is DORMANT -- openarmature.session_id is established by the sessions capability (0020, observability §5.6), unimplemented in python until v0.19.0, so there is no session_id source yet; the trace(session_id=) plumbing is wired end to end but the observer passes None. Fixture 084 session-bound cases 1 + 5 are deferred (per-case) pending 0020. Langfuse-only: no OTel change (the OTel side already carries openarmature.session_id + openarmature.user.* as span attributes; no trace-level OTel equivalent)."
+
+# Spec v0.63.0 (proposal 0072).  Per-fetch cache_ttl_seconds read-side
+# control (prompt-management §5 / §6 + conformance-adapter §6.8).
+[proposals."0072"]
+status = "implemented"
+since = "0.15.0"
+note = "PromptBackend.fetch / PromptManager.fetch / get gain an optional cache_ttl_seconds read-side control (absent / None = current behavior; 0 = force a fresh read past any cache; N > 0 = bound a served entry's staleness to N seconds; negative is rejected). It governs only which cached entry MAY be served for this fetch, not whether / how the result is cached. python's bundled backends (filesystem, in-memory) are cacheless and treat it as a no-op; the Langfuse backend forwards it to the Langfuse SDK's own client-side get_prompt cache; the manager threads it through the §9 fallback chain and rejects negatives. render is unchanged. The TTL semantics are exercised by a caching prompt-backend conformance-harness primitive (§6.8: caches by (name, label), source_read_count, advance_clock controllable clock); fixtures 033/034 pass. No production backend implements its own cache (per §5, cacheless backends no-op). The v0.63.1 pin also wires pipeline-utilities coverage fixtures 070/071 (already-implemented 0069/0070 behavior; no new proposal)."
diff --git a/openarmature-spec b/openarmature-spec
index 963504e..821099c 160000
--- a/openarmature-spec
+++ b/openarmature-spec
@@ -1 +1 @@
-Subproject commit 963504ede4fda5ae32cfd5b68331036536a1fefb
+Subproject commit 821099c976ad827608765279ce119c5ecb22c51e
diff --git a/pyproject.toml b/pyproject.toml
index 28e0c43..be68913 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,7 +63,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec"
 openarmature = "openarmature.cli:main"
 
 [tool.openarmature]
-spec_version = "0.62.0"
+spec_version = "0.63.1"
 
 [dependency-groups]
 dev = [
diff --git a/src/openarmature/AGENTS.md b/src/openarmature/AGENTS.md
index 87fc502..c489303 100644
--- a/src/openarmature/AGENTS.md
+++ b/src/openarmature/AGENTS.md
@@ -1,6 +1,6 @@
 # OpenArmature — Agent documentation
 
-*This is the agent guide bundled with the openarmature Python package, version 0.14.0 (spec v0.62.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
+*This is the agent guide bundled with the openarmature Python package, version 0.14.0 (spec v0.63.1). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
 
 ## TL;DR
 
@@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents:
 
 ## Capability contracts
 
-_Sourced from openarmature-spec v0.62.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md` verbatim — including additions from accepted proposals that this Python implementation may not yet ship. For per-proposal implementation status (implemented / partial / textual-only / not-yet), see the `conformance.toml` manifest at the repo root. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
+_Sourced from openarmature-spec v0.63.1. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md` verbatim — including additions from accepted proposals that this Python implementation may not yet ship. For per-proposal implementation status (implemented / partial / textual-only / not-yet), see the `conformance.toml` manifest at the repo root. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
 
 ### Capability: `graph-engine`
 
diff --git a/src/openarmature/__init__.py b/src/openarmature/__init__.py
index 7a41674..9b0ebb1 100644
--- a/src/openarmature/__init__.py
+++ b/src/openarmature/__init__.py
@@ -25,7 +25,7 @@
 """
 
 __version__ = "0.14.0"
-__spec_version__ = "0.62.0"
+__spec_version__ = "0.63.1"
 # Proposal 0052 (spec observability §5.1 / §8.4.1): canonical
 # package-registry name for this implementation. Surfaces on every
 # OTel invocation span as ``openarmature.implementation.name`` and on
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index e678ac7..e217083 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -9,7 +9,7 @@
 
 def test_package_versions() -> None:
     assert openarmature.__version__ == "0.14.0"
-    assert openarmature.__spec_version__ == "0.62.0"
+    assert openarmature.__spec_version__ == "0.63.1"
 
 
 def test_spec_version_matches_pyproject() -> None:

From fc87b1ada1126e4fff2d5545bedc43391fcd3e3e Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Fri, 19 Jun 2026 15:31:47 -0700
Subject: [PATCH 2/5] Add cache_ttl_seconds prompt cache control (0072)

PromptBackend.fetch, PromptManager.fetch, and PromptManager.get gain an
optional cache_ttl_seconds read-side control: None preserves current
behavior, 0 forces a fresh read past any client-side cache, and N > 0
bounds a served entry's staleness to N seconds; a negative value is
rejected at the manager. It governs only which cached entry may be
served, not whether or how results are cached.

The bundled filesystem backend is cacheless and ignores it; the langfuse
backend forwards it to the SDK's get_prompt cache. Every backend
implementation (mocks + example backends) accepts the new param.

Conformance: a caching prompt-backend harness primitive (source_read_count
plus a controllable advance_clock) drives fixtures 033/034; unit tests
cover the negative-value rejection and the langfuse forwarding.
---
 examples/chat-with-multimodal/main.py         |  4 +-
 examples/langfuse-observability/main.py       |  4 +-
 src/openarmature/prompts/backend.py           | 10 +++-
 .../prompts/backends/filesystem.py            |  7 ++-
 src/openarmature/prompts/backends/langfuse.py | 20 +++++--
 src/openarmature/prompts/manager.py           | 22 ++++++-
 .../conformance/harness/prompt_management.py  | 13 ++++-
 .../test_observability_langfuse.py            |  4 +-
 tests/conformance/test_prompt_management.py   | 58 +++++++++++++++++--
 tests/unit/test_prompts.py                    | 46 ++++++++++++---
 tests/unit/test_prompts_langfuse.py           | 20 ++++++-
 11 files changed, 178 insertions(+), 30 deletions(-)

diff --git a/examples/chat-with-multimodal/main.py b/examples/chat-with-multimodal/main.py
index 57c82c0..329ef31 100644
--- a/examples/chat-with-multimodal/main.py
+++ b/examples/chat-with-multimodal/main.py
@@ -274,7 +274,9 @@ class _NoFetchBackend:
     ``fetch()`` is never invoked.
     """
 
-    async def fetch(self, name: str, label: str = "production") -> Prompt:
+    async def fetch(
+        self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+    ) -> Prompt:
         raise NotImplementedError("example constructs prompts inline; fetch not used")
 
 
diff --git a/examples/langfuse-observability/main.py b/examples/langfuse-observability/main.py
index 56192e2..98fdc75 100644
--- a/examples/langfuse-observability/main.py
+++ b/examples/langfuse-observability/main.py
@@ -123,7 +123,9 @@ def __init__(self) -> None:
             },
         )
 
-    async def fetch(self, name: str, label: str = "production") -> Prompt:
+    async def fetch(
+        self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+    ) -> Prompt:
         if name != "mission-briefing":
             from openarmature.prompts import PromptNotFound
 
diff --git a/src/openarmature/prompts/backend.py b/src/openarmature/prompts/backend.py
index dfdc977..7362e0f 100644
--- a/src/openarmature/prompts/backend.py
+++ b/src/openarmature/prompts/backend.py
@@ -33,7 +33,9 @@ class PromptBackend(Protocol):
     original fetch time, not the cache hit time.
     """
 
-    async def fetch(self, name: str, label: str = "production") -> Prompt:
+    async def fetch(
+        self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+    ) -> Prompt:
         """Return the prompt registered as ``(name, label)``.
 
         ``label`` defaults to ``"production"``. Raises
@@ -41,5 +43,11 @@ async def fetch(self, name: str, label: str = "production") -> Prompt:
         ``PromptStoreUnavailable`` if the backing store is unreachable.
         The returned ``Prompt`` carries its raw template plus
         metadata; rendering is the manager's job, not the backend's.
+
+        ``cache_ttl_seconds`` is a read-side cache control: ``None``
+        preserves the backend's current behavior, ``0`` forces a fresh
+        read past any client-side cache, and ``N > 0`` bounds a served
+        cached entry's staleness to N seconds. Cacheless backends ignore
+        it; caching backends honor it.
         """
         ...
diff --git a/src/openarmature/prompts/backends/filesystem.py b/src/openarmature/prompts/backends/filesystem.py
index 8277056..7b5caa4 100644
--- a/src/openarmature/prompts/backends/filesystem.py
+++ b/src/openarmature/prompts/backends/filesystem.py
@@ -143,13 +143,18 @@ def _resolve_sampling(self, name: str, label: str) -> SamplingConfig | None:
             )
         return _sampling_from_dict(cast(dict[str, Any], raw))
 
-    async def fetch(self, name: str, label: str = "production") -> Prompt:
+    async def fetch(
+        self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+    ) -> Prompt:
         """Read the prompt template and (optionally) its sidecar sampling config.
 
         Returns a ``Prompt`` whose ``version`` is the leading 16 hex
         chars of the template's SHA-256 and ``template_hash`` is the
         full digest. Raises ``PromptNotFound`` when the template is
         missing and ``PromptStoreUnavailable`` on other I/O errors.
+
+        The filesystem backend is cacheless, so ``cache_ttl_seconds`` is
+        accepted for protocol conformance and ignored.
         """
         path = self._template_path(name, label)
         try:
diff --git a/src/openarmature/prompts/backends/langfuse.py b/src/openarmature/prompts/backends/langfuse.py
index 1832306..37e1efd 100644
--- a/src/openarmature/prompts/backends/langfuse.py
+++ b/src/openarmature/prompts/backends/langfuse.py
@@ -46,7 +46,9 @@ class LangfusePromptClient(Protocol):
     tests can supply a lightweight fake.
     """
 
-    def get_prompt(self, name: str, *, label: str = "production") -> TextPromptClient | ChatPromptClient: ...
+    def get_prompt(
+        self, name: str, *, label: str = "production", cache_ttl_seconds: int | None = None
+    ) -> TextPromptClient | ChatPromptClient: ...
 
 
 # Langfuse prompt `config` keys that line up with SamplingConfig's
@@ -89,10 +91,14 @@ class LangfusePromptBackend:
     def __init__(self, client: LangfusePromptClient) -> None:
         self._client = client
 
-    async def fetch(self, name: str, label: str = "production") -> Prompt:
+    async def fetch(
+        self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+    ) -> Prompt:
         # The Langfuse SDK's get_prompt is synchronous (and does its own
-        # client-side caching); run it off the event loop.
-        result = await asyncio.to_thread(self._get_prompt, name, label)
+        # client-side caching); run it off the event loop. The proposal
+        # 0072 cache_ttl_seconds control forwards to that SDK cache:
+        # None = SDK default, 0 = no cache (fresh), N = N-second bound.
+        result = await asyncio.to_thread(self._get_prompt, name, label, cache_ttl_seconds)
 
         if isinstance(result, ChatPromptClient):
             normalized = _normalized_langfuse_entries(result.prompt, name=name, label=label)
@@ -134,9 +140,11 @@ async def fetch(self, name: str, label: str = "production") -> Prompt:
             metadata=_metadata_from(result),
         )
 
-    def _get_prompt(self, name: str, label: str) -> TextPromptClient | ChatPromptClient:
+    def _get_prompt(
+        self, name: str, label: str, cache_ttl_seconds: int | None = None
+    ) -> TextPromptClient | ChatPromptClient:
         try:
-            return self._client.get_prompt(name, label=label)
+            return self._client.get_prompt(name, label=label, cache_ttl_seconds=cache_ttl_seconds)
         except NotFoundError as exc:
             raise PromptNotFound(
                 f"prompt ({name!r}, {label!r}) not found in Langfuse",
diff --git a/src/openarmature/prompts/manager.py b/src/openarmature/prompts/manager.py
index 9e5be7e..94f289b 100644
--- a/src/openarmature/prompts/manager.py
+++ b/src/openarmature/prompts/manager.py
@@ -110,7 +110,9 @@ def _resolve_label(self, label: str | None, name: str) -> str:
             return self._label_resolver.resolve(name)
         return SPEC_FALLBACK_LABEL
 
-    async def fetch(self, name: str, label: str | None = None) -> Prompt:
+    async def fetch(
+        self, name: str, label: str | None = None, *, cache_ttl_seconds: int | None = None
+    ) -> Prompt:
         """Consult composed backends in order, applying the fallback chain.
 
         Label is resolved by a three-step chain: explicit argument >
@@ -123,12 +125,23 @@ async def fetch(self, name: str, label: str | None = None) -> Prompt:
         - ``PromptStoreUnavailable`` from a backend continues to the
           next. After ALL backends are exhausted with unavailable
           failures, the manager raises ``PromptStoreUnavailable``.
+
+        ``cache_ttl_seconds`` is a read-side cache control forwarded to
+        each backend's ``fetch``: ``None`` keeps current behavior, ``0``
+        forces a fresh read, and ``N > 0`` bounds a served entry's
+        staleness to N seconds. A negative value raises ``ValueError``
+        before any backend is consulted. Cacheless backends ignore it.
         """
+        if cache_ttl_seconds is not None and cache_ttl_seconds < 0:
+            raise ValueError(
+                f"cache_ttl_seconds must be >= 0 (got {cache_ttl_seconds!r}); "
+                "None preserves current behavior, 0 forces a fresh read"
+            )
         resolved_label = self._resolve_label(label, name)
         causes: list[BaseException] = []
         for backend in self._backends:
             try:
-                return await backend.fetch(name, resolved_label)
+                return await backend.fetch(name, resolved_label, cache_ttl_seconds=cache_ttl_seconds)
             except PromptNotFound:
                 raise
             except PromptStoreUnavailable as exc:
@@ -520,13 +533,16 @@ async def get(
         variables: Mapping[str, Any] | None = None,
         *,
         placeholders: Mapping[str, Sequence[Message]] | None = None,
+        cache_ttl_seconds: int | None = None,
     ) -> PromptResult:
         """Convenience equivalent to ``render(await fetch(name, label), variables)``.
 
         ``label`` follows the same three-step resolution as :meth:`fetch`.
         ``placeholders`` is forwarded to :meth:`render`.
+        ``cache_ttl_seconds`` is forwarded to :meth:`fetch` (the read-side
+        cache control).
         """
-        prompt = await self.fetch(name, label)
+        prompt = await self.fetch(name, label, cache_ttl_seconds=cache_ttl_seconds)
         return self.render(prompt, variables, placeholders=placeholders)
 
 
diff --git a/tests/conformance/harness/prompt_management.py b/tests/conformance/harness/prompt_management.py
index 94de30a..45c712a 100644
--- a/tests/conformance/harness/prompt_management.py
+++ b/tests/conformance/harness/prompt_management.py
@@ -60,6 +60,10 @@ class FixtureBackendSpec(_StrictModel):
     name: str
     prompts: list[FixturePromptSpec] = []
     simulate_unavailable: bool = False
+    # Proposal 0072 (conformance-adapter §6.8): when true, the backend is
+    # a caching primitive — it caches by (name, label), counts source
+    # reads, and honors cache_ttl_seconds via a controllable clock.
+    caching: bool = False
 
 
 class FixtureLabelResolverSpec(_StrictModel):
@@ -131,8 +135,12 @@ class FixtureCall(_StrictModel):
     # ``operation`` is required for fetch / render / get calls. The
     # ``construct_prompt_group`` shape uses the target as the operation
     # indicator (no separate operation field on the call).
-    operation: Literal["fetch", "render", "get"] | None = None
+    operation: Literal["fetch", "render", "get", "advance_clock"] | None = None
     name: str | None = None
+    # Proposal 0072: per-fetch read-side cache control, and the
+    # advance_clock control op's step (in seconds).
+    cache_ttl_seconds: int | None = None
+    seconds: int | None = None
     # `label` is optional per spec §6 v0.26.0: omitting it triggers
     # the configured LabelResolver (step 2) or the spec fallback
     # `"production"` (step 3). Distinct from ``label: null`` which
@@ -237,3 +245,6 @@ class PromptManagementFixture(_StrictModel):
     tertiary_manager: FixtureManagerSpec | None = None
     tertiary_calls: list[FixtureCall] = []
     expected: FixtureExpectedTopLevel | None = None
+    # Proposal 0072: per-backend end-state assertions (e.g.
+    # source_read_count) for the caching primitive.
+    expected_backend_state: dict[str, dict[str, Any]] | None = None
diff --git a/tests/conformance/test_observability_langfuse.py b/tests/conformance/test_observability_langfuse.py
index afe50cc..570359c 100644
--- a/tests/conformance/test_observability_langfuse.py
+++ b/tests/conformance/test_observability_langfuse.py
@@ -376,7 +376,9 @@ def __init__(self, prompts: dict[str, dict[str, Any]], *, with_langfuse_referenc
                 observability_entities=observability_entities,
             )
 
-    async def fetch(self, name: str, label: str = "production") -> Prompt:
+    async def fetch(
+        self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+    ) -> Prompt:
         return self._prompts[name]
 
 
diff --git a/tests/conformance/test_prompt_management.py b/tests/conformance/test_prompt_management.py
index e917d0f..af47539 100644
--- a/tests/conformance/test_prompt_management.py
+++ b/tests/conformance/test_prompt_management.py
@@ -264,8 +264,22 @@ def __init__(self, spec: FixtureBackendSpec) -> None:
                     ),
                 )
         self.call_count = 0
-
-    async def fetch(self, name: str, label: str = "production") -> Prompt:
+        # Proposal 0072 (conformance-adapter §6.8) caching-primitive
+        # state. ``source_read_count`` counts only source reads (cache
+        # miss / bypass / staleness), distinct from ``call_count`` (every
+        # fetch). The clock is controllable via ``advance_clock``.
+        self._caching = spec.caching
+        self._clock_seconds = 0
+        self._cache_entry_time: dict[tuple[str, str], int] = {}
+        self.source_read_count = 0
+
+    def advance_clock(self, seconds: int) -> None:
+        """Advance the controllable clock."""
+        self._clock_seconds += seconds
+
+    async def fetch(
+        self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+    ) -> Prompt:
         self.call_count += 1
         if self._simulate_unavailable:
             raise PromptStoreUnavailable(
@@ -279,8 +293,24 @@ async def fetch(self, name: str, label: str = "production") -> Prompt:
                 label=label,
                 backend=self.name,
             )
+        if self._caching and not self._serves_from_cache(key, cache_ttl_seconds):
+            # Source read: count it and (re)stamp the cache entry's age.
+            self.source_read_count += 1
+            self._cache_entry_time[key] = self._clock_seconds
         return self._prompts[key]
 
+    def _serves_from_cache(self, key: tuple[str, str], cache_ttl_seconds: int | None) -> bool:
+        # Proposal 0072 read-side control: 0 bypasses the cache (always a
+        # source read); a missing entry is a source read; None serves any
+        # cached entry; N > 0 serves only while the entry is younger than
+        # N seconds (else a fresh source read).
+        if cache_ttl_seconds == 0 or key not in self._cache_entry_time:
+            return False
+        if cache_ttl_seconds is None:
+            return True
+        age = self._clock_seconds - self._cache_entry_time[key]
+        return age < cache_ttl_seconds
+
 
 # ---------------------------------------------------------------------------
 # Fixture runner
@@ -362,7 +392,12 @@ async def _run_call(
         backend = backends[target.backend]
         if operation == "fetch":
             assert call.name is not None and call.label is not None
-            return await backend.fetch(call.name, call.label), None
+            return await backend.fetch(call.name, call.label, cache_ttl_seconds=call.cache_ttl_seconds), None
+        if operation == "advance_clock":
+            # Proposal 0072 §6.8: advance the caching backend's clock.
+            assert call.seconds is not None
+            backend.advance_clock(call.seconds)
+            return None, None
         raise AssertionError(f"unsupported backend operation: {operation!r}")
     except PromptError as exc:
         return None, exc
@@ -600,9 +635,11 @@ async def test_prompt_management_fixture(fixture_path: Path) -> None:
         (fixture.tertiary_manager, fixture.tertiary_calls),
     ]
     for manager_spec, manager_calls in manager_pairs:
-        if manager_spec is None:
-            continue
-        manager = _build_manager(manager_spec, backends, resolvers_map)
+        # ``manager_spec is None`` with direct-backend calls is the
+        # proposal 0072 fixtures 033/034 shape (no manager; calls target
+        # backends directly). Build a manager only when one is declared;
+        # direct-backend calls don't need it.
+        manager = _build_manager(manager_spec, backends, resolvers_map) if manager_spec is not None else None
         for call in manager_calls:
             result, raised = await _run_call(call, backends, manager, captures)
             _assert_per_call(call, result, raised, backends)
@@ -639,6 +676,15 @@ async def test_prompt_management_fixture(fixture_path: Path) -> None:
             if case_fixture.expected is not None:
                 _apply_top_level_expected(case_fixture.expected, captures)
 
+    # Proposal 0072: per-backend end-state (e.g. source_read_count from
+    # the caching primitive).
+    if fixture.expected_backend_state is not None:
+        for backend_name, state in fixture.expected_backend_state.items():
+            backend = backends[backend_name]
+            for attr, want in state.items():
+                got = getattr(backend, attr)
+                assert got == want, f"backend {backend_name!r} {attr}: got {got!r}, expected {want!r}"
+
     if fixture.expected is None:
         return
 
diff --git a/tests/unit/test_prompts.py b/tests/unit/test_prompts.py
index 24e601d..0e2a33f 100644
--- a/tests/unit/test_prompts.py
+++ b/tests/unit/test_prompts.py
@@ -222,7 +222,9 @@ def test_render_empty_string_output_maps_to_prompt_render_error() -> None:
     prompt = _make_prompt(template="{{ x if x else '' }}")
 
     class _NullBackend:
-        async def fetch(self, name: str, label: str = "production") -> Prompt:
+        async def fetch(
+            self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+        ) -> Prompt:
             return prompt
 
     manager = PromptManager(_NullBackend())
@@ -236,7 +238,9 @@ def test_render_propagates_identity_fields() -> None:
     prompt = _make_prompt()
 
     class _Backend:
-        async def fetch(self, name: str, label: str = "production") -> Prompt:
+        async def fetch(
+            self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+        ) -> Prompt:
             return prompt
 
     manager = PromptManager(_Backend())
@@ -250,6 +254,20 @@ async def fetch(self, name: str, label: str = "production") -> Prompt:
     assert len(result.messages) == 1
 
 
+async def test_fetch_rejects_negative_cache_ttl_seconds() -> None:
+    prompt = _make_prompt()
+
+    class _Backend:
+        async def fetch(
+            self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+        ) -> Prompt:
+            return prompt
+
+    manager = PromptManager(_Backend())
+    with pytest.raises(ValueError, match="cache_ttl_seconds must be >= 0"):
+        await manager.fetch("greeting", "production", cache_ttl_seconds=-1)
+
+
 # ---------------------------------------------------------------------------
 # FilesystemPromptBackend
 # ---------------------------------------------------------------------------
@@ -374,7 +392,9 @@ class _Hit:
         def __init__(self) -> None:
             self.calls = 0
 
-        async def fetch(self, name: str, label: str = "production") -> Prompt:
+        async def fetch(
+            self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+        ) -> Prompt:
             self.calls += 1
             return prompt
 
@@ -382,7 +402,9 @@ class _Second:
         def __init__(self) -> None:
             self.calls = 0
 
-        async def fetch(self, name: str, label: str = "production") -> Prompt:
+        async def fetch(
+            self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+        ) -> Prompt:
             self.calls += 1
             return prompt
 
@@ -398,7 +420,9 @@ def test_manager_render_caches_compiled_templates_by_hash() -> None:
     prompt = _make_prompt()
 
     class _Backend:
-        async def fetch(self, name: str, label: str = "production") -> Prompt:
+        async def fetch(
+            self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+        ) -> Prompt:
             return prompt
 
     manager = PromptManager(_Backend())
@@ -414,7 +438,9 @@ async def test_manager_render_signature_returns_user_message() -> None:
     prompt = _make_prompt()
 
     class _Backend:
-        async def fetch(self, name: str, label: str = "production") -> Prompt:
+        async def fetch(
+            self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+        ) -> Prompt:
             return prompt
 
     manager = PromptManager(_Backend())
@@ -672,7 +698,9 @@ async def test_chat_segment_template_cache_is_content_stable() -> None:
 
 
 class _DummyBackend:
-    async def fetch(self, name: str, label: str = "production") -> Any:
+    async def fetch(
+        self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+    ) -> Any:
         raise NotImplementedError
 
 
@@ -727,7 +755,9 @@ class _StubBackend:
     def __init__(self, prompt: Prompt) -> None:
         self._prompt = prompt
 
-    async def fetch(self, name: str, label: str = "production") -> Prompt:
+    async def fetch(
+        self, name: str, label: str = "production", *, cache_ttl_seconds: int | None = None
+    ) -> Prompt:
         return self._prompt
 
 
diff --git a/tests/unit/test_prompts_langfuse.py b/tests/unit/test_prompts_langfuse.py
index 22d0c42..d3021dd 100644
--- a/tests/unit/test_prompts_langfuse.py
+++ b/tests/unit/test_prompts_langfuse.py
@@ -70,9 +70,13 @@ def __init__(self, *, result: Any = None, exc: BaseException | None = None) -> N
         self._result = result
         self._exc = exc
         self.calls: list[tuple[str, str]] = []
+        self.last_cache_ttl_seconds: int | None = None
 
-    def get_prompt(self, name: str, *, label: str = "production", **_: Any) -> Any:
+    def get_prompt(
+        self, name: str, *, label: str = "production", cache_ttl_seconds: int | None = None, **_: Any
+    ) -> Any:
         self.calls.append((name, label))
+        self.last_cache_ttl_seconds = cache_ttl_seconds
         if self._exc is not None:
             raise self._exc
         return self._result
@@ -108,6 +112,20 @@ async def test_fetch_passes_label_through() -> None:
     assert fake.calls == [("greeting", "staging")]
 
 
+async def test_fetch_forwards_cache_ttl_seconds_to_sdk() -> None:
+    # The langfuse backend forwards the read-side cache control to the
+    # SDK's own get_prompt cache; an absent value forwards None (the
+    # SDK's default TTL), preserving prior behavior.
+    fake = _FakeClient(result=_text_client())
+    backend = LangfusePromptBackend(fake)
+
+    await backend.fetch("greeting", "production", cache_ttl_seconds=42)
+    assert fake.last_cache_ttl_seconds == 42
+
+    await backend.fetch("greeting", "production")
+    assert fake.last_cache_ttl_seconds is None
+
+
 async def test_chat_prompt_maps_to_chat_prompt() -> None:
     # Per proposal 0046 (v0.38.0): Langfuse chat prompts now map to
     # a ChatPrompt with one ContentSegment per Langfuse chat message

From 88e31fc238f49c76455a6d2ff7cddeee2255dd85 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Fri, 19 Jun 2026 15:32:12 -0700
Subject: [PATCH 3/5] Wire pipeline-utilities patch fixtures 070/071

Wire the v0.63.1 coverage fixtures into the conformance runners: 071
(fan-out degrade strict-reducer-raise) joins the failure-isolation set
in the pipeline-utilities runner, and 070 (crash-injection after_node
resume) joins the checkpoint runner alongside 067. python's behavior is
already conformant (unit-tested in v0.14.0); this is harness selection.
---
 tests/conformance/test_checkpoint.py         | 4 +++-
 tests/conformance/test_pipeline_utilities.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/conformance/test_checkpoint.py b/tests/conformance/test_checkpoint.py
index 9b6861b..672ac25 100644
--- a/tests/conformance/test_checkpoint.py
+++ b/tests/conformance/test_checkpoint.py
@@ -72,8 +72,10 @@
 # 069 (fan-out degrade refinements, proposal 0069, v0.59.0) is a mixed
 # fixture: this runner drives its crash_injection/resume case and skips the
 # plain FI-degrade cases (owned by test_pipeline_utilities.py).
+# 070 (crash-injection after_node resume, proposal 0070 coverage, spec
+# v0.63.1) is a crash/resume fixture this runner owns, alongside 067.
 _CHECKPOINT_FIXTURE_NUMBERS: frozenset[int] = frozenset(
-    (set(range(24, 32)) - {28}) | set(range(48, 57)) | {67, 69}
+    (set(range(24, 32)) - {28}) | set(range(48, 57)) | {67, 69, 70}
 )
 
 # Fixtures that need resume-aware test seams the conformance adapter
diff --git a/tests/conformance/test_pipeline_utilities.py b/tests/conformance/test_pipeline_utilities.py
index 3f697fb..47ef155 100644
--- a/tests/conformance/test_pipeline_utilities.py
+++ b/tests/conformance/test_pipeline_utilities.py
@@ -93,7 +93,9 @@ def _load(path: Path) -> dict[str, Any]:
 # at v0.58.0; 069 (fan-out degrade refinements, 0069) at v0.59.0 — this runner
 # drives its FI-degrade cases and skips its crash_injection/resume case (owned
 # by test_checkpoint.py, which also owns fixture 067, hence the gap at 67).
-_FAILURE_ISOLATION_FIXTURES = frozenset(range(58, 67)) | {68, 69}
+# 071 (fan-out degrade strict-reducer-raise, proposal 0069 coverage,
+# spec v0.63.1) is an FI-degrade fixture this runner drives.
+_FAILURE_ISOLATION_FIXTURES = frozenset(range(58, 67)) | {68, 69, 71}
 
 
 def _fixture_paths() -> list[Path]:

From fa6c3d51cbc2df256f74d2ac6d6314efef99443e Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Fri, 19 Jun 2026 15:32:33 -0700
Subject: [PATCH 4/5] Document 0072 prompt cache control and changelog

Document the cache_ttl_seconds read-side control in the prompt-management
concepts page, and add the 0.15.0 changelog entry plus advance the
cycle's spec-pin bullet to v0.63.1 (proposal 0072 + the 070/071 patch).
---
 CHANGELOG.md             |  3 ++-
 docs/concepts/prompts.md | 26 ++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index de7964f..75ade74 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,10 +11,11 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). The
 - **Detached-trace invocation span** (proposal 0061, observability §4.4, spec v0.61.0). The OTel observer now synthesizes an `openarmature.invocation` span at the root of each detached trace (a detached subgraph and each detached fan-out instance), carrying the parent's shared `invocation_id` (detached mode is observer-side trace rendering, not a new run) and the detached unit's own `entry_node`; the detached subgraph / instance span nests under it. A raising detached subgraph surfaces ERROR plus the error category and an OTel exception event on both the parent dispatch span and the detached invocation span. This is observer-side only, with no graph-engine change; the Langfuse observer is unchanged (its Trace entity already plays the invocation-level-container role). Conformance fixtures 008 (rewritten) and 058 (newly wired) run in `test_observability`.
 - **Per-attempt LLM spans under call-level retry** (proposal 0050, observability §5.5 / llm-provider §7.1). Completes proposal 0050, which shipped `partial` in v0.14.0 (failure-isolation middleware and the `complete(retry=...)` loop landed then; the per-attempt span surface was deferred). Under call-level retry the OTel observer now emits one `openarmature.llm.complete` span per attempt, each carrying `openarmature.llm.attempt_index` (0-based, 0..N-1, and 0 for a no-retry call). An intermediate failed attempt's span carries ERROR status plus its error category and the request-side attributes; the final attempt's span carries the terminal outcome and, on success, the full response surface. A python-internal `LlmRetryAttemptEvent`, dispatched once per attempt, is the sole source of the OTel span; the terminal `LlmCompletionEvent` / `LlmFailedEvent` stay one per call (payload, latency, Langfuse Generation) and no longer drive the OTel span. Langfuse renders one terminal Generation per call, with the per-attempt detail on the OTel span surface only (a spec-side §8 clarification to pin this is tracked, non-blocking). `conformance.toml` flips proposal 0050 to `implemented`; the call-level fixtures 056-058 are driven through the provider plus OTel observer and the single-attempt observability fixture 057 is wired.
 - **Langfuse `trace.userId` / `trace.sessionId` population** (proposal 0064, observability §8.4.1, spec v0.62.0). The Langfuse observer now promotes a recognized `userId` key in the caller-supplied invocation metadata to Langfuse's first-class `trace.userId` field (the Users dashboard), additively: the key also remains at `trace.metadata.userId`. Promotion is automatic and unconditional; an absent key leaves `trace.userId` unset. The `LangfuseClient.trace()` surface (the Protocol, the in-memory client, and the SDK adapter) gains `session_id` / `user_id`. `trace.sessionId` is sourced from `openarmature.session_id`, which the sessions capability (proposal 0020) establishes; that capability is not yet implemented in python, so the `sessionId` plumbing is in place but dormant (no source) and unset in the interim. `conformance.toml` records proposal 0064 `partial` on that basis: fixture 084 cases 2/3/4 (not session-bound, `userId` present additively, `userId` absent) run, and the session-bound cases 1/5 defer until 0020. Langfuse-only: the OTel side already carries `openarmature.session_id` and `openarmature.user.*` as span attributes, and OTel has no trace-level session/user field.
+- **Per-fetch prompt cache control: `cache_ttl_seconds`** (proposal 0072, prompt-management §5 / §6, spec v0.63.0). `PromptBackend.fetch`, `PromptManager.fetch`, and `PromptManager.get` gain an optional `cache_ttl_seconds` read-side control: `None` preserves current behavior, `0` forces a fresh read past any client-side cache, and `N > 0` bounds a served entry's staleness to N seconds; a negative value is rejected at the manager. It governs only which cached entry may be served, not whether or how results are cached. The bundled filesystem backend is cacheless and ignores it; the bundled Langfuse backend forwards it to the Langfuse SDK's `get_prompt` cache. Conformance fixtures 033/034 run through a caching harness backend (conformance-adapter §6.8: `source_read_count` plus a controllable `advance_clock`).
 
 ### Changed
 
-- **Pinned spec advances v0.60.0 → v0.62.0** across the v0.15.0 cycle: v0.61.0 (proposal 0061, the detached-trace invocation span above) and v0.62.0 (proposal 0064, the Langfuse session/user population above). `conformance.toml` records 0061 `implemented` and 0064 `partial` (its `sessionId` half is dormant pending the sessions capability). Proposal 0050 needed no pin bump of its own (it was already within the pin from its v0.42.0 acceptance); its v0.14.0 `partial` entry flips to `implemented` with the per-attempt span surface above.
+- **Pinned spec advances v0.60.0 → v0.63.1** across the v0.15.0 cycle: v0.61.0 (proposal 0061, the detached-trace invocation span above), v0.62.0 (proposal 0064, the Langfuse session/user population above), v0.63.0 (proposal 0072, the prompt cache control above), and the v0.63.1 patch (pipeline-utilities coverage fixtures 070/071 for the already-implemented 0069 / 0070 behavior, no new proposal). `conformance.toml` records 0061 / 0072 `implemented` and 0064 `partial` (its `sessionId` half is dormant pending the sessions capability). Proposal 0050 needed no pin bump of its own (it was already within the pin from its v0.42.0 acceptance); its v0.14.0 `partial` entry flips to `implemented` with the per-attempt span surface above.
 
 ## [0.14.0] — 2026-06-17
 
diff --git a/docs/concepts/prompts.md b/docs/concepts/prompts.md
index 08f87b0..6bc2d5f 100644
--- a/docs/concepts/prompts.md
+++ b/docs/concepts/prompts.md
@@ -47,6 +47,32 @@ Why two operations instead of one? Three reasons:
 The convenience `get()` operation gives you the single-call
 shape when you want it without removing the separability.
 
+## Refreshing cached prompts: `cache_ttl_seconds`
+
+`fetch` and `get` take an optional `cache_ttl_seconds` that controls how
+fresh a served prompt must be, for backends that maintain a client-side
+cache:
+
+- omitted / `None` keeps the backend's default caching behavior;
+- `0` forces a fresh read past any cache;
+- `N > 0` serves a cached entry only while it is younger than N seconds,
+  re-reading the source once it ages past N.
+
+A negative value is rejected. The parameter is a read-side control: it governs
+which cached entry may be served for this fetch, not whether or how results
+are cached. Cacheless backends (the bundled filesystem backend) ignore it; the
+bundled Langfuse backend forwards it to the Langfuse SDK's own prompt cache.
+
+```python
+# Always re-read from the backend, bypassing any cache:
+fresh = await manager.fetch("greeting", "production", cache_ttl_seconds=0)
+
+# Serve a cached entry only if it's under five minutes old:
+recent = await manager.get(
+    "greeting", "production", {"user": "Alice"}, cache_ttl_seconds=300
+)
+```
+
 ## Prompt identity
 
 Every `Prompt` carries five identity fields:

From 0f3944c11e74ab3bd631a6d3840a623f0e371def Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Fri, 19 Jun 2026 15:55:42 -0700
Subject: [PATCH 5/5] Forward cache_ttl_seconds on manager-target fixture calls

PR #172 review: _run_call's manager-target fetch/get dropped
call.cache_ttl_seconds, so `target: manager` fixtures couldn't exercise
the manager's threading (033/034 use direct-backend targets, so they
were unaffected). Forward the control on both, matching the
direct-backend path.
---
 tests/conformance/test_prompt_management.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/conformance/test_prompt_management.py b/tests/conformance/test_prompt_management.py
index af47539..bb91f6b 100644
--- a/tests/conformance/test_prompt_management.py
+++ b/tests/conformance/test_prompt_management.py
@@ -347,7 +347,10 @@ async def _run_call(
             assert manager is not None
             if operation == "fetch":
                 assert call.name is not None
-                return await manager.fetch(call.name, call.label), None
+                return (
+                    await manager.fetch(call.name, call.label, cache_ttl_seconds=call.cache_ttl_seconds),
+                    None,
+                )
             if operation == "render":
                 # Either inline fetched_prompt or a ref to a capture.
                 if call.fetched_prompt_ref is not None:
@@ -382,6 +385,7 @@ async def _run_call(
                         call.label,
                         call.variables or {},
                         placeholders=placeholders,
+                        cache_ttl_seconds=call.cache_ttl_seconds,
                     ),
                     None,
                 )