From 7fe6fdd972efa5bb1bf8a1900ffbcf3160f871d6 Mon Sep 17 00:00:00 2001
From: Yu Yi <yiyu@google.com>
Date: Tue, 9 Jun 2026 19:06:59 -0400
Subject: [PATCH] =?UTF-8?q?loop:=20graduated=20guardrails=20=E2=80=94=20id?=
 =?UTF-8?q?entical-call=20and=20consecutive-mistake=20detectors=20(closes?=
 =?UTF-8?q?=20#344)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two detectors watch every tool round, on by default with opt-out via
RunRequest.Guardrails (zero value = defaults, mirroring RetryPolicy):

- Identical-call: a single-call round whose name+canonicalized-args
  hash matches the previous round's extends a streak. At 3 the loop
  injects a corrective user message ("it will keep producing the same
  result — change arguments or approach"); at 5 the run ends with the
  typed ErrRepeatedToolCalls. Any change of arguments resets.
- Consecutive mistakes: rounds where every tool result IsError. At 3 a
  corrective message ("re-read the error messages before acting");
  at 6 the run ends with ErrTooManyMistakes. Any success resets.

Injected messages are marked glue/guardrail in metadata;
EventGuardrail reports kind/count/action so UIs and the goal loop can
surface them. Graduated nudge-then-halt policy per Gemini CLI, with
Cline's thresholds (docs/coding-harness-roadmap.md P2.8).

The "no tool used" reprompt from the roadmap is deliberately omitted:
glue turns legitimately end without tool calls, and the narrated-stall
case is covered by AutoContinue (#343).

TestRunMaxTurnsDefaultIs32 now disables guardrails — it scripts 50
identical calls to test the turn budget, which the repeat detector
would correctly halt first.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 CHANGELOG.md            |  15 +++
 loop/guardrails.go      | 195 +++++++++++++++++++++++++++++++++++++
 loop/guardrails_test.go | 209 ++++++++++++++++++++++++++++++++++++++++
 loop/run.go             |  21 ++++
 loop/run_test.go        |   3 +
 5 files changed, 443 insertions(+)
 create mode 100644 loop/guardrails.go
 create mode 100644 loop/guardrails_test.go

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bdfc852..b1df2b5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,21 @@ and [`agents/peggy/CHANGELOG.md`](agents/peggy/CHANGELOG.md).
 
 ## Unreleased
 
+- **Loop guardrails (`loop`).** Two graduated detectors now watch every
+  tool round, on by default (`RunRequest.Guardrails`, zero value =
+  defaults; `Disabled` opts out): repeating the **same tool call with
+  identical arguments** draws a corrective user message at 3
+  consecutive occurrences and ends the run with the typed
+  `ErrRepeatedToolCalls` at 5; **consecutive all-error tool rounds**
+  draw a corrective message at 3 and end the run with
+  `ErrTooManyMistakes` at 6. Streaks reset on any change of arguments
+  or any successful result; injected messages are marked
+  `glue/guardrail` in metadata and `EventGuardrail` reports
+  kind/count/action. These are the failure shapes that waste tokens
+  fastest on open-weight models — and unattended `glue goal` runs
+  ([docs/coding-harness-roadmap.md](docs/coding-harness-roadmap.md)
+  P2.8). Closes #344.
+
 - **Next-speaker stall recovery (`loop`, `glue`, `cmd/glue`).** Gemini's
   classic stall — narrating "I will now update the file." and stopping
   without calling a tool — is now recovered: with the new opt-in
diff --git a/loop/guardrails.go b/loop/guardrails.go
new file mode 100644
index 0000000..792fab0
--- /dev/null
+++ b/loop/guardrails.go
@@ -0,0 +1,195 @@
+package loop
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"strings"
+)
+
+// Guardrails detect two pathological loop shapes that waste tokens
+// fastest on open-weight models: repeating the same tool call with
+// identical arguments, and burning turn after turn on nothing but
+// failing tool calls. The response is graduated (Gemini CLI's policy):
+// first inject a corrective user message, only halt when the model
+// ignores it.
+
+// GuardrailPolicy bounds the loop guardrails. The zero value enables
+// them with defaults; set Disabled to turn them off.
+type GuardrailPolicy struct {
+	// Disabled turns the guardrails off entirely.
+	Disabled bool
+
+	// RepeatNudge is the number of consecutive identical tool calls
+	// (same name, same canonical arguments) that triggers a corrective
+	// user message. Zero means DefaultRepeatNudge.
+	RepeatNudge int
+
+	// RepeatHalt is the number of consecutive identical calls that
+	// ends the run with [ErrRepeatedToolCalls]. Zero means
+	// DefaultRepeatHalt.
+	RepeatHalt int
+
+	// MistakeNudge is the number of consecutive all-error tool rounds
+	// that triggers a corrective user message. Zero means
+	// DefaultMistakeNudge.
+	MistakeNudge int
+
+	// MistakeHalt is the number of consecutive all-error tool rounds
+	// that ends the run with [ErrTooManyMistakes]. Zero means
+	// DefaultMistakeHalt.
+	MistakeHalt int
+}
+
+// Guardrail defaults, matching the thresholds Cline and Gemini CLI
+// converged on.
+const (
+	DefaultRepeatNudge  = 3
+	DefaultRepeatHalt   = 5
+	DefaultMistakeNudge = 3
+	DefaultMistakeHalt  = 6
+)
+
+// EventGuardrail is emitted when a guardrail injects a corrective
+// message or halts the run. Event.Error carries the reason;
+// Event.Metadata carries "kind" ("repeat" or "mistakes"), "count", and
+// "action" ("nudge" or "halt").
+const EventGuardrail EventType = "guardrail"
+
+// ErrRepeatedToolCalls ends a run after the model kept issuing the
+// same tool call with identical arguments despite a corrective nudge.
+var ErrRepeatedToolCalls = errors.New("loop: model repeated the same tool call past the guardrail limit")
+
+// ErrTooManyMistakes ends a run after every tool call failed for
+// several consecutive rounds despite a corrective nudge.
+var ErrTooManyMistakes = errors.New("loop: too many consecutive failed tool rounds")
+
+const repeatNudgeMessage = "You have repeated the exact same tool call several times. It will keep producing the same result. Step back, reconsider the approach, and either change the arguments or use a different tool."
+
+const mistakeNudgeMessage = "Your last several tool calls all failed. Stop and re-read the error messages carefully before acting again — they describe what to fix. If the same approach keeps failing, try a different one."
+
+func (p GuardrailPolicy) withDefaults() GuardrailPolicy {
+	if p.RepeatNudge <= 0 {
+		p.RepeatNudge = DefaultRepeatNudge
+	}
+	if p.RepeatHalt <= 0 {
+		p.RepeatHalt = DefaultRepeatHalt
+	}
+	if p.MistakeNudge <= 0 {
+		p.MistakeNudge = DefaultMistakeNudge
+	}
+	if p.MistakeHalt <= 0 {
+		p.MistakeHalt = DefaultMistakeHalt
+	}
+	return p
+}
+
+type guardAction int
+
+const (
+	guardOK guardAction = iota
+	guardNudge
+	guardHalt
+)
+
+type guardVerdict struct {
+	action  guardAction
+	kind    string // "repeat" | "mistakes"
+	count   int
+	message string // nudge text
+	err     error  // halt error
+}
+
+// guardState tracks consecutive identical calls and consecutive
+// all-error tool rounds across the turns of one Run.
+type guardState struct {
+	policy GuardrailPolicy
+
+	lastCallKey  string
+	repeatCount  int
+	repeatNudged bool
+
+	mistakeRounds int
+	mistakeNudged bool
+}
+
+func newGuardState(policy GuardrailPolicy) *guardState {
+	return &guardState{policy: policy.withDefaults()}
+}
+
+// observe inspects one tool round (the calls of an assistant turn and
+// their results) and returns the action to take.
+func (g *guardState) observe(calls []ToolCall, results []Message) guardVerdict {
+	if g.policy.Disabled {
+		return guardVerdict{action: guardOK}
+	}
+
+	// Identical-call tracking: a round consisting of exactly one call
+	// whose name+arguments hash matches the previous round's extends
+	// the streak; anything else resets it.
+	if len(calls) == 1 {
+		key := toolCallKey(calls[0])
+		if key == g.lastCallKey {
+			g.repeatCount++
+		} else {
+			g.lastCallKey = key
+			g.repeatCount = 1
+			g.repeatNudged = false
+		}
+	} else {
+		g.lastCallKey = ""
+		g.repeatCount = 0
+		g.repeatNudged = false
+	}
+
+	// Mistake tracking: a round where every result is an error extends
+	// the streak; any success resets it.
+	allErrors := len(results) > 0
+	for _, m := range results {
+		if !m.IsError {
+			allErrors = false
+			break
+		}
+	}
+	if allErrors {
+		g.mistakeRounds++
+	} else {
+		g.mistakeRounds = 0
+		g.mistakeNudged = false
+	}
+
+	if g.repeatCount >= g.policy.RepeatHalt {
+		return guardVerdict{action: guardHalt, kind: "repeat", count: g.repeatCount, err: fmt.Errorf("%w (%d identical calls to %s)", ErrRepeatedToolCalls, g.repeatCount, calls[0].Name)}
+	}
+	if g.mistakeRounds >= g.policy.MistakeHalt {
+		return guardVerdict{action: guardHalt, kind: "mistakes", count: g.mistakeRounds, err: fmt.Errorf("%w (%d consecutive failed rounds)", ErrTooManyMistakes, g.mistakeRounds)}
+	}
+	if g.repeatCount >= g.policy.RepeatNudge && !g.repeatNudged {
+		g.repeatNudged = true
+		return guardVerdict{action: guardNudge, kind: "repeat", count: g.repeatCount, message: repeatNudgeMessage}
+	}
+	if g.mistakeRounds >= g.policy.MistakeNudge && !g.mistakeNudged {
+		g.mistakeNudged = true
+		return guardVerdict{action: guardNudge, kind: "mistakes", count: g.mistakeRounds, message: mistakeNudgeMessage}
+	}
+	return guardVerdict{action: guardOK}
+}
+
+// toolCallKey canonicalizes a call for identity comparison: name plus
+// a hash of the raw arguments with whitespace collapsed.
+func toolCallKey(call ToolCall) string {
+	args := strings.Join(strings.Fields(string(call.Arguments)), "")
+	sum := sha256.Sum256([]byte(call.Name + "\x00" + args))
+	return hex.EncodeToString(sum[:8])
+}
+
+// guardrailUserMessage is the injected corrective message, marked in
+// metadata so stores and UIs can identify it.
+func guardrailUserMessage(text, kind string) Message {
+	return Message{
+		Role:     MessageRoleUser,
+		Content:  []ContentPart{{Type: ContentTypeText, Text: text}},
+		Metadata: map[string]any{"glue/guardrail": kind},
+	}
+}
diff --git a/loop/guardrails_test.go b/loop/guardrails_test.go
new file mode 100644
index 0000000..fe70f33
--- /dev/null
+++ b/loop/guardrails_test.go
@@ -0,0 +1,209 @@
+package loop
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"testing"
+)
+
+// guardTool is a scriptable tool whose executor returns errors or
+// successes per call.
+func guardTool(name string, results []bool) Tool {
+	i := 0
+	return Tool{
+		ToolSpec: ToolSpec{Name: name},
+		Execute: func(_ context.Context, _ ToolCall) (ToolResult, error) {
+			ok := true
+			if i < len(results) {
+				ok = results[i]
+			}
+			i++
+			if ok {
+				return ToolResult{Content: []ContentPart{{Type: ContentTypeText, Text: "fine"}}}, nil
+			}
+			return ToolResult{IsError: true, Content: []ContentPart{{Type: ContentTypeText, Text: "boom"}}}, nil
+		},
+	}
+}
+
+// callTurn scripts an assistant turn making one tool call.
+func callTurn(id, name, args string) []ProviderEvent {
+	return []ProviderEvent{
+		{Type: ProviderEventToolCall, ToolCall: &ToolCall{ID: id, Name: name, Arguments: json.RawMessage(args)}},
+		{Type: ProviderEventDone},
+	}
+}
+
+func TestGuardrailRepeatNudgeThenHalt(t *testing.T) {
+	t.Parallel()
+	// The model repeats the exact same call every turn, ignoring the nudge.
+	turns := [][]ProviderEvent{}
+	for i := 0; i < 6; i++ {
+		turns = append(turns, callTurn("c", "probe", `{"x":1}`))
+	}
+	provider := &fakeProvider{turns: turns}
+	var events []Event
+	_, err := Run(context.Background(), RunRequest{
+		Provider: provider,
+		Tools:    []Tool{guardTool("probe", nil)},
+		Messages: []Message{{Role: MessageRoleUser, Content: []ContentPart{{Type: ContentTypeText, Text: "go"}}}},
+		Emit: func(e Event) {
+			if e.Type == EventGuardrail {
+				events = append(events, e)
+			}
+		},
+	})
+	if !errors.Is(err, ErrRepeatedToolCalls) {
+		t.Fatalf("err = %v, want ErrRepeatedToolCalls", err)
+	}
+	if provider.calls != 5 {
+		t.Fatalf("provider calls = %d, want 5 (halt at the 5th identical call)", provider.calls)
+	}
+	if len(events) != 2 {
+		t.Fatalf("guardrail events = %d, want 2 (nudge then halt)", len(events))
+	}
+	if events[0].Metadata["action"] != "nudge" || events[1].Metadata["action"] != "halt" {
+		t.Fatalf("event actions = %v / %v", events[0].Metadata["action"], events[1].Metadata["action"])
+	}
+}
+
+func TestGuardrailRepeatResetsOnDifferentArgs(t *testing.T) {
+	t.Parallel()
+	turns := [][]ProviderEvent{
+		callTurn("a", "probe", `{"x":1}`),
+		callTurn("b", "probe", `{"x":2}`),
+		callTurn("c", "probe", `{"x":3}`),
+		callTurn("d", "probe", `{"x":4}`),
+		{{Type: ProviderEventTextDelta, Delta: "done"}, {Type: ProviderEventDone}},
+	}
+	provider := &fakeProvider{turns: turns}
+	res, err := Run(context.Background(), RunRequest{
+		Provider: provider,
+		Tools:    []Tool{guardTool("probe", nil)},
+		Messages: []Message{{Role: MessageRoleUser, Content: []ContentPart{{Type: ContentTypeText, Text: "go"}}}},
+	})
+	if err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	for _, m := range res.NewMessages {
+		if m.Metadata["glue/guardrail"] != nil {
+			t.Fatalf("unexpected guardrail message: %#v", m)
+		}
+	}
+}
+
+func TestGuardrailMistakesNudgeThenHalt(t *testing.T) {
+	t.Parallel()
+	// Different args each turn (no repeat trigger), but every result
+	// fails. Nudge after 3 failed rounds, halt after 6.
+	turns := [][]ProviderEvent{}
+	for i := 0; i < 7; i++ {
+		turns = append(turns, callTurn("c", "probe", `{"attempt":`+string(rune('0'+i))+`}`))
+	}
+	provider := &fakeProvider{turns: turns}
+	fails := make([]bool, 10) // all false = all errors
+	var nudges, halts int
+	_, err := Run(context.Background(), RunRequest{
+		Provider: provider,
+		Tools:    []Tool{guardTool("probe", fails)},
+		Messages: []Message{{Role: MessageRoleUser, Content: []ContentPart{{Type: ContentTypeText, Text: "go"}}}},
+		Emit: func(e Event) {
+			if e.Type == EventGuardrail {
+				switch e.Metadata["action"] {
+				case "nudge":
+					nudges++
+				case "halt":
+					halts++
+				}
+			}
+		},
+	})
+	if !errors.Is(err, ErrTooManyMistakes) {
+		t.Fatalf("err = %v, want ErrTooManyMistakes", err)
+	}
+	if provider.calls != 6 {
+		t.Fatalf("provider calls = %d, want 6", provider.calls)
+	}
+	if nudges != 1 || halts != 1 {
+		t.Fatalf("nudges = %d halts = %d, want 1/1", nudges, halts)
+	}
+}
+
+func TestGuardrailMistakesResetOnSuccess(t *testing.T) {
+	t.Parallel()
+	turns := [][]ProviderEvent{
+		callTurn("a", "probe", `{"x":1}`),
+		callTurn("b", "probe", `{"x":2}`),
+		callTurn("c", "probe", `{"x":3}`), // success resets the streak
+		callTurn("d", "probe", `{"x":4}`),
+		{{Type: ProviderEventTextDelta, Delta: "done"}, {Type: ProviderEventDone}},
+	}
+	provider := &fakeProvider{turns: turns}
+	res, err := Run(context.Background(), RunRequest{
+		Provider: provider,
+		Tools:    []Tool{guardTool("probe", []bool{false, false, true, false})},
+		Messages: []Message{{Role: MessageRoleUser, Content: []ContentPart{{Type: ContentTypeText, Text: "go"}}}},
+	})
+	if err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	for _, m := range res.NewMessages {
+		if m.Metadata["glue/guardrail"] != nil {
+			t.Fatalf("streak should have reset, got nudge: %#v", m)
+		}
+	}
+}
+
+func TestGuardrailDisabled(t *testing.T) {
+	t.Parallel()
+	turns := [][]ProviderEvent{}
+	for i := 0; i < 7; i++ {
+		turns = append(turns, callTurn("c", "probe", `{"x":1}`))
+	}
+	turns = append(turns, []ProviderEvent{{Type: ProviderEventTextDelta, Delta: "done"}, {Type: ProviderEventDone}})
+	provider := &fakeProvider{turns: turns}
+	_, err := Run(context.Background(), RunRequest{
+		Provider:   provider,
+		Tools:      []Tool{guardTool("probe", nil)},
+		Guardrails: GuardrailPolicy{Disabled: true},
+		Messages:   []Message{{Role: MessageRoleUser, Content: []ContentPart{{Type: ContentTypeText, Text: "go"}}}},
+	})
+	if err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	if provider.calls != 8 {
+		t.Fatalf("provider calls = %d, want 8 (no guardrails)", provider.calls)
+	}
+}
+
+func TestGuardrailNudgeMessageShape(t *testing.T) {
+	t.Parallel()
+	turns := [][]ProviderEvent{
+		callTurn("a", "probe", `{"x":1}`),
+		callTurn("b", "probe", `{"x":1}`),
+		callTurn("c", "probe", `{"x":1}`),
+		{{Type: ProviderEventTextDelta, Delta: "ok I'll stop"}, {Type: ProviderEventDone}},
+	}
+	provider := &fakeProvider{turns: turns}
+	res, err := Run(context.Background(), RunRequest{
+		Provider: provider,
+		Tools:    []Tool{guardTool("probe", nil)},
+		Messages: []Message{{Role: MessageRoleUser, Content: []ContentPart{{Type: ContentTypeText, Text: "go"}}}},
+	})
+	if err != nil {
+		t.Fatalf("Run: %v", err)
+	}
+	var nudge *Message
+	for i := range res.NewMessages {
+		if res.NewMessages[i].Metadata["glue/guardrail"] == "repeat" {
+			nudge = &res.NewMessages[i]
+		}
+	}
+	if nudge == nil {
+		t.Fatalf("no guardrail nudge in transcript: %#v", res.NewMessages)
+	}
+	if nudge.Role != MessageRoleUser {
+		t.Fatalf("nudge role = %s", nudge.Role)
+	}
+}
diff --git a/loop/run.go b/loop/run.go
index b19c661..8a9303b 100644
--- a/loop/run.go
+++ b/loop/run.go
@@ -49,6 +49,11 @@ type RunRequest struct {
 	// message (at most twice per run) instead of ending the turn.
 	// Recovers the classic Gemini narrate-then-stop stall.
 	AutoContinue bool
+
+	// Guardrails bounds the repeated-call and consecutive-mistake
+	// detectors. The zero value enables them with defaults; set
+	// Guardrails.Disabled to turn them off.
+	Guardrails GuardrailPolicy
 }
 
 // RunResult is returned by [Run]. Messages is the full transcript including
@@ -105,6 +110,7 @@ func Run(ctx context.Context, req RunRequest) (RunResult, error) {
 	lastAssistantMsg := -1
 	lastAssistantNew := -1
 	autoContinues := 0
+	guard := newGuardState(req.Guardrails)
 	for turn := 0; turn < maxTurns; turn++ {
 		if err := ctx.Err(); err != nil {
 			return fail(err)
@@ -147,6 +153,21 @@ func Run(ctx context.Context, req RunRequest) (RunResult, error) {
 		messages = append(messages, toolMessages...)
 		newMessages = append(newMessages, toolMessages...)
 
+		switch verdict := guard.observe(toolCalls, toolMessages); verdict.action {
+		case guardNudge:
+			nudge := guardrailUserMessage(verdict.message, verdict.kind)
+			messages = append(messages, nudge)
+			newMessages = append(newMessages, nudge)
+			emit(Event{Type: EventGuardrail, Error: verdict.message, Metadata: map[string]any{
+				"kind": verdict.kind, "count": verdict.count, "action": "nudge",
+			}})
+		case guardHalt:
+			emit(Event{Type: EventGuardrail, Error: verdict.err.Error(), Metadata: map[string]any{
+				"kind": verdict.kind, "count": verdict.count, "action": "halt",
+			}})
+			return fail(verdict.err)
+		}
+
 		emit(Event{Type: EventTurnEnd, Message: messagePtr(assistant)})
 	}
 
diff --git a/loop/run_test.go b/loop/run_test.go
index c907485..badde1d 100644
--- a/loop/run_test.go
+++ b/loop/run_test.go
@@ -271,6 +271,9 @@ func TestRunMaxTurnsDefaultIs32(t *testing.T) {
 	_, err := Run(context.Background(), RunRequest{
 		Provider: &fakeProvider{turns: turns},
 		Tools:    []Tool{noop},
+		// This test exercises the turn budget; the identical-call
+		// guardrail would halt these scripted repeats first.
+		Guardrails: GuardrailPolicy{Disabled: true},
 	})
 	if err == nil || !strings.Contains(err.Error(), "maximum turns exceeded (32)") {
 		t.Fatalf("err = %v, want default max turns 32", err)