openshift · enxebre · May 5, 2026
diff --git a/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml b/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml
@@ -130,6 +130,11 @@ resources:
       cpu: 100m
       memory: 200Mi
 tests:
+- always_run: false
+  as: eval-agents
+  optional: true
+  steps:
+    workflow: hypershift-eval-agents
 - always_run: false
   as: agentic-qe-aws
   optional: true

diff --git a/ci-operator/step-registry/hypershift/eval-agents/OWNERS b/ci-operator/step-registry/hypershift/eval-agents/OWNERS
@@ -0,0 +1,14 @@
+approvers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
+reviewers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
diff --git a/...erator/step-registry/hypershift/eval-agents/hypershift-eval-agents-workflow.metadata.json b/...erator/step-registry/hypershift/eval-agents/hypershift-eval-agents-workflow.metadata.json
@@ -0,0 +1,21 @@
+{
+	"path": "hypershift/eval-agents/hypershift-eval-agents-workflow.yaml",
+	"owners": {
+		"approvers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		],
+		"reviewers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		]
+	}
+}
diff --git a/ci-operator/step-registry/hypershift/eval-agents/hypershift-eval-agents-workflow.yaml b/ci-operator/step-registry/hypershift/eval-agents/hypershift-eval-agents-workflow.yaml
@@ -0,0 +1,22 @@
+# Workflow for evaluating the HyperShift agent definitions. It consists of
+# one pre step followed by one test step.
+workflow:
+  as: hypershift-eval-agents
+  documentation: |-
+    HyperShift eval-agents workflow for testing agent definitions.
+
+    This workflow:
+    1. Setup: Installs Node.js/npm, verifies Claude Code CLI
+    2. Run: Executes make eval-agents which runs promptfoo eval scenarios
+
+    Each scenario sends a prompt to a Claude Code agent, then uses an
+    LLM judge to check the output against expected criteria.
+
+    Configurable via env var: EVAL_FOCUS (promptfoo filter pattern).
+  steps:
+    # Environment preparation, executed before the evaluation.
+    pre:
+      - ref: hypershift-eval-agents-setup
+    # The evaluation itself.
+    test:
+      - ref: hypershift-eval-agents-run
diff --git a/ci-operator/step-registry/hypershift/eval-agents/run/OWNERS b/ci-operator/step-registry/hypershift/eval-agents/run/OWNERS
@@ -0,0 +1,14 @@
+approvers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
+reviewers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
diff --git a/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-commands.sh b/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-commands.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+set -euo pipefail
+
+echo "=== HyperShift Eval Agents ==="
+
+cd /go/src/github.com/openshift/hypershift
+
+MAKE_ARGS="EVAL_OUTPUT=${ARTIFACT_DIR}/results.xml"
+
+if [ -n "${EVAL_FOCUS:-}" ]; then
+    echo "Filter: ${EVAL_FOCUS}"
+    MAKE_ARGS="${MAKE_ARGS} EVAL_FILTER=${EVAL_FOCUS}"
+fi
+
+echo "Running eval-agents with promptfoo..."
+make eval-agents ${MAKE_ARGS}
+
+echo "=== Eval Agents Complete ==="
diff --git a/...tor/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-ref.metadata.json b/...tor/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-ref.metadata.json
@@ -0,0 +1,21 @@
+{
+	"path": "hypershift/eval-agents/run/hypershift-eval-agents-run-ref.yaml",
+	"owners": {
+		"approvers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		],
+		"reviewers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		]
+	}
+}
diff --git a/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-ref.yaml b/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-ref.yaml
@@ -0,0 +1,50 @@
+# Step that runs the eval-agents evaluation in the claude-ai-helpers image.
+ref:
+  as: hypershift-eval-agents-run
+  from: claude-ai-helpers
+  commands: hypershift-eval-agents-run-commands.sh
+  documentation: |-
+    Run step for the HyperShift eval-agents job.
+    Executes make eval-agents which runs promptfoo to evaluate
+    agent definitions using LLM judge assertions.
+  resources:
+    requests:
+      cpu: 500m
+      memory: 1Gi
+  # Secret mounted as files; GOOGLE_APPLICATION_CREDENTIALS below points at a
+  # file under this mount path.
+  credentials:
+  - namespace: test-credentials
+    name: hypershift-team-claude-prow
+    mount_path: /var/run/claude-code-service-account
+  env:
+  # Vertex AI settings.
+  - name: CLAUDE_CODE_USE_VERTEX
+    default: "1"
+    documentation: |-
+      Enable Vertex AI for Claude Code.
+  - name: CLOUD_ML_REGION
+    default: "global"
+    documentation: |-
+      Google Cloud region for Vertex AI.
+  - name: ANTHROPIC_VERTEX_PROJECT_ID
+    default: "itpc-gcp-hybrid-pe-eng-claude"
+    documentation: |-
+      Google Cloud project ID for Vertex AI authentication.
+  - name: GOOGLE_APPLICATION_CREDENTIALS
+    default: "/var/run/claude-code-service-account/claude-prow"
+    documentation: |-
+      Path to the Google Cloud service account JSON key file.
+  # Scenario selection; an empty value means no filter is applied.
+  - name: EVAL_FOCUS
+    default: ""
+    documentation: |-
+      Filter pattern to run a subset of eval scenarios.
+      Passed to promptfoo as --filter-pattern.
+  # NOTE(review): the commands script never reads this variable directly;
+  # presumably ci-operator maps it onto EVAL_FOCUS - confirm it has to be
+  # declared here.
+  - name: MULTISTAGE_PARAM_OVERRIDE_EVAL_FOCUS
+    default: ""
+    documentation: |-
+      Gangway API override for EVAL_FOCUS.
diff --git a/ci-operator/step-registry/hypershift/eval-agents/setup/OWNERS b/ci-operator/step-registry/hypershift/eval-agents/setup/OWNERS
@@ -0,0 +1,14 @@
+approvers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
+reviewers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
diff --git a/...rator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-commands.sh b/...rator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-commands.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -euo pipefail
+
+echo "=== HyperShift Eval Agents Setup ==="
+
+echo "Verifying Claude Code CLI..."
+claude --version || { echo "ERROR: Claude Code CLI not found"; exit 1; }
+
+echo "Installing Node.js and npm..."
+dnf install -y nodejs npm || yum install -y nodejs npm
+node --version
+npm --version
+
+echo "Setup complete"
diff --git a/...step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.metadata.json b/...step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.metadata.json
@@ -0,0 +1,21 @@
+{
+	"path": "hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.yaml",
+	"owners": {
+		"approvers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		],
+		"reviewers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		]
+	}
+}
diff --git a/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.yaml b/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.yaml
@@ -0,0 +1,36 @@
+# Step that prepares the environment for the eval-agents evaluation.
+ref:
+  as: hypershift-eval-agents-setup
+  from: claude-ai-helpers
+  commands: hypershift-eval-agents-setup-commands.sh
+  documentation: |-
+    Setup step for the HyperShift eval-agents job.
+    Installs Node.js/npm for promptfoo and verifies Claude Code CLI.
+  resources:
+    requests:
+      cpu: 100m
+      memory: 200Mi
+  # NOTE(review): the setup commands only call claude --version, dnf/yum,
+  # node and npm; confirm this step really needs the service account secret
+  # and the Vertex AI variables below.
+  credentials:
+  - namespace: test-credentials
+    name: hypershift-team-claude-prow
+    mount_path: /var/run/claude-code-service-account
+  env:
+  - name: CLAUDE_CODE_USE_VERTEX
+    default: "1"
+    documentation: |-
+      Enable Vertex AI for Claude Code.
+  - name: CLOUD_ML_REGION
+    default: "global"
+    documentation: |-
+      Google Cloud region for Vertex AI.
+  - name: ANTHROPIC_VERTEX_PROJECT_ID
+    default: "itpc-gcp-hybrid-pe-eng-claude"
+    documentation: |-
+      Google Cloud project ID for Vertex AI authentication.
+  - name: GOOGLE_APPLICATION_CREDENTIALS
+    default: "/var/run/claude-code-service-account/claude-prow"
+    documentation: |-
+      Path to the Google Cloud service account JSON key file.