diff --git a/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml b/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml
index 87c297ee73f3b..6a0660dcfda65 100644
--- a/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml
+++ b/ci-operator/config/openshift/hypershift/openshift-hypershift-main.yaml
@@ -130,6 +130,11 @@ resources:
       cpu: 100m
       memory: 200Mi
 tests:
+- always_run: false
+  as: eval-agents
+  optional: true
+  steps:
+    workflow: hypershift-eval-agents
 - always_run: false
   as: agentic-qe-aws
   optional: true
diff --git a/ci-operator/step-registry/hypershift/eval-agents/OWNERS b/ci-operator/step-registry/hypershift/eval-agents/OWNERS
new file mode 100644
index 0000000000000..f3e834938cd8b
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/OWNERS
@@ -0,0 +1,14 @@
+approvers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
+reviewers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
diff --git a/ci-operator/step-registry/hypershift/eval-agents/hypershift-eval-agents-workflow.metadata.json b/ci-operator/step-registry/hypershift/eval-agents/hypershift-eval-agents-workflow.metadata.json
new file mode 100644
index 0000000000000..129a65549405b
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/hypershift-eval-agents-workflow.metadata.json
@@ -0,0 +1,21 @@
+{
+	"path": "hypershift/eval-agents/hypershift-eval-agents-workflow.yaml",
+	"owners": {
+		"approvers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		],
+		"reviewers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/hypershift/eval-agents/hypershift-eval-agents-workflow.yaml b/ci-operator/step-registry/hypershift/eval-agents/hypershift-eval-agents-workflow.yaml
new file mode 100644
index 0000000000000..fa016c7a47a1a
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/hypershift-eval-agents-workflow.yaml
@@ -0,0 +1,18 @@
+workflow:
+  as: hypershift-eval-agents
+  steps:
+    pre:
+    - ref: hypershift-eval-agents-setup
+    test:
+    - ref: hypershift-eval-agents-run
+  documentation: |-
+    HyperShift eval-agents workflow for testing agent definitions.
+
+    This workflow:
+    1. Setup: Installs Node.js/npm, verifies Claude Code CLI
+    2. Run: Executes make eval-agents which runs promptfoo eval scenarios
+
+    Each scenario sends a prompt to a Claude Code agent, then uses an
+    LLM judge to check the output against expected criteria.
+
+    Configurable via env var: EVAL_FOCUS (promptfoo filter pattern).
diff --git a/ci-operator/step-registry/hypershift/eval-agents/run/OWNERS b/ci-operator/step-registry/hypershift/eval-agents/run/OWNERS
new file mode 100644
index 0000000000000..f3e834938cd8b
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/run/OWNERS
@@ -0,0 +1,14 @@
+approvers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
+reviewers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
diff --git a/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-commands.sh b/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-commands.sh
new file mode 100755
index 0000000000000..c24bf3b205555
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-commands.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Test step: runs `make eval-agents` in the HyperShift checkout, passing
+# EVAL_OUTPUT (under ARTIFACT_DIR) and, when EVAL_FOCUS is set, EVAL_FILTER.
+set -euo pipefail
+
+echo "=== HyperShift Eval Agents ==="
+
+cd /go/src/github.com/openshift/hypershift
+
+# Collect make variables in an array so a filter containing spaces or
+# glob characters reaches make as a single, unexpanded argument.
+MAKE_ARGS=("EVAL_OUTPUT=${ARTIFACT_DIR}/results.xml")
+
+if [ -n "${EVAL_FOCUS:-}" ]; then
+  echo "Filter: ${EVAL_FOCUS}"
+  MAKE_ARGS+=("EVAL_FILTER=${EVAL_FOCUS}")
+fi
+
+echo "Running eval-agents with promptfoo..."
+make eval-agents "${MAKE_ARGS[@]}"
+
+echo "=== Eval Agents Complete ==="
diff --git a/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-ref.metadata.json b/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-ref.metadata.json
new file mode 100644
index 0000000000000..9a1dbfde7d773
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-ref.metadata.json
@@ -0,0 +1,21 @@
+{
+	"path": "hypershift/eval-agents/run/hypershift-eval-agents-run-ref.yaml",
+	"owners": {
+		"approvers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		],
+		"reviewers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-ref.yaml b/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-ref.yaml
new file mode 100644
index 0000000000000..5c81733592f94
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/run/hypershift-eval-agents-run-ref.yaml
@@ -0,0 +1,42 @@
+ref:
+  as: hypershift-eval-agents-run
+  from: claude-ai-helpers
+  commands: hypershift-eval-agents-run-commands.sh
+  env:
+  - name: CLAUDE_CODE_USE_VERTEX
+    default: "1"
+    documentation: |-
+      Enable Vertex AI for Claude Code.
+  - name: CLOUD_ML_REGION
+    default: "global"
+    documentation: |-
+      Google Cloud region for Vertex AI.
+  - name: ANTHROPIC_VERTEX_PROJECT_ID
+    default: "itpc-gcp-hybrid-pe-eng-claude"
+    documentation: |-
+      Google Cloud project ID for Vertex AI authentication.
+  - name: GOOGLE_APPLICATION_CREDENTIALS
+    default: "/var/run/claude-code-service-account/claude-prow"
+    documentation: |-
+      Path to the Google Cloud service account JSON key file.
+  - name: EVAL_FOCUS
+    default: ""
+    documentation: |-
+      Filter pattern to run a subset of eval scenarios.
+      Passed to promptfoo as --filter-pattern.
+  - name: MULTISTAGE_PARAM_OVERRIDE_EVAL_FOCUS
+    default: ""
+    documentation: |-
+      Gangway API override for EVAL_FOCUS.
+  resources:
+    requests:
+      cpu: 500m
+      memory: 1Gi
+  credentials:
+  - namespace: test-credentials
+    name: hypershift-team-claude-prow
+    mount_path: /var/run/claude-code-service-account
+  documentation: |-
+    Run step for the HyperShift eval-agents job.
+    Executes make eval-agents which runs promptfoo to evaluate
+    agent definitions using LLM judge assertions.
diff --git a/ci-operator/step-registry/hypershift/eval-agents/setup/OWNERS b/ci-operator/step-registry/hypershift/eval-agents/setup/OWNERS
new file mode 100644
index 0000000000000..f3e834938cd8b
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/setup/OWNERS
@@ -0,0 +1,14 @@
+approvers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
+reviewers:
+- bryan-cox
+- csrwng
+- celebdor
+- devguyio
+- enxebre
+- sjenning
diff --git a/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-commands.sh b/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-commands.sh
new file mode 100755
index 0000000000000..34c9eab461f90
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-commands.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Pre step: verifies the Claude Code CLI and installs Node.js/npm.
+set -euo pipefail
+
+echo "=== HyperShift Eval Agents Setup ==="
+
+echo "Verifying Claude Code CLI..."
+claude --version || { echo "ERROR: Claude Code CLI not found" >&2; exit 1; }
+
+# NOTE(review): multi-stage steps run in separate containers, so packages
+# installed here may not be present when the run step starts - confirm.
+echo "Installing Node.js and npm..."
+dnf install -y nodejs npm || yum install -y nodejs npm
+node --version
+npm --version
+
+echo "Setup complete"
diff --git a/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.metadata.json b/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.metadata.json
new file mode 100644
index 0000000000000..224fd24e22f05
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.metadata.json
@@ -0,0 +1,21 @@
+{
+	"path": "hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.yaml",
+	"owners": {
+		"approvers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		],
+		"reviewers": [
+			"bryan-cox",
+			"csrwng",
+			"celebdor",
+			"devguyio",
+			"enxebre",
+			"sjenning"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.yaml b/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.yaml
new file mode 100644
index 0000000000000..462541be06262
--- /dev/null
+++ b/ci-operator/step-registry/hypershift/eval-agents/setup/hypershift-eval-agents-setup-ref.yaml
@@ -0,0 +1,32 @@
+ref:
+  as: hypershift-eval-agents-setup
+  from: claude-ai-helpers
+  commands: hypershift-eval-agents-setup-commands.sh
+  env:
+  - name: CLAUDE_CODE_USE_VERTEX
+    default: "1"
+    documentation: |-
+      Enable Vertex AI for Claude Code.
+  - name: CLOUD_ML_REGION
+    default: "global"
+    documentation: |-
+      Google Cloud region for Vertex AI.
+  - name: ANTHROPIC_VERTEX_PROJECT_ID
+    default: "itpc-gcp-hybrid-pe-eng-claude"
+    documentation: |-
+      Google Cloud project ID for Vertex AI authentication.
+  - name: GOOGLE_APPLICATION_CREDENTIALS
+    default: "/var/run/claude-code-service-account/claude-prow"
+    documentation: |-
+      Path to the Google Cloud service account JSON key file.
+  resources:
+    requests:
+      cpu: 100m
+      memory: 200Mi
+  credentials:
+  - namespace: test-credentials
+    name: hypershift-team-claude-prow
+    mount_path: /var/run/claude-code-service-account
+  documentation: |-
+    Setup step for the HyperShift eval-agents job.
+    Installs Node.js/npm for promptfoo and verifies Claude Code CLI.