diff --git a/.github/workflows/opencode-review.yml b/.github/workflows/opencode-review.yml index 4b6a0abb..f2a317be 100644 --- a/.github/workflows/opencode-review.yml +++ b/.github/workflows/opencode-review.yml @@ -36,8 +36,8 @@ concurrency: group: >- opencode-review-${{ github.event_name }}-${{ github.event.pull_request.base.repo.full_name || github.event.inputs.target_repository || github.repository }}-${{ - github.event_name == 'pull_request_target' && format('pr-{0}-{1}', github.event.pull_request.number, github.event.pull_request.head.sha) || - github.event.inputs.pr_number != '' && github.event.inputs.pr_head_sha != '' && format('pr-{0}-{1}', github.event.inputs.pr_number, github.event.inputs.pr_head_sha) || + github.event_name == 'pull_request_target' && format('pr-{0}', github.event.pull_request.number) || + github.event.inputs.pr_number != '' && format('pr-{0}', github.event.inputs.pr_number) || github.event.inputs.pr_number || github.run_id }} cancel-in-progress: true @@ -2378,12 +2378,6 @@ jobs: fi } - gh_error_is_rate_limited() { - local error_file="$1" - [ -s "$error_file" ] || return 1 - grep -Eiq '(API rate limit exceeded|rate limit exceeded|secondary rate limit)' "$error_file" - } - emit_change_flow_mermaid_graph() { local merge_state="${1:-UNKNOWN}" local changed_files_file surfaces_file idx next_node @@ -2633,6 +2627,18 @@ jobs: APPROVAL_CHECK_WAIT_SLEEP_SECONDS: "30" CHECK_LOOKUP_RETRY_ATTEMPTS: "5" CHECK_LOOKUP_RETRY_SLEEP_SECONDS: "5" + OPENCODE_MODEL_CANDIDATES: "github-models/openai/gpt-5-nano" + OPENCODE_MODEL_ATTEMPTS: "1" + OPENCODE_RUN_TIMEOUT_SECONDS: "240" + OPENCODE_EXPORT_TIMEOUT_SECONDS: "120" + OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "360" + OPENCODE_BACKOFF_INITIAL_SECONDS: "30" + OPENCODE_BACKOFF_MAX_SECONDS: "30" + OPENCODE_FIRST_ATTEMPT_AGENT: ci-review + OPENCODE_AGENT: ci-review-fallback + OPENCODE_EXHAUSTED_REKICK_INITIAL_SLEEP_SECONDS: "15" + OPENCODE_EXHAUSTED_REKICK_MAX_SLEEP_SECONDS: "300" + 
OPENCODE_EXHAUSTED_REKICK_MAX_TOTAL_SECONDS: "4200" run: | set -euo pipefail echo "::group::OpenCode Review Approval Gate" @@ -2902,26 +2908,12 @@ jobs: fi warn_gh_publication_failure "pull review with fallback review token" "$gh_error_file" fi - if [ "$event" = "APPROVE" ] && gh_error_is_rate_limited "$gh_error_file"; then - rm -f "$gh_error_file" "$review_payload_file" - update_review_overview "$event" "$body" || true - if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then - { - printf '## OpenCode approve review publication skipped\n\n' - printf -- '- Head SHA: `%s`\n' "$HEAD_SHA" - printf -- '- Workflow run: %s\n' "$RUN_ID" - printf -- '- Workflow attempt: %s\n\n' "$RUN_ATTEMPT" - printf 'OpenCode completed the approval gate, but GitHub rejected the pull-review write due to API rate limiting. The required workflow remains successful because failed checks, mergeability, and unresolved review threads were already gated before approval.\n\n' - printf '%s\n' "$body" - } >>"$GITHUB_STEP_SUMMARY" - fi - printf '::warning::OpenCode could not publish the APPROVE pull review for head %s because the GitHub API rate limit was exceeded; keeping the successful approval gate result because pre-approval source, check, mergeability, and review-thread gates passed.\n' "$HEAD_SHA" - return 0 - fi rm -f "$gh_error_file" "$review_payload_file" update_review_overview "$event" "$body" || true printf '::error::OpenCode could not publish the pull review for head %s, so the review state was not changed.\n' "$HEAD_SHA" - echo "::endgroup::" + case "$event" in + REQUEST_CHANGES | INLINE_COMMENT_PUBLISH_FAILED) echo "::endgroup::" ;; + esac exit 1 fi rm -f "$gh_error_file" "$review_payload_file" @@ -4785,6 +4777,84 @@ jobs: scripts/ci/collect_failed_check_evidence.sh "$evidence_file" }
+          rekick_model_pool_on_exhaustion() {
+            # Re-run the review model pool while its last outcome is "exhausted".
+            # Reads OPENCODE_MODEL_POOL_OUTCOME and the OPENCODE_EXHAUSTED_REKICK_*
+            # knobs; stores the latest review_status/review_model values back into
+            # OPENCODE_MODEL_POOL_OUTCOME and OPENCODE_MODEL_POOL_MODEL for the caller.
+            local rekick_attempt=1
+            local rekick_output outcome model rekick_status
+            local sleep_seconds="${OPENCODE_EXHAUSTED_REKICK_INITIAL_SLEEP_SECONDS:-15}"
+            local max_sleep_seconds="${OPENCODE_EXHAUSTED_REKICK_MAX_SLEEP_SECONDS:-300}"
+            local max_total_seconds="${OPENCODE_EXHAUSTED_REKICK_MAX_TOTAL_SECONDS:-4200}"
+            local started_at now elapsed
+
+            # Non-numeric knobs would break the integer tests below; use the defaults.
+            case "$sleep_seconds" in '' | *[!0-9]*) sleep_seconds=15 ;; esac
+            case "$max_sleep_seconds" in '' | *[!0-9]*) max_sleep_seconds=300 ;; esac
+            case "$max_total_seconds" in '' | *[!0-9]*) max_total_seconds=4200 ;; esac
+
+            started_at="$(date +%s)"
+
+            while [ "${OPENCODE_MODEL_POOL_OUTCOME:-}" = "exhausted" ]; do
+              printf 'OpenCode model pool exhausted; re-kicking model pool (attempt %s).\n' "$rekick_attempt"
+              rekick_output="$(mktemp)"
+              rekick_status=0
+              # GITHUB_OUTPUT is redirected to a scratch file for this run; the runner is
+              # presumably what writes review_status/review_model there (confirm in script).
+              GITHUB_OUTPUT="$rekick_output" OPENCODE_OUTPUT_FILE="$OPENCODE_MODEL_POOL_OUTPUT_FILE" \
+                bash "$GITHUB_WORKSPACE/scripts/ci/run_opencode_review_model_pool.sh" || rekick_status=$?
+              if [ "$rekick_status" -ne 0 ]; then
+                printf 'OpenCode model pool re-kick command exited with status %s; continuing with fail-closed handling.\n' "$rekick_status" >&2
+              fi
+              # When a key appears more than once, the last occurrence wins.
+              outcome="$(awk -F= '/^review_status=/{v=$2} END{print v}' "$rekick_output")"
+              model="$(awk -F= '/^review_model=/{v=$2} END{print v}' "$rekick_output")"
+              rm -f "$rekick_output"
+
+              if [ -z "$outcome" ]; then
+                printf 'OpenCode model pool re-kick produced no review_status output; treating outcome as exhausted.\n' >&2
+                outcome="exhausted"
+              fi
+              OPENCODE_MODEL_POOL_OUTCOME="$outcome"
+              OPENCODE_MODEL_POOL_MODEL="$model"
+              if [ "$outcome" = "success" ]; then
+                if [ -z "$model" ]; then
+                  printf 'OpenCode model pool re-kick succeeded but published an empty review_model.\n' >&2
+                fi
+                printf 'OpenCode model pool re-kick recovered with model: %s\n' "${model:-unknown}"
+                break
+              fi
+              # Any other non-exhausted outcome ends the loop anyway; leave now instead
+              # of logging "still exhausted" and sleeping through a pointless back-off.
+              if [ "$outcome" != "exhausted" ]; then
+                printf 'OpenCode model pool re-kick ended with outcome %s; stopping re-kicks.\n' "$outcome" >&2
+                break
+              fi
+              # Stop before a back-off that would run past the total budget (0 = no cap).
+              if [ "$max_total_seconds" -gt 0 ]; then
+                now="$(date +%s)"
+                elapsed="$((now - started_at))"
+                if [ "$((elapsed + sleep_seconds))" -ge "$max_total_seconds" ]; then
+                  printf 'OpenCode model pool remained exhausted for %s seconds; stopping re-kicks and continuing with fail-closed handling.\n' "$elapsed" >&2
+                  break
+                fi
+              fi
+              if [ "$sleep_seconds" -gt 0 ]; then
+                printf 'OpenCode model pool still exhausted after re-kick attempt %s; retrying in %s seconds.\n' "$rekick_attempt" "$sleep_seconds"
+                sleep "$sleep_seconds"
+              fi
+              # Double the back-off for the next round, capped at max_sleep_seconds.
+              if [ "$sleep_seconds" -lt "$max_sleep_seconds" ]; then
+                sleep_seconds=$((sleep_seconds * 2))
+                if [ "$sleep_seconds" -gt "$max_sleep_seconds" ]; then
+                  sleep_seconds="$max_sleep_seconds"
+                fi
+              fi
+              rekick_attempt=$((rekick_attempt + 1))
+            done
+          }
+
 live_head_sha="$(gh api -X GET "repos/${GH_REPOSITORY}/pulls/${PR_NUMBER}" --jq '.head.sha')" if [ "$live_head_sha" != "$HEAD_SHA" ]; then echo "stale OpenCode run: event head=${HEAD_SHA}, live head=${live_head_sha}; skipping review side effects." @@ -4796,6 +4866,7 @@ jobs: request_changes_for_coverage_evidence_failure fi + rekick_model_pool_on_exhaustion opencode_review_outcome="${OPENCODE_MODEL_POOL_OUTCOME:-unknown}" if [ "$opencode_review_outcome" != "success" ]; then diff --git a/scripts/ci/test_strix_quick_gate.sh b/scripts/ci/test_strix_quick_gate.sh index ffea57cd..6ab71381 100755 --- a/scripts/ci/test_strix_quick_gate.sh +++ b/scripts/ci/test_strix_quick_gate.sh @@ -508,15 +508,15 @@ assert_opencode_review_uses_codegraph_and_gpt5_fallback() { assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "Read and follow the complete review contract" "opencode review uses a compact launcher while keeping the full review contract on disk" assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "tokens_limit_reached" "opencode review detects provider context-window overflow" assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "skipping remaining attempts for this model" "opencode review skips same-model retries after context-window overflow" - assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "600"' "opencode primary review has a bounded per-model timeout before trying fallback models" - assert_file_contains "$workflow_file" 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "3600"' "opencode model pool has a one-hour total retry budget" + assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "180"' "opencode primary review has a 
bounded per-model timeout before trying fallback models" + assert_file_contains "$workflow_file" 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "1200"' "opencode model pool has a twenty-minute total retry budget" assert_file_contains "$workflow_file" "needs.coverage-evidence.result == 'success'" "opencode model pool only runs after coverage evidence passed" assert_file_contains "$workflow_file" "id: opencode_review_model_pool" "opencode DeepSeek V3 fallback still runs after a primary model timeout or step failure when coverage evidence passed" assert_file_contains "$workflow_file" "always()" "opencode fallback chain uses always() so failed model steps cannot skip every fallback" assert_file_contains "$workflow_file" 'OPENCODE_MODEL_ATTEMPTS: "1"' "opencode fallback tries the catalog promptly instead of spending the entire review on one model" assert_file_contains "$workflow_file" "Run OpenCode PR Review model pool" "opencode review includes a broad catalog fallback pool" assert_file_contains "$workflow_file" "continue-on-error: true" "opencode model step timeouts do not prevent fallback review publication" - assert_file_contains "$workflow_file" "github-models/openai/gpt-5-chat github-models/openai/gpt-5-mini github-models/openai/gpt-5-nano github-models/openai/o3 github-models/openai/o3-mini github-models/openai/o4-mini github-models/mistral-ai/mistral-medium-2505 github-models/meta/llama-4-maverick-17b-128e-instruct-fp8 github-models/meta/llama-4-scout-17b-16e-instruct" "opencode review tries catalog-available tool-calling fallbacks after DeepSeek and GPT-5 paths" + assert_file_contains "$workflow_file" "github-models/openai/o4-mini github-models/openai/o3-mini github-models/openai/o3 github-models/mistral-ai/mistral-medium-2505 github-models/meta/llama-4-maverick-17b-128e-instruct-fp8 github-models/meta/llama-4-scout-17b-16e-instruct" "opencode review tries catalog-available tool-calling fallbacks before spending time on known failing candidates" assert_file_contains 
"$workflow_file" "The publish gate re-runs source-backed validation against PR-head data" "opencode review publish gate validates model output against the PR-head worktree" assert_file_contains "$workflow_file" '"openai/o3"' "opencode config declares OpenAI o3 fallback" assert_file_contains "$workflow_file" '"openai/o4-mini"' "opencode config declares OpenAI o4-mini fallback" @@ -618,10 +618,10 @@ assert_opencode_review_uses_codegraph_and_gpt5_fallback() { assert_file_contains "$workflow_file" "no model produced a valid review control block" "opencode model-failure path documents why approval is withheld" assert_file_contains "$workflow_file" 'OPENCODE_MODEL_ATTEMPTS: "1"' "opencode primary and fallback paths avoid multi-attempt stalls on one model" assert_file_contains "$workflow_file" 'OPENCODE_MODEL_ATTEMPTS: "1"' "opencode catalog fallback tries each model once before moving on" - assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "600"' "opencode catalog fallback has a bounded model review timeout before step timeout" + assert_file_contains "$workflow_file" 'OPENCODE_RUN_TIMEOUT_SECONDS: "180"' "opencode catalog fallback has a bounded model review timeout before step timeout" assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "OpenCode %s attempt %s/%s failed" "opencode catalog fallback records per-model retry failures" assert_file_contains "$REPO_ROOT/scripts/ci/run_opencode_review_model_pool.sh" "exponential backoff" "opencode model retry paths use exponential backoff instead of fixed sleeps" - assert_file_contains "$workflow_file" "github-models/openai/o3 github-models/openai/o3-mini github-models/openai/o4-mini" "opencode review includes additional OpenAI reasoning model fallbacks" + assert_file_contains "$workflow_file" "github-models/openai/o4-mini github-models/openai/o3-mini github-models/openai/o3" "opencode review includes additional OpenAI reasoning model fallbacks" assert_file_contains "$workflow_file" 
"coverage-evidence:" "opencode workflow measures coverage before review" assert_file_contains "$workflow_file" "github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request_target'" "manual and required OpenCode reviews measure coverage instead of approving skipped coverage evidence" assert_file_contains "$workflow_file" "Exchange OpenCode app token for target repository coverage reads" "coverage evidence can read private target repositories through the OpenCode app token" @@ -791,10 +791,10 @@ assert_opencode_review_uses_codegraph_and_gpt5_fallback() { assert_file_contains "$workflow_file" 'warn_gh_publication_failure "initial review overview comment"' "opencode initial overview comment soft-fails permission-denied publication errors" assert_file_contains "$workflow_file" 'warn_gh_publication_failure "pull review with primary review token"' "opencode approval explains primary review publication failures" assert_file_contains "$workflow_file" 'warn_gh_publication_failure "pull review with fallback review token"' "opencode approval explains fallback review publication failures" - assert_file_contains "$workflow_file" 'gh_error_is_rate_limited()' "opencode approval detects rate-limited publication failures" - assert_file_contains "$workflow_file" '[ "$event" = "APPROVE" ] && gh_error_is_rate_limited "$gh_error_file"' "opencode approval only soft-fails rate-limited approve publication failures" - assert_file_contains "$workflow_file" 'OpenCode could not publish the APPROVE pull review for head %s because the GitHub API rate limit was exceeded' "opencode approval keeps successful gate results for rate-limited approval review publication" - assert_file_contains "$workflow_file" 'OpenCode could not publish the pull review for head %s, so the review state was not changed.' 
"opencode approval fails when review publication fails" + assert_file_contains "$workflow_file" 'OpenCode could not publish the pull review for head %s, so the review state was not changed.' "opencode approval fails closed when review publication fails" + assert_file_contains "$workflow_file" 'REQUEST_CHANGES | INLINE_COMMENT_PUBLISH_FAILED) echo "::endgroup::" ;;' "opencode only closes a review-body log group for events that opened one" + assert_file_not_contains "$workflow_file" 'OpenCode approve review publication skipped' "opencode approval does not report skipped approval publication as success" + assert_file_not_contains "$workflow_file" 'keeping the successful approval gate result' "opencode approval does not soft-pass without publishing an approving review" assert_file_contains "$workflow_file" 'warn_gh_publication_failure "review overview comment"' "opencode approval soft-fails permission-denied overview publication" assert_file_not_contains "$workflow_file" 'gh api -X DELETE "repos/${GH_REPOSITORY}/issues/comments/${comment_id}"' "opencode review must not delete Review Overview gate evidence" assert_file_not_contains "$workflow_file" '--file "$OPENCODE_EVIDENCE_FILE"' "opencode review must not attach evidence content to GitHub Models requests" @@ -805,9 +805,8 @@ assert_opencode_review_uses_codegraph_and_gpt5_fallback() { assert_file_contains "$workflow_file" 'GH_TOKEN: ${{ secrets.OPENCODE_APPROVE_TOKEN || steps.review_read_app_token.outputs.token || github.token }}' "opencode manual dispatch uses the cross-repo approval token for target PR evidence lookups with app-token fallback" assert_file_contains "$workflow_file" 'repos/${GH_REPOSITORY}' "opencode review workflow uses env-backed repository context in shell commands" assert_file_contains "$workflow_file" "Run OpenCode PR Review model pool" "opencode review starts the central model pool" - assert_file_contains "$workflow_file" "github-models/deepseek/deepseek-r1-0528" "opencode review starts with a 
reachable DeepSeek R1 reasoning model" - assert_file_contains "$workflow_file" "github-models/deepseek/deepseek-v3-0324" "opencode review has a reachable DeepSeek V3 fallback model" - assert_file_contains "$workflow_file" "github-models/openai/gpt-5" "opencode review still has a bounded GPT-5 fallback model" + assert_file_contains "$workflow_file" "OPENCODE_MODEL_CANDIDATES: \"github-models/openai/o4-mini github-models/openai/o3-mini github-models/openai/o3" "opencode review starts with bounded tool-calling OpenAI reasoning models" + assert_file_not_contains "$workflow_file" "OPENCODE_MODEL_CANDIDATES: \"github-models/deepseek" "opencode review does not start with known unsupported DeepSeek tool-use candidates" assert_file_contains "$workflow_file" "Publish bounded OpenCode review comment" "opencode review workflow publishes the agent control comment for the approval gate" assert_file_contains "$workflow_file" "statusCheckRollup" "opencode review workflow reads current-head GitHub Checks before approval" assert_file_contains "$workflow_file" "OPENCODE_FAILED_CHECK_EVIDENCE_FILE" "opencode review workflow persists failed-check evidence across review and approval steps" diff --git a/tests/test_opencode_agent_contract.py b/tests/test_opencode_agent_contract.py index c5dd7a9b..04eda5db 100644 --- a/tests/test_opencode_agent_contract.py +++ b/tests/test_opencode_agent_contract.py @@ -186,6 +186,9 @@ def test_workflow_provisions_sandbox_tool_and_reviewer_agent(): assert "Run OpenCode PR Review model pool" in workflow assert "opencode_review_model_pool" in workflow assert "run_opencode_review_model_pool.sh" in workflow + assert "rekick_model_pool_on_exhaustion" in workflow + assert "format('pr-{0}', github.event.pull_request.number)" in workflow + assert "format('pr-{0}-{1}', github.event.pull_request.number, github.event.pull_request.head.sha)" not in workflow assert "OPENCODE_MODEL_CANDIDATES" in workflow model_pool_runner = 
Path("scripts/ci/run_opencode_review_model_pool.sh").read_text(encoding="utf-8") assert "assert_reasoning_effort_for_candidate" in model_pool_runner @@ -220,6 +223,9 @@ def test_workflow_provisions_sandbox_tool_and_reviewer_agent(): assert 'OPENCODE_EXPORT_TIMEOUT_SECONDS: "120"' in workflow assert 'OPENCODE_TOTAL_RETRY_BUDGET_SECONDS: "360"' in workflow assert 'OPENCODE_BACKOFF_MAX_SECONDS: "30"' in workflow + assert 'OPENCODE_EXHAUSTED_REKICK_INITIAL_SLEEP_SECONDS: "15"' in workflow + assert 'OPENCODE_EXHAUSTED_REKICK_MAX_SLEEP_SECONDS: "300"' in workflow + assert 'OPENCODE_EXHAUSTED_REKICK_MAX_TOTAL_SECONDS: "4200"' in workflow assert "${{ runner.temp }}/opencode-review-model-pool.md" in workflow assert re.search(r'check-runs" \\\n\s+-f per_page=100 \\\n\s+--paginate \\\n\s+--slurp \|\n\s+jq -r "\$jq_filter"', workflow) assert not re.search(r"--slurp\s*\\\n\s*--jq", workflow)