From 0746ca4a46e773f5a263989b2c55e9dcbe434051 Mon Sep 17 00:00:00 2001
From: jp-cruz <jp-cruz@users.noreply.github.com>
Date: Wed, 17 Jun 2026 21:43:59 -0500
Subject: [PATCH 1/2] feat: extend audit rig to multi-language supply-chain
 coverage

Adds osv-scanner (multi-ecosystem dependency vuln + malicious-package
scan covering Python/npm/Cargo), shellcheck, and a custom Semgrep
ruleset for risky-exec / supply-chain patterns (curl|bash installers,
PowerShell download-cradles, decode-and-exec, TLS-verification bypass,
VCS-URL installs, npm install-scripts re-enabled).

- semgrep/legionforge-risky-exec.yml: 7 custom rules (validated 8/8 on
  fixtures, 0 false positives on clean input)
- .github/workflows/supply-chain.yml: reusable CI (osv-scanner +
  risky-exec + optional token-gated Socket.dev job)
- scripts/audit.sh + audit.ps1: osv-scanner/shellcheck/risky-exec
  sections, each self-skipping when language/tool absent
- .pre-commit-config.yaml: shellcheck + lockfile-triggered osv-scanner
- README: inventory, CI wiring, multi-language note, tool versions

cargo-deny (Rust policy) and PSScriptAnalyzer (PowerShell SAST) are the
documented next pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .github/workflows/supply-chain.yml | 100 ++++++++++++++++++++++++
 .pre-commit-config.yaml            |  18 +++++
 README.md                          |  20 ++++-
 scripts/audit.ps1                  |  61 ++++++++++++++-
 scripts/audit.sh                   |  60 ++++++++++++++-
 semgrep/legionforge-risky-exec.yml | 120 +++++++++++++++++++++++++++++
 6 files changed, 372 insertions(+), 7 deletions(-)
 create mode 100644 .github/workflows/supply-chain.yml
 create mode 100644 semgrep/legionforge-risky-exec.yml

diff --git a/.github/workflows/supply-chain.yml b/.github/workflows/supply-chain.yml
new file mode 100644
index 0000000..07722db
--- /dev/null
+++ b/.github/workflows/supply-chain.yml
@@ -0,0 +1,100 @@
+# Reusable workflow: supply-chain hardening.
+#
+#   osv-scanner  — multi-ecosystem dependency vuln + malicious-package scan
+#                  (Python / npm / Cargo / Go / … from lockfiles)
+#   risky-exec   — LegionForge custom Semgrep rules: curl|bash installers,
+#                  PowerShell download-cradles, decode-and-exec, TLS bypass
+#   socket       — OPTIONAL behavioral npm analysis; runs only when the caller
+#                  provides a SOCKET_SECURITY_API_KEY secret
+#
+# Caller syntax:
+#
+#   jobs:
+#     supply-chain:
+#       uses: LegionForge/dev-rig/.github/workflows/supply-chain.yml@main
+#       secrets: inherit            # only needed to enable the Socket job
+#
+# Complements audit.yml (pip-audit/licenses). osv-scanner adds the malicious-
+# package feed and the npm/Cargo ecosystems that pip-audit doesn't cover.
+
+name: Supply Chain
+
+on:
+  workflow_call:
+    inputs:
+      osv-version:
+        description: "osv-scanner image tag to pin"
+        required: false
+        type: string
+        default: "v2.3.8"
+      rig-ref:
+        description: "dev-rig ref to pull the custom risky-exec ruleset from"
+        required: false
+        type: string
+        default: "main"
+    secrets:
+      SOCKET_SECURITY_API_KEY:
+        description: "Optional Socket.dev API key — enables the npm behavioral scan"
+        required: false
+
+jobs:
+  # ── Dependency vulnerabilities + known-malicious packages ────────────────────
+  osv-scanner:
+    name: osv-scanner (deps + malicious packages)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      # Pinned container image (matches the rig's docker-based tooling pattern).
+      # --allow-no-lockfiles: a repo with nothing to scan is a pass, not a fail.
+      - name: osv-scanner scan
+        env: { OSV_VERSION: "${{ inputs.osv-version }}" }   # input reaches the shell as data, never as script text
+        run: |
+          docker run --rm -v "${{ github.workspace }}:/src" "ghcr.io/google/osv-scanner:${OSV_VERSION}" \
+            scan source --recursive --allow-no-lockfiles /src
+
+  # ── Custom risky-exec / supply-chain pattern rules ───────────────────────────
+  risky-exec:
+    name: risky-exec (custom Semgrep rules)
+    runs-on: ubuntu-latest
+    container: semgrep/semgrep
+    steps:
+      - name: Checkout caller repo
+        uses: actions/checkout@v4
+
+      # The ruleset lives in dev-rig, not the consuming repo — pull it alongside.
+      - name: Checkout dev-rig ruleset
+        uses: actions/checkout@v4
+        with:
+          repository: LegionForge/dev-rig
+          ref: ${{ inputs.rig-ref }}
+          path: .dev-rig
+
+      - name: semgrep risky-exec
+        run: |
+          semgrep --config .dev-rig/semgrep/legionforge-risky-exec.yml \
+            --error --metrics=off \
+            --exclude .dev-rig .
+
+  # ── Optional Socket.dev behavioral npm scan ──────────────────────────────────
+  # Self-skips when no SOCKET_SECURITY_API_KEY secret is provided, so callers
+  # without a Socket account incur no failure and leak no dependency data.
+  socket:
+    name: socket (optional npm behavioral)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Socket scan (skips without token)
+        env:
+          SOCKET_SECURITY_API_KEY: ${{ secrets.SOCKET_SECURITY_API_KEY }}
+        run: |
+          if [ -z "${SOCKET_SECURITY_API_KEY:-}" ]; then
+            echo "SOCKET_SECURITY_API_KEY not set — skipping optional Socket scan."
+            exit 0
+          fi
+          if [ ! -f package.json ]; then
+            echo "No package.json — nothing for Socket to scan."
+            exit 0
+          fi
+          npx -y @socketsecurity/cli@latest scan create --report .
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7d9fbc0..670c4ff 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -52,6 +52,24 @@ repos:
           - psutil
           - uvicorn
 
+  # ── shellcheck — shell script correctness + footguns ──────────────────────
+  - repo: https://github.com/koalaman/shellcheck-precommit
+    rev: v0.10.0
+    hooks:
+      - id: shellcheck
+
+  # ── osv-scanner — dependency vuln + malicious-package scan (multi-ecosystem)─
+  # Local hook (uses the installed osv-scanner binary). Triggers only when a
+  # lockfile changes; scans the whole tree so cross-ecosystem repos are covered.
+  - repo: local
+    hooks:
+      - id: osv-scanner
+        name: osv-scanner (deps + malicious packages)
+        entry: osv-scanner scan source --recursive --allow-no-lockfiles .
+        language: system
+        pass_filenames: false
+        files: '(requirements.*\.txt|poetry\.lock|uv\.lock|Pipfile\.lock|package-lock\.json|pnpm-lock\.yaml|yarn\.lock|Cargo\.lock|go\.sum)$'
+
   # ── gitleaks — secret scanning (OWASP SAMM — Implementation/Secure Build) ──
   - repo: https://github.com/gitleaks/gitleaks
     rev: v8.18.4
diff --git a/README.md b/README.md
index f35cd97..ea9b12f 100644
--- a/README.md
+++ b/README.md
@@ -12,10 +12,13 @@ Shared CI pipeline, pre-commit hooks, and pytest fixtures for LegionForge projec
 | `.github/workflows/test.yml` | Reusable CI job: pytest + coverage enforcement |
 | `.github/workflows/sast.yml` | Reusable CI job: semgrep (p/python + p/fastapi) + CodeQL |
 | `.github/workflows/audit.yml` | Reusable CI job: pip-audit CVE scan + pip-licenses compliance |
+| `.github/workflows/supply-chain.yml` | Reusable CI job: osv-scanner (multi-ecosystem deps + malicious packages) + risky-exec custom rules + optional Socket.dev |
 | `.github/workflows/secrets.yml` | Reusable CI job: gitleaks secret scanning |
 | `.github/workflows/sbom.yml` | Reusable CI job: CycloneDX SBOM generation |
+| `semgrep/legionforge-risky-exec.yml` | Custom Semgrep ruleset: curl\|bash installers, PowerShell download-cradles, decode-and-exec, TLS bypass |
+| `scripts/audit.sh` / `scripts/audit.ps1` | Local audit harness — Python + osv-scanner + shellcheck + semgrep + risky-exec |
 | `.pre-commit-hooks.yaml` | Hook definitions consumed via pre-commit |
-| `.pre-commit-config.yaml` | Default config to copy into new projects (includes gitleaks) |
+| `.pre-commit-config.yaml` | Default config to copy into new projects (includes gitleaks, shellcheck, osv-scanner) |
 | `SECURITY.md` | Vulnerability disclosure policy template — copy and adjust |
 | `src/legionforge_dev_rig/fixtures/` | Shared pytest fixtures (httpx mocking, etc.) |
 | `examples/` | Template conftest.py and example tests |
@@ -94,8 +97,19 @@ jobs:
 
   audit:
     uses: LegionForge/dev-rig/.github/workflows/audit.yml@main
+
+  supply-chain:
+    uses: LegionForge/dev-rig/.github/workflows/supply-chain.yml@main
+    secrets: inherit                   # ← only needed to enable the optional Socket.dev scan
 ```
 
+> **Multi-language note.** `supply-chain.yml` and the local `audit.sh`/`audit.ps1`
+> harness cover Python, JS/TS, and Rust (via osv-scanner lockfile scanning) plus
+> shell (shellcheck) and risky-exec patterns in shell/PowerShell/CI YAML. Each
+> section self-skips when its files or tools aren't present, so the rig is safe
+> to wire into any repo regardless of language mix. cargo-deny (Rust policy) and
+> PSScriptAnalyzer (PowerShell SAST) are the planned next additions.
+
 ### 4 — Add shared fixtures to tests/conftest.py
 
 ```python
@@ -133,6 +147,8 @@ pre-commit autoupdate
 | bandit | 1.7 | `pyproject.toml [tool.bandit]` |
 | mypy | 1.10 | `pyproject.toml [tool.mypy]` |
 | pip-audit | 2.7 | no config — runs against installed packages |
-| semgrep | 1.70 | rulesets passed as CLI args |
+| osv-scanner | 2.3 | no config — scans lockfiles recursively (`brew install osv-scanner`) |
+| shellcheck | 0.10 | inline directives / `.shellcheckrc` (`brew install shellcheck`) |
+| semgrep | 1.70 | rulesets passed as CLI args + `semgrep/legionforge-risky-exec.yml` |
 | pytest-cov | 5 | `pyproject.toml [tool.pytest.ini_options]` |
 | pre-commit | 3.7 | `.pre-commit-config.yaml` |
diff --git a/scripts/audit.ps1 b/scripts/audit.ps1
index a540f0e..1b80923 100644
--- a/scripts/audit.ps1
+++ b/scripts/audit.ps1
@@ -1,11 +1,15 @@
 <#
 .SYNOPSIS
-    LegionForge audit harness — ruff, bandit, mypy, pip-audit, semgrep.
+    LegionForge audit harness — ruff, bandit, mypy, pip-audit, osv-scanner,
+    shellcheck, semgrep, and the custom risky-exec supply-chain ruleset.
 
 .DESCRIPTION
-    Runs all five static-analysis and security tools against a project directory.
+    Runs the static-analysis and security tools against a project directory.
     Native tools (ruff/bandit/mypy/pip-audit) run directly via py -3.13.
-    Semgrep runs via Docker to avoid Windows/Python 3.13+ build failures.
+    osv-scanner (multi-ecosystem dependency + malicious-package scan) and
+    shellcheck run as native binaries; both self-skip when absent.
+    Semgrep and the custom risky-exec ruleset run via Docker to avoid
+    Windows/Python 3.13+ build failures.
 
     Per-project configuration lives in two small files at the project root:
       .audit-dirs       — space-separated source dirs (e.g. "llm_valet svcmgr")
@@ -32,6 +36,10 @@ param(
 
 $ProjectPath = (Resolve-Path $ProjectPath).Path
 
+# Dev-rig root (this script lives in <rig>/scripts/) — locates the bundled
+# custom Semgrep ruleset regardless of the consuming project's CWD.
+$RigRoot = Split-Path $PSScriptRoot -Parent
+
 # ── Read per-project config ───────────────────────────────────────────────────
 
 # Which source directories to scan (ruff / bandit / mypy / semgrep).
@@ -120,6 +128,36 @@ Invoke-Tool "pip-audit" {
     py -3.13 -m pip_audit .
 }
 
+# ── osv-scanner — multi-ecosystem dependency + malicious-package scan ─────────
+# One pass over every lockfile (Python / npm / Cargo / …), checked against OSV
+# (CVEs + malicious-packages feed). --allow-no-lockfiles: nothing to scan is a
+# pass, not a failure. Self-skips when the binary isn't installed.
+
+if (Get-Command osv-scanner -ErrorAction SilentlyContinue) {
+    Invoke-Tool "osv-scanner" {
+        osv-scanner scan source --recursive --allow-no-lockfiles .
+    }
+} else {
+    Write-Section "osv-scanner"
+    Write-Host "  osv-scanner not installed — skipping (winget install Google.osv-scanner)" -ForegroundColor DarkGray
+}
+
+# ── shellcheck — shell script correctness + footguns ─────────────────────────
+# Runs only when the repo contains shell scripts.
+
+$shellFiles = Get-ChildItem -Path $ProjectPath -Recurse -File -Include *.sh, *.bash -ErrorAction SilentlyContinue |
+    Where-Object { $_.FullName -notmatch '[\\/](\.git|\.venv|node_modules)[\\/]' }
+if ($shellFiles) {
+    if (Get-Command shellcheck -ErrorAction SilentlyContinue) {
+        Invoke-Tool "shellcheck" {
+            shellcheck @($shellFiles.FullName)
+        }
+    } else {
+        Write-Section "shellcheck"
+        Write-Host "  $($shellFiles.Count) shell script(s) found but shellcheck not installed — skipping" -ForegroundColor DarkGray
+    }
+}
+
 # ── semgrep — OWASP / framework-specific vulnerability patterns ───────────────
 # Runs inside the official Docker image to avoid Windows/Python 3.13 build issues.
 # First run pulls the image (~200 MB); subsequent runs use the local cache.
@@ -142,6 +180,23 @@ Invoke-Tool "semgrep" {
     & docker @dockerArgs
 }
 
+# ── risky-exec — LegionForge custom supply-chain / RCE pattern rules ──────────
+# Flags curl|bash installers, PowerShell download-cradles, decode-and-exec, and
+# TLS-bypass patterns ecosystem scanners can't see. Same Docker semgrep image,
+# mounting the rig's bundled ruleset read-only.
+
+$riskyRules = Join-Path $RigRoot "semgrep/legionforge-risky-exec.yml"
+if (Test-Path $riskyRules) {
+    Invoke-Tool "risky-exec" {
+        $rigSemgrep = Join-Path $RigRoot "semgrep"
+        & docker run --rm `
+            -v "${ProjectPath}:/src" `
+            -v "${rigSemgrep}:/rules:ro" `
+            semgrep/semgrep `
+            semgrep --config /rules/legionforge-risky-exec.yml /src --error
+    }
+}
+
 # ── Summary ───────────────────────────────────────────────────────────────────
 
 Write-Section "Summary"
diff --git a/scripts/audit.sh b/scripts/audit.sh
index 1a78094..b062f63 100644
--- a/scripts/audit.sh
+++ b/scripts/audit.sh
@@ -1,5 +1,12 @@
 #!/usr/bin/env bash
-# LegionForge audit harness — ruff, bandit, mypy, pip-audit, semgrep.
+# LegionForge audit harness.
+#   Python  : ruff, bandit, mypy, pip-audit
+#   All deps: osv-scanner (Python / npm / Cargo / … vulns + malicious packages)
+#   Shell   : shellcheck
+#   Patterns: semgrep (per-project packs) + risky-exec (custom supply-chain rules)
+#
+# Language sections self-skip when their files/tools aren't present, so the same
+# harness runs cleanly against single- and multi-language repos.
 #
 # Usage:
 #   ./audit.sh                        # from the project root
@@ -11,6 +18,11 @@
 
 set -euo pipefail
 
+# Resolve the dev-rig root (this script lives in <rig>/scripts/) so the bundled
+# custom Semgrep ruleset is found regardless of the consuming project's CWD.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+RIG_ROOT="$(dirname "$SCRIPT_DIR")"
+
 PROJECT="${1:-$(pwd)}"
 PROJECT="$(cd "$PROJECT" && pwd)"   # resolve to absolute path
 
@@ -37,6 +49,7 @@ GRAY='\033[0;90m'
 RESET='\033[0m'
 
 declare -A RESULTS
+declare -a ORDER=()   # preserves run order for the summary (assoc arrays don't)
 
 section() { echo -e "\n${GRAY}── $1 $(printf '─%.0s' $(seq 1 $((62 - ${#1}))))${RESET}"; }
 
@@ -44,6 +57,7 @@ run_tool() {
     local name="$1"
     shift
     section "$name"
+    ORDER+=("$name")
     pushd "$PROJECT" > /dev/null
     if "$@"; then
         echo -e "  ${GREEN}✓  $name passed${RESET}"
@@ -82,6 +96,34 @@ run_tool "mypy" py -3.13 -m mypy "${SOURCE_DIR_LIST[@]}"
 
 run_tool "pip-audit" py -3.13 -m pip_audit .
 
+# ── osv-scanner — multi-ecosystem dependency + malicious-package scan ──────────
+# One pass over every lockfile in the repo (Python / npm / Cargo / Go / …),
+# checked against the OSV database — CVEs *and* the malicious-packages feed that
+# pip-audit lacks. --allow-no-lockfiles makes a repo with nothing to scan a pass,
+# not a failure, so the harness stays usable everywhere.
+if command -v osv-scanner > /dev/null 2>&1; then
+    run_tool "osv-scanner" osv-scanner scan source --recursive --allow-no-lockfiles .
+else
+    section "osv-scanner"
+    echo -e "  ${GRAY}osv-scanner not installed — skipping (brew install osv-scanner)${RESET}"
+fi
+
+# ── shellcheck — shell script correctness + footguns ──────────────────────────
+# Runs only when the repo actually contains shell scripts.
+shell_files=()
+while IFS= read -r f; do shell_files+=("$f"); done < <(
+    find "$PROJECT" -type f \( -name '*.sh' -o -name '*.bash' \) \
+        -not -path '*/.git/*' -not -path '*/.venv/*' -not -path '*/node_modules/*' 2>/dev/null
+)
+if [[ ${#shell_files[@]} -gt 0 ]]; then
+    if command -v shellcheck > /dev/null 2>&1; then
+        run_tool "shellcheck" shellcheck "${shell_files[@]}"
+    else
+        section "shellcheck"
+        echo -e "  ${GRAY}${#shell_files[@]} shell script(s) found but shellcheck not installed — skipping (brew install shellcheck)${RESET}"
+    fi
+fi
+
 # ── semgrep (via Docker) ──────────────────────────────────────────────────────
 
 semgrep_args=()
@@ -103,11 +145,25 @@ run_tool "semgrep" env MSYS_NO_PATHCONV=1 docker run --rm \
     semgrep/semgrep \
     semgrep "${semgrep_args[@]}"
 
+# ── risky-exec — LegionForge custom supply-chain / RCE pattern rules ───────────
+# Flags curl|bash installers, PowerShell download-cradles, decode-and-exec, and
+# TLS-bypass patterns that ecosystem scanners can't see. Same Docker semgrep
+# image, mounting the rig's bundled ruleset read-only.
+RISKY_RULES="$RIG_ROOT/semgrep/legionforge-risky-exec.yml"
+if [[ -f "$RISKY_RULES" ]]; then
+    WIN_RIG_SEMGREP=$(cygpath -m "$RIG_ROOT/semgrep" 2>/dev/null || echo "$RIG_ROOT/semgrep")
+    run_tool "risky-exec" env MSYS_NO_PATHCONV=1 docker run --rm \
+        -v "${WIN_PROJECT}:/src" \
+        -v "${WIN_RIG_SEMGREP}:/rules:ro" \
+        semgrep/semgrep \
+        semgrep --config /rules/legionforge-risky-exec.yml /src --error
+fi
+
 # ── Summary ───────────────────────────────────────────────────────────────────
 
 section "Summary"
 failed=0
-for tool in ruff bandit mypy pip-audit semgrep; do
+for tool in "${ORDER[@]}"; do
     if [[ "${RESULTS[$tool]}" == "PASS" ]]; then
         echo -e "  ${GREEN}✓  $tool${RESET}"
     else
diff --git a/semgrep/legionforge-risky-exec.yml b/semgrep/legionforge-risky-exec.yml
new file mode 100644
index 0000000..42b5abd
--- /dev/null
+++ b/semgrep/legionforge-risky-exec.yml
@@ -0,0 +1,120 @@
+# LegionForge custom Semgrep ruleset — risky execution / supply-chain patterns.
+#
+# Targets the install-time and remote-code-execution patterns that ecosystem
+# scanners (osv-scanner, pip-audit) do NOT catch: curl|bash installers,
+# PowerShell download-cradles, decode-and-exec, and TLS-verification bypasses.
+#
+# Scope: shell (.sh/.bash), PowerShell (.ps1/.psm1), and CI YAML, matched in
+# Semgrep "generic" mode so a single ruleset spans every file type.
+#
+# Run:
+#   semgrep --config semgrep/legionforge-risky-exec.yml --error <path>
+# Wired into scripts/audit.sh and .github/workflows/supply-chain.yml.
+
+rules:
+  # ── Remote pipe-to-shell ─────────────────────────────────────────────────────
+  - id: lf-remote-pipe-to-shell
+    languages: [generic]
+    severity: ERROR
+    message: >-
+      Remote content piped directly into a shell (curl|wget ... | sh/bash).
+      The downloaded script runs with no review, no pinning, and no integrity
+      check — a single compromised host or MITM yields code execution.
+      Download to a file, verify a checksum/signature, then execute.
+    paths:
+      include: ["*.sh", "*.bash", "*.yml", "*.yaml", "*.zsh"]
+    patterns:
+      - pattern-regex: '(curl|wget)\b[^\n|]*\|[^\n]*\b(sh|bash|zsh|dash)\b'
+
+  # ── PowerShell download-cradle ───────────────────────────────────────────────
+  - id: lf-powershell-download-cradle
+    languages: [generic]
+    severity: ERROR
+    message: >-
+      PowerShell download-cradle (DownloadString/DownloadFile + IEX or
+      Invoke-WebRequest|iex). Fetches and executes remote code in one step,
+      the Windows equivalent of curl|bash. Stage the payload to disk and
+      verify it before running.
+    paths:
+      include: ["*.ps1", "*.psm1", "*.yml", "*.yaml"]
+    patterns:
+      - pattern-either:
+          - pattern-regex: '(?i)(New-Object\s+Net\.WebClient)[^\n]*\.DownloadString'
+          - pattern-regex: '(?i)DownloadString\([^\n]*\)[^\n]*\|\s*(iex|Invoke-Expression)'
+          - pattern-regex: '(?i)(iwr|Invoke-WebRequest|curl)\b[^\n|]*\|\s*(iex|Invoke-Expression)'
+
+  # ── Decode-and-execute (obfuscation) ─────────────────────────────────────────
+  - id: lf-decode-and-exec
+    languages: [generic]
+    severity: ERROR
+    message: >-
+      Base64/hex content decoded and piped to an interpreter. Common
+      obfuscation for malicious install hooks — legitimate code rarely needs
+      to decode itself before running.
+    paths:
+      include: ["*.sh", "*.bash", "*.ps1", "*.psm1", "*.yml", "*.yaml"]
+    patterns:
+      - pattern-either:
+          - pattern-regex: '\bbase64\s+(-d|--decode)\b[^\n]*\|[^\n]*\b(sh|bash|python|perl|node)\b'
+          - pattern-regex: '(?i)\[Convert\]::FromBase64String\([^\n]*\)[^\n]*(iex|Invoke-Expression)'
+          - pattern-regex: '(?i)-(enc|encodedcommand)\b'
+
+  # ── eval over fetched / command-substituted content ──────────────────────────
+  - id: lf-eval-remote
+    languages: [generic]
+    severity: ERROR
+    message: >-
+      eval over command-substituted remote content (eval "$(curl ...)").
+      Executes whatever the remote endpoint returns. Avoid eval on any
+      externally sourced string.
+    paths:
+      include: ["*.sh", "*.bash", "*.zsh", "*.yml", "*.yaml"]
+    patterns:
+      - pattern-regex: '\beval\b[^\n]*\$\((curl|wget)\b'
+
+  # ── TLS verification disabled on download ────────────────────────────────────
+  - id: lf-tls-verification-disabled
+    languages: [generic]
+    severity: WARNING
+    message: >-
+      TLS certificate verification disabled during a network fetch
+      (curl -k / --insecure, wget --no-check-certificate, or a PowerShell
+      ServerCertificateValidationCallback override). Defeats MITM protection
+      on the very download you are trusting.
+    paths:
+      include: ["*.sh", "*.bash", "*.ps1", "*.psm1", "*.yml", "*.yaml"]
+    patterns:
+      - pattern-either:
+          - pattern-regex: '\bcurl\b[^\n]*\s(-k|--insecure)\b'
+          - pattern-regex: '\bwget\b[^\n]*--no-check-certificate\b'
+          - pattern-regex: '(?i)ServerCertificateValidationCallback\s*=\s*\{?\s*\$?true'
+
+  # ── Install straight from a VCS URL / git ref ────────────────────────────────
+  - id: lf-install-from-vcs-url
+    languages: [generic]
+    severity: WARNING
+    message: >-
+      Package installed directly from a VCS/URL rather than a pinned registry
+      release (pip install git+..., npm install <git-url>, cargo install
+      --git). Bypasses lockfiles and registry review; pin to a published,
+      hash-locked version instead.
+    paths:
+      include: ["*.sh", "*.bash", "*.ps1", "*.psm1", "*.yml", "*.yaml"]
+    patterns:
+      - pattern-either:
+          - pattern-regex: '\bpip(3)?\s+install\b[^\n]*\bgit\+'
+          - pattern-regex: '\bnpm\s+(install|i)\b[^\n]*\b(git\+|https?://|github:)'
+          - pattern-regex: '\bcargo\s+install\b[^\n]*--git\b'
+
+  # ── npm install scripts re-enabled inline ────────────────────────────────────
+  - id: lf-npm-scripts-reenabled
+    languages: [generic]
+    severity: WARNING
+    message: >-
+      npm invoked with install scripts force-enabled (--foreground-scripts or
+      --ignore-scripts=false). The rig default is ignore-scripts=true to block
+      postinstall payloads; only re-enable for a specific, reviewed package.
+    paths:
+      include: ["*.sh", "*.bash", "*.yml", "*.yaml"]
+    patterns:
+      - pattern-regex: '\bnpm\b[^\n]*(--foreground-scripts|--ignore-scripts[=\s]+false)'

From 30564f6250e0f239794138dc31c2916b5d755987 Mon Sep 17 00:00:00 2001
From: JP Cruz <115298310+jp-cruz@users.noreply.github.com>
Date: Wed, 17 Jun 2026 22:12:06 -0500
Subject: [PATCH 2/2] chore: validate + harden the rig (self-CI, fixture fix,
 rule tuning) (#11)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* chore: validate + harden the rig (self-CI, fixture fix, rule tuning)

Validation pass over the multi-language rig. Adds end-to-end self-testing
and fixes three real issues that validation surfaced.

Self-test CI:
- .github/workflows/ci.yml: on push/PR, invokes all 7 reusable workflows
  (lint/test/sast/audit/supply-chain/secrets/sbom) against the rig itself
  via LOCAL refs, so a PR validates its own workflow versions. The
  reusable workflows had never been exercised end-to-end before this.

Bug fixes found by validation:
- fixtures/http.py: mock_http_client passed the respx MockRouter as an
  httpx transport, which is not a valid transport (AttributeError:
  handle_async_request) on httpx 0.28 / respx 0.23 — the fixture was
  broken for any consumer. Now returns a plain AsyncClient intercepted by
  the active respx.mock(). Also drops an unused import (F401).
- tests/: add the rig's first real test suite for the shipped fixtures
  (100% coverage of the fixtures module), which doubles as test.yml's
  self-test target.

Rule tuning:
- risky-exec: exclude full-line #-comments via pattern-not-regex so
  documented "curl … | bash" usage notes don't false-positive (the
  generic_comment_style option does not apply to pattern-regex). Add
  --exclude semgrep so the ruleset file doesn't self-match its own
  patterns; add --exclude .claude so worktree copies aren't double-scanned.
- pyproject: per-file-ignore S101 (assert) for tests/ and examples/.

Validated locally: ruff/mypy/bandit clean, pytest 6/6 (100% cov),
risky-exec regression 10/10 on fixtures with 0 comment false positives,
rig self-scan clean, all YAML parses, audit.sh shellcheck-clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* ci: run self-test on all PRs, not just those targeting main

Stacked PRs (and this one) target a feature branch, so the
branches:[main] filter would skip self-CI entirely.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* fix(ci): add [dev] extra so lint.yml/test.yml install lint tooling

lint.yml runs 'pip install -e .[dev]' but the rig defined its tools only
under the [analysis] extra — so ruff/bandit/mypy were never installed and
self-CI lint failed with 'No module named ruff'. The rig now provides the
[dev] extra its own reusable workflows expect.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

---------

Co-authored-by: jp-cruz <jp-cruz@users.noreply.github.com>
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 .github/workflows/ci.yml                 | 52 ++++++++++++++++++++++
 .github/workflows/supply-chain.yml       |  2 +-
 pyproject.toml                           | 17 +++++++
 scripts/audit.ps1                        |  5 ++-
 scripts/audit.sh                         |  6 ++-
 semgrep/legionforge-risky-exec.yml       | 13 ++++++
 src/legionforge_dev_rig/fixtures/http.py | 10 +++--
 tests/conftest.py                        |  5 +++
 tests/test_fixtures_http.py              | 56 ++++++++++++++++++++++++
 9 files changed, 158 insertions(+), 8 deletions(-)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_fixtures_http.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..83251cb
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,52 @@
+# Self-test CI for the dev-rig.
+#
+# The rig's other workflows are all reusable (workflow_call) and were never
+# exercised end-to-end. This workflow invokes each of them against the rig
+# itself — using LOCAL refs (./.github/workflows/*.yml) so a PR validates its
+# own versions of the workflows, not the ones already on main. It both proves
+# the reusable workflows work and guards them against silent regressions.
+
+name: CI (self-test)
+
+on:
+  push:
+    branches: [main]
+  pull_request:        # all PRs, regardless of base — so stacked branches self-test too
+
+jobs:
+  lint:
+    uses: ./.github/workflows/lint.yml
+    with:
+      source-dirs: "src/legionforge_dev_rig"
+      extra-mypy-deps: "types-PyYAML httpx respx pytest"
+
+  test:
+    uses: ./.github/workflows/test.yml
+    with:
+      coverage-source: "legionforge_dev_rig"
+      coverage-threshold: 80
+
+  sast:
+    uses: ./.github/workflows/sast.yml
+    with:
+      source-dirs: "src/legionforge_dev_rig"
+      semgrep-configs: "p/python"
+    permissions:
+      security-events: write
+
+  audit:
+    uses: ./.github/workflows/audit.yml
+
+  supply-chain:
+    uses: ./.github/workflows/supply-chain.yml
+    with:
+      # Pull the risky-exec ruleset from the exact commit under test, not main.
+      # A SHA (unlike a head_ref branch name) also resolves for PRs from forks.
+      rig-ref: ${{ github.sha }}
+    secrets: inherit
+
+  secrets:
+    uses: ./.github/workflows/secrets.yml
+
+  sbom:
+    uses: ./.github/workflows/sbom.yml
diff --git a/.github/workflows/supply-chain.yml b/.github/workflows/supply-chain.yml
index 07722db..59dc378 100644
--- a/.github/workflows/supply-chain.yml
+++ b/.github/workflows/supply-chain.yml
@@ -74,7 +74,7 @@ jobs:
         run: |
           semgrep --config .dev-rig/semgrep/legionforge-risky-exec.yml \
             --error --metrics=off \
-            --exclude .dev-rig .
+            --exclude .dev-rig --exclude .claude --exclude node_modules --exclude semgrep .
 
   # ── Optional Socket.dev behavioral npm scan ──────────────────────────────────
   # Self-skips when no SOCKET_SECURITY_API_KEY secret is provided, so callers
diff --git a/pyproject.toml b/pyproject.toml
index 4fa14ef..003cd56 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,6 +28,18 @@ analysis = [
     "pre-commit>=3.7",
     "types-PyYAML",
 ]
+# The `dev` extra is the contract the reusable lint.yml / test.yml workflows
+# install (`pip install -e .[dev]`). Consuming projects provide their own; the
+# rig defines one so it can self-test those workflows (see ci.yml).
+dev = [
+    "ruff>=0.4",
+    "bandit[toml]>=1.7",
+    "mypy>=1.10",
+    "pytest>=8",
+    "pytest-asyncio>=0.23",
+    "pytest-cov>=5",
+    "types-PyYAML",
+]
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
@@ -43,6 +55,11 @@ ignore = [
     "S607",   # partial executable path — acceptable for system binaries
 ]
 
+[tool.ruff.lint.per-file-ignores]
+# Tests and examples legitimately use `assert` (S101) for verification.
+"tests/**" = ["S101"]
+"examples/**" = ["S101"]
+
 [tool.bandit]
 exclude_dirs = [".venv", "venv", ".git", "tests"]
 skips = [
diff --git a/scripts/audit.ps1 b/scripts/audit.ps1
index 1b80923..4de4dae 100644
--- a/scripts/audit.ps1
+++ b/scripts/audit.ps1
@@ -146,7 +146,7 @@ if (Get-Command osv-scanner -ErrorAction SilentlyContinue) {
 # Runs only when the repo contains shell scripts.
 
 $shellFiles = Get-ChildItem -Path $ProjectPath -Recurse -File -Include *.sh, *.bash -ErrorAction SilentlyContinue |
-    Where-Object { $_.FullName -notmatch '[\\/](\.git|\.venv|node_modules)[\\/]' }
+    Where-Object { $_.FullName -notmatch '[\\/](\.git|\.venv|node_modules|\.claude)[\\/]' }
 if ($shellFiles) {
     if (Get-Command shellcheck -ErrorAction SilentlyContinue) {
         Invoke-Tool "shellcheck" {
@@ -193,7 +193,8 @@ if (Test-Path $riskyRules) {
             -v "${ProjectPath}:/src" `
             -v "${rigSemgrep}:/rules:ro" `
             semgrep/semgrep `
-            semgrep --config /rules/legionforge-risky-exec.yml /src --error
+            semgrep --config /rules/legionforge-risky-exec.yml /src --error `
+            --exclude .claude --exclude .git --exclude node_modules --exclude semgrep
     }
 }
 
diff --git a/scripts/audit.sh b/scripts/audit.sh
index b062f63..b53eff1 100644
--- a/scripts/audit.sh
+++ b/scripts/audit.sh
@@ -113,7 +113,8 @@ fi
 shell_files=()
 while IFS= read -r f; do shell_files+=("$f"); done < <(
     find "$PROJECT" -type f \( -name '*.sh' -o -name '*.bash' \) \
-        -not -path '*/.git/*' -not -path '*/.venv/*' -not -path '*/node_modules/*' 2>/dev/null
+        -not -path '*/.git/*' -not -path '*/.venv/*' -not -path '*/node_modules/*' \
+        -not -path '*/.claude/*' 2>/dev/null
 )
 if [[ ${#shell_files[@]} -gt 0 ]]; then
     if command -v shellcheck > /dev/null 2>&1; then
@@ -156,7 +157,8 @@ if [[ -f "$RISKY_RULES" ]]; then
         -v "${WIN_PROJECT}:/src" \
         -v "${WIN_RIG_SEMGREP}:/rules:ro" \
         semgrep/semgrep \
-        semgrep --config /rules/legionforge-risky-exec.yml /src --error
+        semgrep --config /rules/legionforge-risky-exec.yml /src --error \
+        --exclude .claude --exclude .git --exclude node_modules --exclude semgrep
 fi
 
 # ── Summary ───────────────────────────────────────────────────────────────────
diff --git a/semgrep/legionforge-risky-exec.yml b/semgrep/legionforge-risky-exec.yml
index 42b5abd..6b59686 100644
--- a/semgrep/legionforge-risky-exec.yml
+++ b/semgrep/legionforge-risky-exec.yml
@@ -7,6 +7,12 @@
 # Scope: shell (.sh/.bash), PowerShell (.ps1/.psm1), and CI YAML, matched in
 # Semgrep "generic" mode so a single ruleset spans every file type.
 #
+# Comment handling: rules use pattern-regex (raw text), which bypasses generic
+# mode's comment tokenizer — so each rule ANDs a `pattern-not-regex` excluding
+# full-line #-comments (# is the line-comment char in shell, PowerShell, and
+# YAML). This keeps documented usage examples (e.g. a "# curl ... | bash"
+# install note) from tripping the rules; trailing comments are still scanned.
+#
 # Run:
 #   semgrep --config semgrep/legionforge-risky-exec.yml --error <path>
 # Wired into scripts/audit.sh and .github/workflows/supply-chain.yml.
@@ -25,6 +31,7 @@ rules:
       include: ["*.sh", "*.bash", "*.yml", "*.yaml", "*.zsh"]
     patterns:
       - pattern-regex: '(curl|wget)\b[^\n|]*\|[^\n]*\b(sh|bash|zsh|dash)\b'
+      - pattern-not-regex: '(?m)^\s*#.*'
 
   # ── PowerShell download-cradle ───────────────────────────────────────────────
   - id: lf-powershell-download-cradle
@@ -42,6 +49,7 @@ rules:
           - pattern-regex: '(?i)(New-Object\s+Net\.WebClient)[^\n]*\.DownloadString'
           - pattern-regex: '(?i)DownloadString\([^\n]*\)[^\n]*\|\s*(iex|Invoke-Expression)'
           - pattern-regex: '(?i)(iwr|Invoke-WebRequest|curl)\b[^\n|]*\|\s*(iex|Invoke-Expression)'
+      - pattern-not-regex: '(?m)^\s*#.*'
 
   # ── Decode-and-execute (obfuscation) ─────────────────────────────────────────
   - id: lf-decode-and-exec
@@ -58,6 +66,7 @@ rules:
           - pattern-regex: '\bbase64\s+(-d|--decode)\b[^\n]*\|[^\n]*\b(sh|bash|python|perl|node)\b'
           - pattern-regex: '(?i)\[Convert\]::FromBase64String\([^\n]*\)[^\n]*(iex|Invoke-Expression)'
           - pattern-regex: '(?i)-(enc|encodedcommand)\b'
+      - pattern-not-regex: '(?m)^\s*#.*'
 
   # ── eval over fetched / command-substituted content ──────────────────────────
   - id: lf-eval-remote
@@ -71,6 +80,7 @@ rules:
       include: ["*.sh", "*.bash", "*.zsh", "*.yml", "*.yaml"]
     patterns:
       - pattern-regex: '\beval\b[^\n]*\$\((curl|wget)\b'
+      - pattern-not-regex: '(?m)^\s*#.*'
 
   # ── TLS verification disabled on download ────────────────────────────────────
   - id: lf-tls-verification-disabled
@@ -88,6 +98,7 @@ rules:
           - pattern-regex: '\bcurl\b[^\n]*\s(-k|--insecure)\b'
           - pattern-regex: '\bwget\b[^\n]*--no-check-certificate\b'
           - pattern-regex: '(?i)ServerCertificateValidationCallback\s*=\s*\{?\s*\$?true'
+      - pattern-not-regex: '(?m)^\s*#.*'
 
   # ── Install straight from a VCS URL / git ref ────────────────────────────────
   - id: lf-install-from-vcs-url
@@ -105,6 +116,7 @@ rules:
           - pattern-regex: '\bpip(3)?\s+install\b[^\n]*\bgit\+'
           - pattern-regex: '\bnpm\s+(install|i)\b[^\n]*\b(git\+|https?://|github:)'
           - pattern-regex: '\bcargo\s+install\b[^\n]*--git\b'
+      - pattern-not-regex: '(?m)^\s*#.*'
 
   # ── npm install scripts re-enabled inline ────────────────────────────────────
   - id: lf-npm-scripts-reenabled
@@ -118,3 +130,4 @@ rules:
       include: ["*.sh", "*.bash", "*.yml", "*.yaml"]
     patterns:
       - pattern-regex: '\bnpm\b[^\n]*(--foreground-scripts|--ignore-scripts[=\s]+false)'
+      - pattern-not-regex: '(?m)^\s*#.*'
diff --git a/src/legionforge_dev_rig/fixtures/http.py b/src/legionforge_dev_rig/fixtures/http.py
index 1e2c988..a5cef48 100644
--- a/src/legionforge_dev_rig/fixtures/http.py
+++ b/src/legionforge_dev_rig/fixtures/http.py
@@ -1,5 +1,5 @@
 """Shared httpx / respx fixtures for async HTTP provider testing."""
-from collections.abc import AsyncGenerator, Generator
+from collections.abc import Generator
 from typing import Any
 
 import httpx
@@ -26,9 +26,13 @@ def test_something(respx_mock_base_url):
 @pytest.fixture
 def mock_http_client(respx_mock_base_url: respx.MockRouter) -> httpx.AsyncClient:
     """
-    An httpx.AsyncClient wired into the respx mock router.
+    An httpx.AsyncClient whose requests are intercepted by the respx mock.
     Pass this directly to provider constructors that accept a client parameter.
 
+    The respx_mock_base_url fixture activates respx.mock(), which patches httpx's
+    transport globally — so a plain AsyncClient is routed through the mock. (Do
+    NOT pass the router as `transport=`; a MockRouter is not an httpx transport.)
+
     Usage:
         async def test_health(mock_http_client):
             respx_mock_base_url.get("http://127.0.0.1:11434/api/tags").mock(
@@ -37,7 +41,7 @@ async def test_health(mock_http_client):
             provider = OllamaProvider(client=mock_http_client)
             assert await provider.health_check() is True
     """
-    return httpx.AsyncClient(transport=respx_mock_base_url)
+    return httpx.AsyncClient()
 
 
 def json_response(data: dict[str, Any], status: int = 200) -> httpx.Response:
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..b227387
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,5 @@
+# Re-export the shared fixtures so pytest discovers them in the rig's own
+# self-test suite (mirrors the consuming-project pattern in examples/conftest.py).
+from legionforge_dev_rig.fixtures import mock_http_client, respx_mock_base_url
+
+__all__ = ["mock_http_client", "respx_mock_base_url"]
diff --git a/tests/test_fixtures_http.py b/tests/test_fixtures_http.py
new file mode 100644
index 0000000..94239a8
--- /dev/null
+++ b/tests/test_fixtures_http.py
@@ -0,0 +1,56 @@
+"""Tests for the shared httpx/respx fixtures shipped by the dev-rig.
+
+These also serve as the rig's own self-test target so test.yml is validated
+end-to-end against the rig itself (see .github/workflows/ci.yml).
+"""
+import httpx
+import pytest
+
+from legionforge_dev_rig.fixtures import http as fx
+
+
+def test_json_response_defaults() -> None:
+    """json_response() defaults to HTTP 200 and echoes the payload as JSON."""
+    reply = fx.json_response({"ok": True})
+    assert (reply.status_code, reply.json()) == (200, {"ok": True})
+
+
+def test_json_response_custom_status() -> None:
+    """An explicit status overrides the 200 default; the body is unchanged."""
+    reply = fx.json_response({"created": 1}, status=201)
+    assert (reply.status_code, reply.json()) == (201, {"created": 1})
+
+
+def test_error_response_with_detail() -> None:
+    """error_response() wraps a supplied message under the "detail" key."""
+    reply = fx.error_response(404, "missing")
+    assert (reply.status_code, reply.json()) == (404, {"detail": "missing"})
+
+
+def test_error_response_without_detail() -> None:
+    """Omitting the detail message yields an empty JSON object body."""
+    reply = fx.error_response(500)
+    assert (reply.status_code, reply.json()) == (500, {})
+
+
+def test_respx_fixture_stubs_route(respx_mock_base_url) -> None:
+    """A route stubbed on the fixture's router answers a plain sync client."""
+    ping_url = "http://svc.local/ping"
+    canned = httpx.Response(200, json={"pong": True})
+    respx_mock_base_url.get(ping_url).mock(return_value=canned)
+    # respx.mock() is active via the fixture, so a plain client is intercepted.
+    with httpx.Client() as sync_client:
+        assert sync_client.get(ping_url).json() == {"pong": True}
+
+
+@pytest.mark.asyncio
+async def test_mock_http_client_is_async_and_wired(
+    mock_http_client, respx_mock_base_url
+) -> None:
+    """The AsyncClient fixture is answered by routes stubbed on the router."""
+    health_url = "http://svc.local/health"
+    canned = httpx.Response(200, json={"status": "ok"})
+    respx_mock_base_url.get(health_url).mock(return_value=canned)
+    async with mock_http_client as async_client:
+        reply = await async_client.get(health_url)
+    assert (reply.status_code, reply.json()) == (200, {"status": "ok"})