From 0746ca4a46e773f5a263989b2c55e9dcbe434051 Mon Sep 17 00:00:00 2001 From: jp-cruz Date: Wed, 17 Jun 2026 21:43:59 -0500 Subject: [PATCH 1/2] feat: extend audit rig to multi-language supply-chain coverage Adds osv-scanner (multi-ecosystem dependency vuln + malicious-package scan covering Python/npm/Cargo), shellcheck, and a custom Semgrep ruleset for risky-exec / supply-chain patterns (curl|bash installers, PowerShell download-cradles, decode-and-exec, TLS-verification bypass, VCS-URL installs, npm install-scripts re-enabled). - semgrep/legionforge-risky-exec.yml: 7 custom rules (validated 8/8 on fixtures, 0 false positives on clean input) - .github/workflows/supply-chain.yml: reusable CI (osv-scanner + risky-exec + optional token-gated Socket.dev job) - scripts/audit.sh + audit.ps1: osv-scanner/shellcheck/risky-exec sections, each self-skipping when language/tool absent - .pre-commit-config.yaml: shellcheck + lockfile-triggered osv-scanner - README: inventory, CI wiring, multi-language note, tool versions cargo-deny (Rust policy) and PSScriptAnalyzer (PowerShell SAST) are the documented next pass. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/supply-chain.yml | 100 ++++++++++++++++++++++++ .pre-commit-config.yaml | 18 +++++ README.md | 20 ++++- scripts/audit.ps1 | 61 ++++++++++++++- scripts/audit.sh | 60 ++++++++++++++- semgrep/legionforge-risky-exec.yml | 120 +++++++++++++++++++++++++++++ 6 files changed, 372 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/supply-chain.yml create mode 100644 semgrep/legionforge-risky-exec.yml diff --git a/.github/workflows/supply-chain.yml b/.github/workflows/supply-chain.yml new file mode 100644 index 0000000..07722db --- /dev/null +++ b/.github/workflows/supply-chain.yml @@ -0,0 +1,100 @@ +# Reusable workflow: supply-chain hardening. 
+# +# osv-scanner — multi-ecosystem dependency vuln + malicious-package scan +# (Python / npm / Cargo / Go / … from lockfiles) +# risky-exec — LegionForge custom Semgrep rules: curl|bash installers, +# PowerShell download-cradles, decode-and-exec, TLS bypass +# socket — OPTIONAL behavioral npm analysis; runs only when the caller +# provides a SOCKET_SECURITY_API_KEY secret +# +# Caller syntax: +# +# jobs: +# supply-chain: +# uses: LegionForge/dev-rig/.github/workflows/supply-chain.yml@main +# secrets: inherit # only needed to enable the Socket job +# +# Complements audit.yml (pip-audit/licenses). osv-scanner adds the malicious- +# package feed and the npm/Cargo ecosystems that pip-audit doesn't cover. + +name: Supply Chain + +on: + workflow_call: + inputs: + osv-version: + description: "osv-scanner image tag to pin" + required: false + type: string + default: "v2.3.8" + rig-ref: + description: "dev-rig ref to pull the custom risky-exec ruleset from" + required: false + type: string + default: "main" + secrets: + SOCKET_SECURITY_API_KEY: + description: "Optional Socket.dev API key — enables the npm behavioral scan" + required: false + +jobs: + # ── Dependency vulnerabilities + known-malicious packages ──────────────────── + osv-scanner: + name: osv-scanner (deps + malicious packages) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + # Pinned container image (matches the rig's docker-based tooling pattern). + # --allow-no-lockfiles: a repo with nothing to scan is a pass, not a fail. 
+ - name: osv-scanner scan + run: | + docker run --rm -v "${{ github.workspace }}:/src" \ + "ghcr.io/google/osv-scanner:${{ inputs.osv-version }}" \ + scan source --recursive --allow-no-lockfiles /src + + # ── Custom risky-exec / supply-chain pattern rules ─────────────────────────── + risky-exec: + name: risky-exec (custom Semgrep rules) + runs-on: ubuntu-latest + container: semgrep/semgrep + steps: + - name: Checkout caller repo + uses: actions/checkout@v4 + + # The ruleset lives in dev-rig, not the consuming repo — pull it alongside. + - name: Checkout dev-rig ruleset + uses: actions/checkout@v4 + with: + repository: LegionForge/dev-rig + ref: ${{ inputs.rig-ref }} + path: .dev-rig + + - name: semgrep risky-exec + run: | + semgrep --config .dev-rig/semgrep/legionforge-risky-exec.yml \ + --error --metrics=off \ + --exclude .dev-rig . + + # ── Optional Socket.dev behavioral npm scan ────────────────────────────────── + # Self-skips when no SOCKET_SECURITY_API_KEY secret is provided, so callers + # without a Socket account incur no failure and leak no dependency data. + socket: + name: socket (optional npm behavioral) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Socket scan (skips without token) + env: + SOCKET_SECURITY_API_KEY: ${{ secrets.SOCKET_SECURITY_API_KEY }} + run: | + if [ -z "${SOCKET_SECURITY_API_KEY:-}" ]; then + echo "SOCKET_SECURITY_API_KEY not set — skipping optional Socket scan." + exit 0 + fi + if [ ! -f package.json ]; then + echo "No package.json — nothing for Socket to scan." + exit 0 + fi + npx -y @socketsecurity/cli@latest scan create --report . 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7d9fbc0..670c4ff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,6 +52,24 @@ repos: - psutil - uvicorn + # ── shellcheck — shell script correctness + footguns ────────────────────── + - repo: https://github.com/koalaman/shellcheck-precommit + rev: v0.10.0 + hooks: + - id: shellcheck + + # ── osv-scanner — dependency vuln + malicious-package scan (multi-ecosystem)─ + # Local hook (uses the installed osv-scanner binary). Triggers only when a + # lockfile changes; scans the whole tree so cross-ecosystem repos are covered. + - repo: local + hooks: + - id: osv-scanner + name: osv-scanner (deps + malicious packages) + entry: osv-scanner scan source --recursive --allow-no-lockfiles . + language: system + pass_filenames: false + files: '(requirements.*\.txt|poetry\.lock|uv\.lock|Pipfile\.lock|package-lock\.json|pnpm-lock\.yaml|yarn\.lock|Cargo\.lock|go\.sum)$' + # ── gitleaks — secret scanning (OWASP SAMM — Implementation/Secure Build) ── - repo: https://github.com/gitleaks/gitleaks rev: v8.18.4 diff --git a/README.md b/README.md index f35cd97..ea9b12f 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,13 @@ Shared CI pipeline, pre-commit hooks, and pytest fixtures for LegionForge projec | `.github/workflows/test.yml` | Reusable CI job: pytest + coverage enforcement | | `.github/workflows/sast.yml` | Reusable CI job: semgrep (p/python + p/fastapi) + CodeQL | | `.github/workflows/audit.yml` | Reusable CI job: pip-audit CVE scan + pip-licenses compliance | +| `.github/workflows/supply-chain.yml` | Reusable CI job: osv-scanner (multi-ecosystem deps + malicious packages) + risky-exec custom rules + optional Socket.dev | | `.github/workflows/secrets.yml` | Reusable CI job: gitleaks secret scanning | | `.github/workflows/sbom.yml` | Reusable CI job: CycloneDX SBOM generation | +| `semgrep/legionforge-risky-exec.yml` | Custom Semgrep ruleset: curl\|bash installers, PowerShell 
download-cradles, decode-and-exec, TLS bypass | +| `scripts/audit.sh` / `scripts/audit.ps1` | Local audit harness — Python + osv-scanner + shellcheck + semgrep + risky-exec | | `.pre-commit-hooks.yaml` | Hook definitions consumed via pre-commit | -| `.pre-commit-config.yaml` | Default config to copy into new projects (includes gitleaks) | +| `.pre-commit-config.yaml` | Default config to copy into new projects (includes gitleaks, shellcheck, osv-scanner) | | `SECURITY.md` | Vulnerability disclosure policy template — copy and adjust | | `src/legionforge_dev_rig/fixtures/` | Shared pytest fixtures (httpx mocking, etc.) | | `examples/` | Template conftest.py and example tests | @@ -94,8 +97,19 @@ jobs: audit: uses: LegionForge/dev-rig/.github/workflows/audit.yml@main + + supply-chain: + uses: LegionForge/dev-rig/.github/workflows/supply-chain.yml@main + secrets: inherit # ← only needed to enable the optional Socket.dev scan ``` +> **Multi-language note.** `supply-chain.yml` and the local `audit.sh`/`audit.ps1` +> harness cover Python, JS/TS, and Rust (via osv-scanner lockfile scanning) plus +> shell (shellcheck) and risky-exec patterns in shell/PowerShell/CI YAML. Each +> section self-skips when its files or tools aren't present, so the rig is safe +> to wire into any repo regardless of language mix. cargo-deny (Rust policy) and +> PSScriptAnalyzer (PowerShell SAST) are the planned next additions. 
+ ### 4 — Add shared fixtures to tests/conftest.py ```python @@ -133,6 +147,8 @@ pre-commit autoupdate | bandit | 1.7 | `pyproject.toml [tool.bandit]` | | mypy | 1.10 | `pyproject.toml [tool.mypy]` | | pip-audit | 2.7 | no config — runs against installed packages | -| semgrep | 1.70 | rulesets passed as CLI args | +| osv-scanner | 2.3 | no config — scans lockfiles recursively (`brew install osv-scanner`) | +| shellcheck | 0.10 | inline directives / `.shellcheckrc` (`brew install shellcheck`) | +| semgrep | 1.70 | rulesets passed as CLI args + `semgrep/legionforge-risky-exec.yml` | | pytest-cov | 5 | `pyproject.toml [tool.pytest.ini_options]` | | pre-commit | 3.7 | `.pre-commit-config.yaml` | diff --git a/scripts/audit.ps1 b/scripts/audit.ps1 index a540f0e..1b80923 100644 --- a/scripts/audit.ps1 +++ b/scripts/audit.ps1 @@ -1,11 +1,15 @@ <# .SYNOPSIS - LegionForge audit harness — ruff, bandit, mypy, pip-audit, semgrep. + LegionForge audit harness — ruff, bandit, mypy, pip-audit, osv-scanner, + shellcheck, semgrep, and the custom risky-exec supply-chain ruleset. .DESCRIPTION - Runs all five static-analysis and security tools against a project directory. + Runs the static-analysis and security tools against a project directory. Native tools (ruff/bandit/mypy/pip-audit) run directly via py -3.13. - Semgrep runs via Docker to avoid Windows/Python 3.13+ build failures. + osv-scanner (multi-ecosystem dependency + malicious-package scan) and + shellcheck run as native binaries; both self-skip when absent. + Semgrep and the custom risky-exec ruleset run via Docker to avoid + Windows/Python 3.13+ build failures. Per-project configuration lives in two small files at the project root: .audit-dirs — space-separated source dirs (e.g. "llm_valet svcmgr") @@ -32,6 +36,10 @@ param( $ProjectPath = (Resolve-Path $ProjectPath).Path +# Dev-rig root (this script lives in /scripts/) — locates the bundled +# custom Semgrep ruleset regardless of the consuming project's CWD. 
+$RigRoot = Split-Path $PSScriptRoot -Parent + # ── Read per-project config ─────────────────────────────────────────────────── # Which source directories to scan (ruff / bandit / mypy / semgrep). @@ -120,6 +128,36 @@ Invoke-Tool "pip-audit" { py -3.13 -m pip_audit . } +# ── osv-scanner — multi-ecosystem dependency + malicious-package scan ───────── +# One pass over every lockfile (Python / npm / Cargo / …), checked against OSV +# (CVEs + malicious-packages feed). --allow-no-lockfiles: nothing to scan is a +# pass, not a failure. Self-skips when the binary isn't installed. + +if (Get-Command osv-scanner -ErrorAction SilentlyContinue) { + Invoke-Tool "osv-scanner" { + osv-scanner scan source --recursive --allow-no-lockfiles . + } +} else { + Write-Section "osv-scanner" + Write-Host " osv-scanner not installed — skipping (winget install Google.osv-scanner)" -ForegroundColor DarkGray +} + +# ── shellcheck — shell script correctness + footguns ───────────────────────── +# Runs only when the repo contains shell scripts. + +$shellFiles = Get-ChildItem -Path $ProjectPath -Recurse -File -Include *.sh, *.bash -ErrorAction SilentlyContinue | + Where-Object { $_.FullName -notmatch '[\\/](\.git|\.venv|node_modules)[\\/]' } +if ($shellFiles) { + if (Get-Command shellcheck -ErrorAction SilentlyContinue) { + Invoke-Tool "shellcheck" { + shellcheck @($shellFiles.FullName) + } + } else { + Write-Section "shellcheck" + Write-Host " $($shellFiles.Count) shell script(s) found but shellcheck not installed — skipping" -ForegroundColor DarkGray + } +} + # ── semgrep — OWASP / framework-specific vulnerability patterns ─────────────── # Runs inside the official Docker image to avoid Windows/Python 3.13 build issues. # First run pulls the image (~200 MB); subsequent runs use the local cache. 
@@ -142,6 +180,23 @@ Invoke-Tool "semgrep" { & docker @dockerArgs } +# ── risky-exec — LegionForge custom supply-chain / RCE pattern rules ────────── +# Flags curl|bash installers, PowerShell download-cradles, decode-and-exec, and +# TLS-bypass patterns ecosystem scanners can't see. Same Docker semgrep image, +# mounting the rig's bundled ruleset read-only. + +$riskyRules = Join-Path $RigRoot "semgrep/legionforge-risky-exec.yml" +if (Test-Path $riskyRules) { + Invoke-Tool "risky-exec" { + $rigSemgrep = Join-Path $RigRoot "semgrep" + & docker run --rm ` + -v "${ProjectPath}:/src" ` + -v "${rigSemgrep}:/rules:ro" ` + semgrep/semgrep ` + semgrep --config /rules/legionforge-risky-exec.yml /src --error + } +} + # ── Summary ─────────────────────────────────────────────────────────────────── Write-Section "Summary" diff --git a/scripts/audit.sh b/scripts/audit.sh index 1a78094..b062f63 100644 --- a/scripts/audit.sh +++ b/scripts/audit.sh @@ -1,5 +1,12 @@ #!/usr/bin/env bash -# LegionForge audit harness — ruff, bandit, mypy, pip-audit, semgrep. +# LegionForge audit harness. +# Python : ruff, bandit, mypy, pip-audit +# All deps: osv-scanner (Python / npm / Cargo / … vulns + malicious packages) +# Shell : shellcheck +# Patterns: semgrep (per-project packs) + risky-exec (custom supply-chain rules) +# +# Language sections self-skip when their files/tools aren't present, so the same +# harness runs cleanly against single- and multi-language repos. # # Usage: # ./audit.sh # from the project root @@ -11,6 +18,11 @@ set -euo pipefail +# Resolve the dev-rig root (this script lives in /scripts/) so the bundled +# custom Semgrep ruleset is found regardless of the consuming project's CWD. 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RIG_ROOT="$(dirname "$SCRIPT_DIR")" + PROJECT="${1:-$(pwd)}" PROJECT="$(cd "$PROJECT" && pwd)" # resolve to absolute path @@ -37,6 +49,7 @@ GRAY='\033[0;90m' RESET='\033[0m' declare -A RESULTS +declare -a ORDER=() # preserves run order for the summary (assoc arrays don't) section() { echo -e "\n${GRAY}── $1 $(printf '─%.0s' $(seq 1 $((62 - ${#1}))))${RESET}"; } @@ -44,6 +57,7 @@ run_tool() { local name="$1" shift section "$name" + ORDER+=("$name") pushd "$PROJECT" > /dev/null if "$@"; then echo -e " ${GREEN}✓ $name passed${RESET}" @@ -82,6 +96,34 @@ run_tool "mypy" py -3.13 -m mypy "${SOURCE_DIR_LIST[@]}" run_tool "pip-audit" py -3.13 -m pip_audit . +# ── osv-scanner — multi-ecosystem dependency + malicious-package scan ────────── +# One pass over every lockfile in the repo (Python / npm / Cargo / Go / …), +# checked against the OSV database — CVEs *and* the malicious-packages feed that +# pip-audit lacks. --allow-no-lockfiles makes a repo with nothing to scan a pass, +# not a failure, so the harness stays usable everywhere. +if command -v osv-scanner > /dev/null 2>&1; then + run_tool "osv-scanner" osv-scanner scan source --recursive --allow-no-lockfiles . +else + section "osv-scanner" + echo -e " ${GRAY}osv-scanner not installed — skipping (brew install osv-scanner)${RESET}" +fi + +# ── shellcheck — shell script correctness + footguns ────────────────────────── +# Runs only when the repo actually contains shell scripts. 
+shell_files=() +while IFS= read -r f; do shell_files+=("$f"); done < <( + find "$PROJECT" -type f \( -name '*.sh' -o -name '*.bash' \) \ + -not -path '*/.git/*' -not -path '*/.venv/*' -not -path '*/node_modules/*' 2>/dev/null +) +if [[ ${#shell_files[@]} -gt 0 ]]; then + if command -v shellcheck > /dev/null 2>&1; then + run_tool "shellcheck" shellcheck "${shell_files[@]}" + else + section "shellcheck" + echo -e " ${GRAY}${#shell_files[@]} shell script(s) found but shellcheck not installed — skipping (brew install shellcheck)${RESET}" + fi +fi + # ── semgrep (via Docker) ────────────────────────────────────────────────────── semgrep_args=() @@ -103,11 +145,25 @@ run_tool "semgrep" env MSYS_NO_PATHCONV=1 docker run --rm \ semgrep/semgrep \ semgrep "${semgrep_args[@]}" +# ── risky-exec — LegionForge custom supply-chain / RCE pattern rules ─────────── +# Flags curl|bash installers, PowerShell download-cradles, decode-and-exec, and +# TLS-bypass patterns that ecosystem scanners can't see. Same Docker semgrep +# image, mounting the rig's bundled ruleset read-only. 
+RISKY_RULES="$RIG_ROOT/semgrep/legionforge-risky-exec.yml" +if [[ -f "$RISKY_RULES" ]]; then + WIN_RIG_SEMGREP=$(cygpath -m "$RIG_ROOT/semgrep" 2>/dev/null || echo "$RIG_ROOT/semgrep") + run_tool "risky-exec" env MSYS_NO_PATHCONV=1 docker run --rm \ + -v "${WIN_PROJECT}:/src" \ + -v "${WIN_RIG_SEMGREP}:/rules:ro" \ + semgrep/semgrep \ + semgrep --config /rules/legionforge-risky-exec.yml /src --error +fi + # ── Summary ─────────────────────────────────────────────────────────────────── section "Summary" failed=0 -for tool in ruff bandit mypy pip-audit semgrep; do +for tool in "${ORDER[@]}"; do if [[ "${RESULTS[$tool]}" == "PASS" ]]; then echo -e " ${GREEN}✓ $tool${RESET}" else diff --git a/semgrep/legionforge-risky-exec.yml b/semgrep/legionforge-risky-exec.yml new file mode 100644 index 0000000..42b5abd --- /dev/null +++ b/semgrep/legionforge-risky-exec.yml @@ -0,0 +1,120 @@ +# LegionForge custom Semgrep ruleset — risky execution / supply-chain patterns. +# +# Targets the install-time and remote-code-execution patterns that ecosystem +# scanners (osv-scanner, pip-audit) do NOT catch: curl|bash installers, +# PowerShell download-cradles, decode-and-exec, and TLS-verification bypasses. +# +# Scope: shell (.sh/.bash), PowerShell (.ps1/.psm1), and CI YAML, matched in +# Semgrep "generic" mode so a single ruleset spans every file type. +# +# Run: +# semgrep --config semgrep/legionforge-risky-exec.yml --error +# Wired into scripts/audit.sh and .github/workflows/supply-chain.yml. + +rules: + # ── Remote pipe-to-shell ───────────────────────────────────────────────────── + - id: lf-remote-pipe-to-shell + languages: [generic] + severity: ERROR + message: >- + Remote content piped directly into a shell (curl|wget ... | sh/bash). + The downloaded script runs with no review, no pinning, and no integrity + check — a single compromised host or MITM yields code execution. + Download to a file, verify a checksum/signature, then execute. 
+ paths: + include: ["*.sh", "*.bash", "*.yml", "*.yaml", "*.zsh"] + patterns: + - pattern-regex: '(curl|wget)\b[^\n|]*\|[^\n]*\b(sh|bash|zsh|dash)\b' + + # ── PowerShell download-cradle ─────────────────────────────────────────────── + - id: lf-powershell-download-cradle + languages: [generic] + severity: ERROR + message: >- + PowerShell download-cradle (DownloadString/DownloadFile + IEX or + Invoke-WebRequest|iex). Fetches and executes remote code in one step, + the Windows equivalent of curl|bash. Stage the payload to disk and + verify it before running. + paths: + include: ["*.ps1", "*.psm1", "*.yml", "*.yaml"] + patterns: + - pattern-either: + - pattern-regex: '(?i)(New-Object\s+Net\.WebClient)[^\n]*\.DownloadString' + - pattern-regex: '(?i)DownloadString\([^\n]*\)[^\n]*\|\s*(iex|Invoke-Expression)' + - pattern-regex: '(?i)(iwr|Invoke-WebRequest|curl)\b[^\n|]*\|\s*(iex|Invoke-Expression)' + + # ── Decode-and-execute (obfuscation) ───────────────────────────────────────── + - id: lf-decode-and-exec + languages: [generic] + severity: ERROR + message: >- + Base64/hex content decoded and piped to an interpreter. Common + obfuscation for malicious install hooks — legitimate code rarely needs + to decode itself before running. + paths: + include: ["*.sh", "*.bash", "*.ps1", "*.psm1", "*.yml", "*.yaml"] + patterns: + - pattern-either: + - pattern-regex: '\bbase64\s+(-d|--decode)\b[^\n]*\|[^\n]*\b(sh|bash|python|perl|node)\b' + - pattern-regex: '(?i)\[Convert\]::FromBase64String\([^\n]*\)[^\n]*(iex|Invoke-Expression)' + - pattern-regex: '(?i)-(enc|encodedcommand)\b' + + # ── eval over fetched / command-substituted content ────────────────────────── + - id: lf-eval-remote + languages: [generic] + severity: ERROR + message: >- + eval over command-substituted remote content (eval "$(curl ...)"). + Executes whatever the remote endpoint returns. Avoid eval on any + externally sourced string. 
+ paths: + include: ["*.sh", "*.bash", "*.zsh", "*.yml", "*.yaml"] + patterns: + - pattern-regex: '\beval\b[^\n]*\$\((curl|wget)\b' + + # ── TLS verification disabled on download ──────────────────────────────────── + - id: lf-tls-verification-disabled + languages: [generic] + severity: WARNING + message: >- + TLS certificate verification disabled during a network fetch + (curl -k / --insecure, wget --no-check-certificate, or a PowerShell + ServerCertificateValidationCallback override). Defeats MITM protection + on the very download you are trusting. + paths: + include: ["*.sh", "*.bash", "*.ps1", "*.psm1", "*.yml", "*.yaml"] + patterns: + - pattern-either: + - pattern-regex: '\bcurl\b[^\n]*\s(-k|--insecure)\b' + - pattern-regex: '\bwget\b[^\n]*--no-check-certificate\b' + - pattern-regex: '(?i)ServerCertificateValidationCallback\s*=\s*\{?\s*\$?true' + + # ── Install straight from a VCS URL / git ref ──────────────────────────────── + - id: lf-install-from-vcs-url + languages: [generic] + severity: WARNING + message: >- + Package installed directly from a VCS/URL rather than a pinned registry + release (pip install git+..., npm install github:..., cargo install + --git). Bypasses lockfiles and registry review; pin to a published, + hash-locked version instead. + paths: + include: ["*.sh", "*.bash", "*.ps1", "*.psm1", "*.yml", "*.yaml"] + patterns: + - pattern-either: + - pattern-regex: '\bpip(3)?\s+install\b[^\n]*\bgit\+' + - pattern-regex: '\bnpm\s+(install|i)\b[^\n]*\b(git\+|https?://|github:)' + - pattern-regex: '\bcargo\s+install\b[^\n]*--git\b' + + # ── npm install scripts re-enabled inline ──────────────────────────────────── + - id: lf-npm-scripts-reenabled + languages: [generic] + severity: WARNING + message: >- + npm invoked with install scripts force-enabled (--foreground-scripts or + --ignore-scripts=false). The rig default is ignore-scripts=true to block + postinstall payloads; only re-enable for a specific, reviewed package. 
+ paths: + include: ["*.sh", "*.bash", "*.yml", "*.yaml"] + patterns: + - pattern-regex: '\bnpm\b[^\n]*(--foreground-scripts|--ignore-scripts[=\s]+false)' From 30564f6250e0f239794138dc31c2916b5d755987 Mon Sep 17 00:00:00 2001 From: JP Cruz <115298310+jp-cruz@users.noreply.github.com> Date: Wed, 17 Jun 2026 22:12:06 -0500 Subject: [PATCH 2/2] chore: validate + harden the rig (self-CI, fixture fix, rule tuning) (#11) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: validate + harden the rig (self-CI, fixture fix, rule tuning) Validation pass over the multi-language rig. Adds end-to-end self-testing and fixes three real issues that validation surfaced. Self-test CI: - .github/workflows/ci.yml: on push/PR, invokes all 7 reusable workflows (lint/test/sast/audit/supply-chain/secrets/sbom) against the rig itself via LOCAL refs, so a PR validates its own workflow versions. The reusable workflows had never been exercised end-to-end before this. Bug fixes found by validation: - fixtures/http.py: mock_http_client passed the respx MockRouter as an httpx transport, which is not a valid transport (AttributeError: handle_async_request) on httpx 0.28 / respx 0.23 — the fixture was broken for any consumer. Now returns a plain AsyncClient intercepted by the active respx.mock(). Also drops an unused import (F401). - tests/: add the rig's first real test suite for the shipped fixtures (100% coverage of the fixtures module), which doubles as test.yml's self-test target. Rule tuning: - risky-exec: exclude full-line #-comments via pattern-not-regex so documented "curl … | bash" usage notes don't false-positive (the generic_comment_style option does not apply to pattern-regex). Add --exclude semgrep so the ruleset file doesn't self-match its own patterns; add --exclude .claude so worktree copies aren't double-scanned. - pyproject: per-file-ignore S101 (assert) for tests/ and examples/. 
Validated locally: ruff/mypy/bandit clean, pytest 6/6 (100% cov), risky-exec regression 10/10 on fixtures with 0 comment false positives, rig self-scan clean, all YAML parses, audit.sh shellcheck-clean. Co-Authored-By: Claude Opus 4.8 * ci: run self-test on all PRs, not just those targeting main Stacked PRs (and this one) target a feature branch, so the branches:[main] filter would skip self-CI entirely. Co-Authored-By: Claude Opus 4.8 * fix(ci): add [dev] extra so lint.yml/test.yml install lint tooling lint.yml runs 'pip install -e .[dev]' but the rig defined its tools only under the [analysis] extra — so ruff/bandit/mypy were never installed and self-CI lint failed with 'No module named ruff'. The rig now provides the [dev] extra its own reusable workflows expect. Co-Authored-By: Claude Opus 4.8 --------- Co-authored-by: jp-cruz Co-authored-by: Claude Opus 4.8 --- .github/workflows/ci.yml | 52 ++++++++++++++++++++++ .github/workflows/supply-chain.yml | 2 +- pyproject.toml | 17 +++++++ scripts/audit.ps1 | 5 ++- scripts/audit.sh | 6 ++- semgrep/legionforge-risky-exec.yml | 13 ++++++ src/legionforge_dev_rig/fixtures/http.py | 10 +++-- tests/conftest.py | 5 +++ tests/test_fixtures_http.py | 56 ++++++++++++++++++++++++ 9 files changed, 158 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 tests/conftest.py create mode 100644 tests/test_fixtures_http.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..83251cb --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,52 @@ +# Self-test CI for the dev-rig. +# +# The rig's other workflows are all reusable (workflow_call) and were never +# exercised end-to-end. This workflow invokes each of them against the rig +# itself — using LOCAL refs (./.github/workflows/*.yml) so a PR validates its +# own versions of the workflows, not the ones already on main. 
It both proves +# the reusable workflows work and guards them against silent regressions. + +name: CI (self-test) + +on: + push: + branches: [main] + pull_request: # all PRs, regardless of base — so stacked branches self-test too + +jobs: + lint: + uses: ./.github/workflows/lint.yml + with: + source-dirs: "src/legionforge_dev_rig" + extra-mypy-deps: "types-PyYAML httpx respx pytest" + + test: + uses: ./.github/workflows/test.yml + with: + coverage-source: "legionforge_dev_rig" + coverage-threshold: 80 + + sast: + uses: ./.github/workflows/sast.yml + with: + source-dirs: "src/legionforge_dev_rig" + semgrep-configs: "p/python" + permissions: + security-events: write + + audit: + uses: ./.github/workflows/audit.yml + + supply-chain: + uses: ./.github/workflows/supply-chain.yml + with: + # Pull the risky-exec ruleset from THIS branch so self-CI tests the PR's + # rules, not whatever is on main. + rig-ref: ${{ github.head_ref || github.ref_name }} + secrets: inherit + + secrets: + uses: ./.github/workflows/secrets.yml + + sbom: + uses: ./.github/workflows/sbom.yml diff --git a/.github/workflows/supply-chain.yml b/.github/workflows/supply-chain.yml index 07722db..59dc378 100644 --- a/.github/workflows/supply-chain.yml +++ b/.github/workflows/supply-chain.yml @@ -74,7 +74,7 @@ jobs: run: | semgrep --config .dev-rig/semgrep/legionforge-risky-exec.yml \ --error --metrics=off \ - --exclude .dev-rig . + --exclude .dev-rig --exclude .claude --exclude node_modules --exclude semgrep . # ── Optional Socket.dev behavioral npm scan ────────────────────────────────── # Self-skips when no SOCKET_SECURITY_API_KEY secret is provided, so callers diff --git a/pyproject.toml b/pyproject.toml index 4fa14ef..003cd56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,18 @@ analysis = [ "pre-commit>=3.7", "types-PyYAML", ] +# The `dev` extra is the contract the reusable lint.yml / test.yml workflows +# install (`pip install -e .[dev]`). 
Consuming projects provide their own; the +# rig defines one so it can self-test those workflows (see ci.yml). +dev = [ + "ruff>=0.4", + "bandit[toml]>=1.7", + "mypy>=1.10", + "pytest>=8", + "pytest-asyncio>=0.23", + "pytest-cov>=5", + "types-PyYAML", +] [tool.pytest.ini_options] asyncio_mode = "auto" @@ -43,6 +55,11 @@ ignore = [ "S607", # partial executable path — acceptable for system binaries ] +[tool.ruff.lint.per-file-ignores] +# Tests and examples legitimately use `assert` (S101) for verification. +"tests/**" = ["S101"] +"examples/**" = ["S101"] + [tool.bandit] exclude_dirs = [".venv", "venv", ".git", "tests"] skips = [ diff --git a/scripts/audit.ps1 b/scripts/audit.ps1 index 1b80923..4de4dae 100644 --- a/scripts/audit.ps1 +++ b/scripts/audit.ps1 @@ -146,7 +146,7 @@ if (Get-Command osv-scanner -ErrorAction SilentlyContinue) { # Runs only when the repo contains shell scripts. $shellFiles = Get-ChildItem -Path $ProjectPath -Recurse -File -Include *.sh, *.bash -ErrorAction SilentlyContinue | - Where-Object { $_.FullName -notmatch '[\\/](\.git|\.venv|node_modules)[\\/]' } + Where-Object { $_.FullName -notmatch '[\\/](\.git|\.venv|node_modules|\.claude)[\\/]' } if ($shellFiles) { if (Get-Command shellcheck -ErrorAction SilentlyContinue) { Invoke-Tool "shellcheck" { @@ -193,7 +193,8 @@ if (Test-Path $riskyRules) { -v "${ProjectPath}:/src" ` -v "${rigSemgrep}:/rules:ro" ` semgrep/semgrep ` - semgrep --config /rules/legionforge-risky-exec.yml /src --error + semgrep --config /rules/legionforge-risky-exec.yml /src --error ` + --exclude .claude --exclude .git --exclude node_modules --exclude semgrep } } diff --git a/scripts/audit.sh b/scripts/audit.sh index b062f63..b53eff1 100644 --- a/scripts/audit.sh +++ b/scripts/audit.sh @@ -113,7 +113,8 @@ fi shell_files=() while IFS= read -r f; do shell_files+=("$f"); done < <( find "$PROJECT" -type f \( -name '*.sh' -o -name '*.bash' \) \ - -not -path '*/.git/*' -not -path '*/.venv/*' -not -path '*/node_modules/*' 2>/dev/null + 
-not -path '*/.git/*' -not -path '*/.venv/*' -not -path '*/node_modules/*' \ + -not -path '*/.claude/*' 2>/dev/null ) if [[ ${#shell_files[@]} -gt 0 ]]; then if command -v shellcheck > /dev/null 2>&1; then @@ -156,7 +157,8 @@ if [[ -f "$RISKY_RULES" ]]; then -v "${WIN_PROJECT}:/src" \ -v "${WIN_RIG_SEMGREP}:/rules:ro" \ semgrep/semgrep \ - semgrep --config /rules/legionforge-risky-exec.yml /src --error + semgrep --config /rules/legionforge-risky-exec.yml /src --error \ + --exclude .claude --exclude .git --exclude node_modules --exclude semgrep fi # ── Summary ─────────────────────────────────────────────────────────────────── diff --git a/semgrep/legionforge-risky-exec.yml b/semgrep/legionforge-risky-exec.yml index 42b5abd..6b59686 100644 --- a/semgrep/legionforge-risky-exec.yml +++ b/semgrep/legionforge-risky-exec.yml @@ -7,6 +7,12 @@ # Scope: shell (.sh/.bash), PowerShell (.ps1/.psm1), and CI YAML, matched in # Semgrep "generic" mode so a single ruleset spans every file type. # +# Comment handling: rules use pattern-regex (raw text), which bypasses generic +# mode's comment tokenizer — so each rule ANDs a `pattern-not-regex` excluding +# full-line #-comments. This keeps documented usage examples (e.g. a +# "# curl ... | bash" install note) from tripping the rules; only executable +# lines are flagged. (# is the line-comment char in shell, PowerShell, and YAML.) +# # Run: # semgrep --config semgrep/legionforge-risky-exec.yml --error # Wired into scripts/audit.sh and .github/workflows/supply-chain.yml. 
@@ -25,6 +31,7 @@ rules: include: ["*.sh", "*.bash", "*.yml", "*.yaml", "*.zsh"] patterns: - pattern-regex: '(curl|wget)\b[^\n|]*\|[^\n]*\b(sh|bash|zsh|dash)\b' + - pattern-not-regex: '(?m)^\s*#.*' # ── PowerShell download-cradle ─────────────────────────────────────────────── - id: lf-powershell-download-cradle @@ -42,6 +49,7 @@ rules: - pattern-regex: '(?i)(New-Object\s+Net\.WebClient)[^\n]*\.DownloadString' - pattern-regex: '(?i)DownloadString\([^\n]*\)[^\n]*\|\s*(iex|Invoke-Expression)' - pattern-regex: '(?i)(iwr|Invoke-WebRequest|curl)\b[^\n|]*\|\s*(iex|Invoke-Expression)' + - pattern-not-regex: '(?m)^\s*#.*' # ── Decode-and-execute (obfuscation) ───────────────────────────────────────── - id: lf-decode-and-exec @@ -58,6 +66,7 @@ rules: - pattern-regex: '\bbase64\s+(-d|--decode)\b[^\n]*\|[^\n]*\b(sh|bash|python|perl|node)\b' - pattern-regex: '(?i)\[Convert\]::FromBase64String\([^\n]*\)[^\n]*(iex|Invoke-Expression)' - pattern-regex: '(?i)-(enc|encodedcommand)\b' + - pattern-not-regex: '(?m)^\s*#.*' # ── eval over fetched / command-substituted content ────────────────────────── - id: lf-eval-remote @@ -71,6 +80,7 @@ rules: include: ["*.sh", "*.bash", "*.zsh", "*.yml", "*.yaml"] patterns: - pattern-regex: '\beval\b[^\n]*\$\((curl|wget)\b' + - pattern-not-regex: '(?m)^\s*#.*' # ── TLS verification disabled on download ──────────────────────────────────── - id: lf-tls-verification-disabled @@ -88,6 +98,7 @@ rules: - pattern-regex: '\bcurl\b[^\n]*\s(-k|--insecure)\b' - pattern-regex: '\bwget\b[^\n]*--no-check-certificate\b' - pattern-regex: '(?i)ServerCertificateValidationCallback\s*=\s*\{?\s*\$?true' + - pattern-not-regex: '(?m)^\s*#.*' # ── Install straight from a VCS URL / git ref ──────────────────────────────── - id: lf-install-from-vcs-url @@ -105,6 +116,7 @@ rules: - pattern-regex: '\bpip(3)?\s+install\b[^\n]*\bgit\+' - pattern-regex: '\bnpm\s+(install|i)\b[^\n]*\b(git\+|https?://|github:)' - pattern-regex: '\bcargo\s+install\b[^\n]*--git\b' + - 
pattern-not-regex: '(?m)^\s*#.*' # ── npm install scripts re-enabled inline ──────────────────────────────────── - id: lf-npm-scripts-reenabled @@ -118,3 +130,4 @@ rules: include: ["*.sh", "*.bash", "*.yml", "*.yaml"] patterns: - pattern-regex: '\bnpm\b[^\n]*(--foreground-scripts|--ignore-scripts[=\s]+false)' + - pattern-not-regex: '(?m)^\s*#.*' diff --git a/src/legionforge_dev_rig/fixtures/http.py b/src/legionforge_dev_rig/fixtures/http.py index 1e2c988..a5cef48 100644 --- a/src/legionforge_dev_rig/fixtures/http.py +++ b/src/legionforge_dev_rig/fixtures/http.py @@ -1,5 +1,5 @@ """Shared httpx / respx fixtures for async HTTP provider testing.""" -from collections.abc import AsyncGenerator, Generator +from collections.abc import Generator from typing import Any import httpx @@ -26,9 +26,13 @@ def test_something(respx_mock_base_url): @pytest.fixture def mock_http_client(respx_mock_base_url: respx.MockRouter) -> httpx.AsyncClient: """ - An httpx.AsyncClient wired into the respx mock router. + An httpx.AsyncClient whose requests are intercepted by the respx mock. Pass this directly to provider constructors that accept a client parameter. + The respx_mock_base_url fixture activates respx.mock(), which patches httpx's + transport globally — so a plain AsyncClient is routed through the mock. (Do + NOT pass the router as `transport=`; a MockRouter is not an httpx transport.) 
+ Usage: async def test_health(mock_http_client): respx_mock_base_url.get("http://127.0.0.1:11434/api/tags").mock( @@ -37,7 +41,7 @@ async def test_health(mock_http_client): provider = OllamaProvider(client=mock_http_client) assert await provider.health_check() is True """ - return httpx.AsyncClient(transport=respx_mock_base_url) + return httpx.AsyncClient() def json_response(data: dict[str, Any], status: int = 200) -> httpx.Response: diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..b227387 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,5 @@ +# Re-export the shared fixtures so pytest discovers them in the rig's own +# self-test suite (mirrors the consuming-project pattern in examples/conftest.py). +from legionforge_dev_rig.fixtures import mock_http_client, respx_mock_base_url + +__all__ = ["mock_http_client", "respx_mock_base_url"] diff --git a/tests/test_fixtures_http.py b/tests/test_fixtures_http.py new file mode 100644 index 0000000..94239a8 --- /dev/null +++ b/tests/test_fixtures_http.py @@ -0,0 +1,56 @@ +"""Tests for the shared httpx/respx fixtures shipped by the dev-rig. + +These also serve as the rig's own self-test target so test.yml is validated +end-to-end against the rig itself (see .github/workflows/ci.yml). 
+""" +import httpx +import pytest + +from legionforge_dev_rig.fixtures import http as fx + + +def test_json_response_defaults() -> None: + resp = fx.json_response({"ok": True}) + assert resp.status_code == 200 + assert resp.json() == {"ok": True} + + +def test_json_response_custom_status() -> None: + resp = fx.json_response({"created": 1}, status=201) + assert resp.status_code == 201 + assert resp.json() == {"created": 1} + + +def test_error_response_with_detail() -> None: + resp = fx.error_response(404, "missing") + assert resp.status_code == 404 + assert resp.json() == {"detail": "missing"} + + +def test_error_response_without_detail() -> None: + resp = fx.error_response(500) + assert resp.status_code == 500 + assert resp.json() == {} + + +def test_respx_fixture_stubs_route(respx_mock_base_url) -> None: + respx_mock_base_url.get("http://svc.local/ping").mock( + return_value=httpx.Response(200, json={"pong": True}) + ) + # respx.mock() is active via the fixture; a plain client is intercepted. + with httpx.Client() as client: + r = client.get("http://svc.local/ping") + assert r.json() == {"pong": True} + + +@pytest.mark.asyncio +async def test_mock_http_client_is_async_and_wired( + mock_http_client, respx_mock_base_url +) -> None: + respx_mock_base_url.get("http://svc.local/health").mock( + return_value=httpx.Response(200, json={"status": "ok"}) + ) + async with mock_http_client as client: + r = await client.get("http://svc.local/health") + assert r.status_code == 200 + assert r.json() == {"status": "ok"}