diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index affda2039dd0..2a5f413b10f2 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -50,3 +50,18 @@ jobs: steps: - uses: actions/checkout@v4 - run: semgrep --config=hack/semgrep-rules/detectors.yaml pkg/detectors/ + checksecretparts: + # Warning-only: reports detector packages that construct detectors.Result + # without populating SecretParts. Flip to hard-fail (drop continue-on-error + # and run with -fail) after every detector has been migrated. See + # hack/checksecretparts/README.md. + name: checksecretparts (warning) + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: "1.25" + - name: Run checksecretparts + run: go run ./hack/checksecretparts ./pkg/detectors diff --git a/hack/checksecretparts/README.md b/hack/checksecretparts/README.md new file mode 100644 index 000000000000..4f8a92832855 --- /dev/null +++ b/hack/checksecretparts/README.md @@ -0,0 +1,59 @@ +# checksecretparts + +Static analysis check that finds detector packages which construct +`detectors.Result` values without populating the `SecretParts` field. + +## What it checks + +For each directory under `pkg/detectors/` (recursing into subpackages): + +1. Find every composite literal of the form `detectors.Result{...}` or + `&detectors.Result{...}` in non-test `.go` files. +2. If the package does not mention `SecretParts` anywhere (neither in the + literal nor in a later `x.SecretParts = ...` assignment), emit a warning + for each construction site. + +Test files (`_test.go`) are ignored on both sides — construction sites in +tests are not flagged, and `SecretParts` references in tests do not suppress +findings, because some tests zero the field for comparison. + +## Why warning-only + +This check ships as **warning-only** because ~907 existing detectors do not +yet populate `SecretParts` (see the SecretParts design doc, step C). +Hard-failing today would block every unrelated PR. The check is wired into +CI with `continue-on-error: true` so the findings are visible without +gating merges. + +## Running locally + +```sh +# Warning mode (default): prints findings, always exits 0 unless scanning fails. +go run ./hack/checksecretparts + +# Scan specific directories instead of ./pkg/detectors. +go run ./hack/checksecretparts ./pkg/detectors/aws ./pkg/detectors/github + +# Fail mode: exit 1 if any findings are reported. Use this once every detector +# has been migrated to populate SecretParts. +go run ./hack/checksecretparts -fail +``` + +## Flipping warning → fail + +Once step C is complete and every detector populates `SecretParts`, make +this check gating: + +1. In `.github/workflows/lint.yml`, drop `continue-on-error: true` from the + `checksecretparts` job and change the run step to pass `-fail`. +2. Land any remaining migrations in the same PR as the flip. + +## Scope limits + +- It is a syntactic check. It matches `detectors.Result` by selector-expr + name; packages that rename the import (`d "...detectors"`) would not be + caught. No such rename exists in the current codebase. +- It does not verify that `SecretParts` is populated on every code path — + only that the package touches the field at all. A finer-grained + dataflow check is deliberately out of scope; the rough check is enough + to surface unmigrated detectors. diff --git a/hack/checksecretparts/check.go b/hack/checksecretparts/check.go new file mode 100644 index 000000000000..c1995ba6ace5 --- /dev/null +++ b/hack/checksecretparts/check.go @@ -0,0 +1,184 @@ +package main + +import ( + "fmt" + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "sort" + "strings" +) + +// Finding describes a single detectors.Result{} construction in a package that +// never references the SecretParts field. +type Finding struct { + // Position is the source location of the offending composite literal. + Position token.Position + // Package is the directory containing the finding. + Package string +} + +// CheckPackageDir runs the SecretParts check on a single directory. It returns +// one Finding per detectors.Result{} construction site in the directory, +// filtered so that packages which mention SecretParts anywhere produce no +// findings. Test files (_test.go) are ignored on both sides: construction +// sites in them are not reported, and references in them do not suppress +// findings (test files commonly zero the field for comparison — see +// pkg/detectors/gitlab/v1/gitlab_integration_test.go). +func CheckPackageDir(dir string) ([]Finding, error) { + fset := token.NewFileSet() + entries, err := os.ReadDir(dir) + if err != nil { + return nil, err + } + + var files []*ast.File + for _, e := range entries { + if e.IsDir() { + continue + } + name := e.Name() + if !strings.HasSuffix(name, ".go") || strings.HasSuffix(name, "_test.go") { + continue + } + path := filepath.Join(dir, name) + f, err := parser.ParseFile(fset, path, nil, parser.SkipObjectResolution) + if err != nil { + return nil, fmt.Errorf("parse %s: %w", path, err) + } + files = append(files, f) + } + if len(files) == 0 { + return nil, nil + } + + return checkFiles(fset, dir, files), nil +} + +// checkFiles inspects a set of parsed, non-test files from a single directory +// and returns findings. It is separated from CheckPackageDir so that tests can +// drive it with synthetic ASTs. +func checkFiles(fset *token.FileSet, dir string, files []*ast.File) []Finding { + var ( + constructions []token.Position + hasSecretParts bool + ) + + for _, f := range files { + if fileReferencesSecretParts(f) { + hasSecretParts = true + } + constructions = append(constructions, findResultConstructions(fset, f)...) + } + + if hasSecretParts || len(constructions) == 0 { + return nil + } + + sort.Slice(constructions, func(i, j int) bool { + if constructions[i].Filename != constructions[j].Filename { + return constructions[i].Filename < constructions[j].Filename + } + return constructions[i].Offset < constructions[j].Offset + }) + + out := make([]Finding, len(constructions)) + for i, pos := range constructions { + out[i] = Finding{Position: pos, Package: dir} + } + return out +} + +// findResultConstructions returns positions of composite literals with the +// type detectors.Result (selector expression with identifier "detectors" and +// selector "Result"). It covers both bare literals and pointer literals +// (&detectors.Result{}). +func findResultConstructions(fset *token.FileSet, f *ast.File) []token.Position { + var positions []token.Position + ast.Inspect(f, func(n ast.Node) bool { + lit, ok := n.(*ast.CompositeLit) + if !ok { + return true + } + if !isDetectorsResultType(lit.Type) { + return true + } + if hasSecretPartsKey(lit) { + return true + } + positions = append(positions, fset.Position(lit.Lbrace)) + return true + }) + return positions +} + +func isDetectorsResultType(expr ast.Expr) bool { + // Unwrap *detectors.Result which won't appear for composite literal Type, + // but handle parenthesized forms defensively. + for { + switch e := expr.(type) { + case *ast.ParenExpr: + expr = e.X + continue + case *ast.StarExpr: + expr = e.X + continue + } + break + } + sel, ok := expr.(*ast.SelectorExpr) + if !ok { + return false + } + pkg, ok := sel.X.(*ast.Ident) + if !ok { + return false + } + return pkg.Name == "detectors" && sel.Sel.Name == "Result" +} + +func hasSecretPartsKey(lit *ast.CompositeLit) bool { + for _, elt := range lit.Elts { + kv, ok := elt.(*ast.KeyValueExpr) + if !ok { + continue + } + key, ok := kv.Key.(*ast.Ident) + if !ok { + continue + } + if key.Name == "SecretParts" { + return true + } + } + return false +} + +// fileReferencesSecretParts returns true if the file mentions the identifier +// "SecretParts" in any form: a composite-literal key, a selector expression +// (x.SecretParts), or a bare identifier. The rationale is that if a detector +// package touches SecretParts at all — whether on the construction site or in +// a later assignment — it has been migrated; the check's job is to find +// packages that never touch it. +func fileReferencesSecretParts(f *ast.File) bool { + found := false + ast.Inspect(f, func(n ast.Node) bool { + if found { + return false + } + switch x := n.(type) { + case *ast.Ident: + if x.Name == "SecretParts" { + found = true + } + case *ast.SelectorExpr: + if x.Sel != nil && x.Sel.Name == "SecretParts" { + found = true + } + } + return !found + }) + return found +} diff --git a/hack/checksecretparts/check_test.go b/hack/checksecretparts/check_test.go new file mode 100644 index 000000000000..7d390c4a638f --- /dev/null +++ b/hack/checksecretparts/check_test.go @@ -0,0 +1,213 @@ +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestCheckPackageDir(t *testing.T) { + tests := []struct { + name string + files map[string]string + wantLen int + }{ + { + name: "missing SecretParts is reported", + files: map[string]string{ + "det.go": `package det + +import "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + +func FromData() detectors.Result { + return detectors.Result{ + DetectorType: 1, + Raw: []byte("x"), + } +} +`, + }, + wantLen: 1, + }, + { + name: "SecretParts set in composite literal is accepted", + files: map[string]string{ + "det.go": `package det + +import "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + +func FromData() detectors.Result { + return detectors.Result{ + DetectorType: 1, + Raw: []byte("x"), + SecretParts: map[string]string{"key": "v"}, + } +} +`, + }, + wantLen: 0, + }, + { + name: "SecretParts assigned later is accepted", + files: map[string]string{ + "det.go": `package det + +import "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + +func FromData() detectors.Result { + r := detectors.Result{ + DetectorType: 1, + Raw: []byte("x"), + } + r.SecretParts = map[string]string{"key": "v"} + return r +} +`, + }, + wantLen: 0, + }, + { + name: "no detectors.Result construction is a no-op", + files: map[string]string{ + "util.go": `package det + +func Helper() string { return "" } +`, + }, + wantLen: 0, + }, + { + name: "SecretParts only mentioned in test file does NOT suppress warning", + files: map[string]string{ + "det.go": `package det + +import "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + +func FromData() detectors.Result { + return detectors.Result{ + DetectorType: 1, + Raw: []byte("x"), + } +} +`, + "det_test.go": `package det + +import ( + "testing" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" +) + +func TestZeroSecretParts(t *testing.T) { + r := detectors.Result{} + r.SecretParts = nil + _ = r +} +`, + }, + wantLen: 1, + }, + { + name: "pointer composite literal is detected", + files: map[string]string{ + "det.go": `package det + +import "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + +func FromData() *detectors.Result { + return &detectors.Result{ + DetectorType: 1, + Raw: []byte("x"), + } +} +`, + }, + wantLen: 1, + }, + { + name: "multiple construction sites reported individually", + files: map[string]string{ + "det.go": `package det + +import "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + +func One() detectors.Result { return detectors.Result{Raw: []byte("a")} } +func Two() detectors.Result { return detectors.Result{Raw: []byte("b")} } +`, + }, + wantLen: 2, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + dir := t.TempDir() + for name, contents := range tc.files { + if err := os.WriteFile(filepath.Join(dir, name), []byte(contents), 0o600); err != nil { + t.Fatal(err) + } + } + got, err := CheckPackageDir(dir) + if err != nil { + t.Fatalf("CheckPackageDir: %v", err) + } + if len(got) != tc.wantLen { + t.Fatalf("got %d findings, want %d: %+v", len(got), tc.wantLen, got) + } + for _, f := range got { + if !strings.HasSuffix(f.Position.Filename, ".go") { + t.Errorf("finding has non-Go filename: %s", f.Position.Filename) + } + if f.Position.Line == 0 { + t.Errorf("finding has zero line number: %+v", f) + } + } + }) + } +} + +func TestCollectPackageDirs(t *testing.T) { + root := t.TempDir() + mustWrite := func(rel, contents string) { + path := filepath.Join(root, rel) + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(path, []byte(contents), 0o600); err != nil { + t.Fatal(err) + } + } + mustWrite("a/a.go", "package a\n") + mustWrite("a/b/b.go", "package b\n") + mustWrite("a/b/b_test.go", "package b\n") // must not cause b to be seen twice + mustWrite("a/testdata/skipme.go", "package skipme\n") + mustWrite("a/vendor/v.go", "package v\n") + mustWrite("a/empty/.keep", "") + + dirs, err := collectPackageDirs([]string{root + "/..."}) + if err != nil { + t.Fatal(err) + } + got := map[string]bool{} + for _, d := range dirs { + // Trim to relative for stable comparison. + rel, err := filepath.Rel(root, d) + if err != nil { + t.Fatal(err) + } + got[rel] = true + } + wantIn := []string{"a", filepath.Join("a", "b")} + for _, w := range wantIn { + if !got[w] { + t.Errorf("missing expected dir %q; got %v", w, got) + } + } + skip := []string{filepath.Join("a", "testdata"), filepath.Join("a", "vendor"), filepath.Join("a", "empty")} + for _, s := range skip { + if got[s] { + t.Errorf("unexpected dir %q in results", s) + } + } +} diff --git a/hack/checksecretparts/main.go b/hack/checksecretparts/main.go new file mode 100644 index 000000000000..209decce9973 --- /dev/null +++ b/hack/checksecretparts/main.go @@ -0,0 +1,146 @@ +// checksecretparts is a static analysis tool that finds detector packages +// which construct detectors.Result values without populating the SecretParts +// field. +// +// It runs as WARNING by default (exit code 0 even when findings exist). Pass +// -fail to exit non-zero on findings; this is intended for use after every +// detector has been migrated to populate SecretParts (see the SecretParts +// design doc, step C). +// +// Usage: +// +// go run ./hack/checksecretparts [dir ...] +// go run ./hack/checksecretparts -fail ./pkg/detectors/... +// +// With no arguments, it scans ./pkg/detectors. The "/..." suffix is accepted +// for parity with go list but is stripped — the tool always recurses. +package main + +import ( + "flag" + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" +) + +func main() { + var ( + failOnFindings bool + quiet bool + ) + flag.BoolVar(&failOnFindings, "fail", false, "exit 1 if any findings are reported (default: warning-only)") + flag.BoolVar(&quiet, "quiet", false, "suppress the summary line when no findings are reported") + flag.Usage = func() { + fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [flags] [dir ...]\n", os.Args[0]) + fmt.Fprintln(flag.CommandLine.Output(), "\nFinds detector packages that construct detectors.Result without setting SecretParts.") + fmt.Fprintln(flag.CommandLine.Output(), "\nFlags:") + flag.PrintDefaults() + } + flag.Parse() + + roots := flag.Args() + if len(roots) == 0 { + roots = []string{"./pkg/detectors"} + } + + pkgDirs, err := collectPackageDirs(roots) + if err != nil { + fmt.Fprintln(os.Stderr, "checksecretparts:", err) + os.Exit(2) + } + + var findings []Finding + for _, dir := range pkgDirs { + f, err := CheckPackageDir(dir) + if err != nil { + fmt.Fprintf(os.Stderr, "checksecretparts: %s: %v\n", dir, err) + os.Exit(2) + } + findings = append(findings, f...) + } + + for _, f := range findings { + fmt.Printf("%s: warning: detectors.Result constructed without SecretParts (no reference to SecretParts anywhere in package)\n", f.Position) + } + + if len(findings) > 0 { + pkgs := map[string]struct{}{} + for _, f := range findings { + pkgs[f.Package] = struct{}{} + } + fmt.Fprintf(os.Stderr, "checksecretparts: %d finding(s) across %d package(s) constructing detectors.Result without SecretParts\n", len(findings), len(pkgs)) + if failOnFindings { + os.Exit(1) + } + return + } + if !quiet { + fmt.Fprintf(os.Stderr, "checksecretparts: scanned %d package(s), no findings\n", len(pkgDirs)) + } +} + +// collectPackageDirs expands the caller-supplied roots into a sorted, +// deduplicated list of directories containing at least one non-test .go file. +func collectPackageDirs(roots []string) ([]string, error) { + seen := map[string]struct{}{} + var dirs []string + for _, r := range roots { + r = strings.TrimSuffix(r, "/...") + r = strings.TrimSuffix(r, "/") + info, err := os.Stat(r) + if err != nil { + return nil, fmt.Errorf("stat %s: %w", r, err) + } + if !info.IsDir() { + return nil, fmt.Errorf("%s is not a directory", r) + } + err = filepath.WalkDir(r, func(path string, d fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if !d.IsDir() { + return nil + } + name := d.Name() + if path != r && (name == "testdata" || name == "vendor" || strings.HasPrefix(name, ".")) { + return fs.SkipDir + } + hasGo, err := dirHasNonTestGoFile(path) + if err != nil { + return err + } + if !hasGo { + return nil + } + if _, ok := seen[path]; ok { + return nil + } + seen[path] = struct{}{} + dirs = append(dirs, path) + return nil + }) + if err != nil { + return nil, err + } + } + return dirs, nil +} + +func dirHasNonTestGoFile(dir string) (bool, error) { + entries, err := os.ReadDir(dir) + if err != nil { + return false, err + } + for _, e := range entries { + if e.IsDir() { + continue + } + name := e.Name() + if strings.HasSuffix(name, ".go") && !strings.HasSuffix(name, "_test.go") { + return true, nil + } + } + return false, nil +}