diff --git a/docs/man/trufflehog.1 b/docs/man/trufflehog.1 index 666b21e51a0d..a85c40d1ecef 100644 --- a/docs/man/trufflehog.1 +++ b/docs/man/trufflehog.1 @@ -395,6 +395,9 @@ Docker namespace (organization or user). For non-Docker Hub registries, include .TP \fB--registry-token=REGISTRY-TOKEN\fR Optional Docker registry access token. Provide this if you want to include private images within the specified namespace. +.TP +\fB--registry=REGISTRY\fR +Scan all images in a registry host. Supports OCI Distribution Spec compliant registries (Harbor, Nexus, Artifactory, etc.). Use --registry-token for authentication. .SS \fBtravisci --token=TOKEN\fR Scan TravisCI diff --git a/main.go b/main.go index a73df6c0bc73..72116eef894f 100644 --- a/main.go +++ b/main.go @@ -202,6 +202,7 @@ var ( dockerExcludePaths = dockerScan.Flag("exclude-paths", "Comma separated list of paths to exclude from scan").String() dockerScanNamespace = dockerScan.Flag("namespace", "Docker namespace (organization or user). For non-Docker Hub registries, include the registry address as well (e.g., ghcr.io/namespace or quay.io/namespace).").String() dockerScanRegistryToken = dockerScan.Flag("registry-token", "Optional Docker registry access token. Provide this if you want to include private images within the specified namespace.").String() + dockerScanRegistry = dockerScan.Flag("registry", "Scan all images in a registry host. Supports OCI Distribution Spec compliant registries (Harbor, Nexus, Artifactory, etc.). Use --registry-token for authentication.").String() travisCiScan = cli.Command("travisci", "Scan TravisCI") travisCiScanToken = travisCiScan.Flag("token", "TravisCI token. 
Can also be provided with environment variable").Envar("TRAVISCI_TOKEN").Required().String() @@ -1014,21 +1015,38 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics, return scanMetrics, fmt.Errorf("invalid config: you cannot specify both images and namespace at the same time") } - if *dockerScanImages == nil && *dockerScanNamespace == "" { - return scanMetrics, fmt.Errorf("invalid config: both images and namespace cannot be empty; one is required") + if *dockerScanImages == nil && *dockerScanNamespace == "" && *dockerScanRegistry == "" { + return scanMetrics, fmt.Errorf("invalid config: one of --image, --namespace, or --registry is required") } - if *dockerScanRegistryToken != "" && *dockerScanNamespace == "" { - return scanMetrics, fmt.Errorf("invalid config: registry token can only be used with registry namespace") + if *dockerScanRegistry != "" && (*dockerScanImages != nil || *dockerScanNamespace != "") { + return scanMetrics, fmt.Errorf("invalid config: --registry cannot be combined with --image or --namespace") + } + + if *dockerScanRegistry != "" && isPublicRegistry(*dockerScanRegistry) { + return scanMetrics, fmt.Errorf("invalid config: --registry is for private registries only. 
Use --namespace for public registries (hub.docker.com, quay.io, ghcr.io)") + } + + if *dockerScanRegistryToken != "" && *dockerScanNamespace == "" && *dockerScanRegistry == "" { + return scanMetrics, fmt.Errorf("invalid config: --registry-token requires --namespace or --registry") + } + + // Sanitize registry host to remove protocol prefixes and paths + if *dockerScanRegistry != "" { + *dockerScanRegistry = sanitizeRegistryHost(*dockerScanRegistry) + if *dockerScanRegistry == "" { + return scanMetrics, fmt.Errorf("invalid config: --registry value is empty after removing protocol/path (e.g., 'https://' or ' ')") + } } cfg := sources.DockerConfig{ BearerToken: *dockerScanToken, Images: *dockerScanImages, - UseDockerKeychain: *dockerScanToken == "", + UseDockerKeychain: *dockerScanToken == "" && *dockerScanRegistry == "", ExcludePaths: strings.Split(*dockerExcludePaths, ","), Namespace: *dockerScanNamespace, RegistryToken: *dockerScanRegistryToken, + Registry: *dockerScanRegistry, } if ref, err := eng.ScanDocker(ctx, cfg); err != nil { return scanMetrics, fmt.Errorf("failed to scan Docker: %v", err) @@ -1287,6 +1305,72 @@ func validateClonePath(clonePath string, noCleanup bool) error { return nil } +// normalizeRegistryHost removes protocol prefixes and paths from a registry host string. +// This is a shared helper used by both isPublicRegistry and sanitizeRegistryHost. +// Returns the normalized hostname and a boolean indicating if it's empty after normalization. 
func normalizeRegistryHost(host string) (string, bool) {
	// Trim first so the scheme check below sees the real start of the value.
	host = strings.TrimSpace(host)

	// Drop a leading http(s) scheme. Matching is done on a lowercased copy so
	// "HTTPS://" is recognised while the rest of the input keeps its case.
	lowered := strings.ToLower(host)
	for _, scheme := range []string{"https://", "http://"} {
		if strings.HasPrefix(lowered, scheme) {
			host = host[len(scheme):]
			break
		}
	}

	// Keep only what precedes the first "/", discarding any path or trailing slash.
	host, _, _ = strings.Cut(host, "/")

	// Whitespace may have been exposed by the cuts above (e.g. "https:// host").
	host = strings.TrimSpace(host)
	return host, host == ""
}

// isPublicRegistry reports whether host names a known public registry.
// Public registries (DockerHub, Quay, GHCR) should use the --namespace flag instead
// of --registry because they have dedicated implementations with custom APIs.
//
// The host is normalized with normalizeRegistryHost and lowercased; an explicit
// ":port" and a trailing FQDN dot are ignored so that spellings such as
// "docker.io:443" or "quay.io." are still recognised. An input that is empty after
// normalization is reported as not public.
func isPublicRegistry(host string) bool {
	host, empty := normalizeRegistryHost(host)
	if empty {
		return false
	}

	host = strings.ToLower(host)

	// Strip a port only when there is exactly one colon; values with several
	// colons (IPv6 literals) are left untouched and simply won't match below.
	if strings.Count(host, ":") == 1 {
		host, _, _ = strings.Cut(host, ":")
	}
	// "ghcr.io." and "ghcr.io" denote the same DNS name.
	host = strings.TrimSuffix(host, ".")

	switch host {
	case "hub.docker.com",
		"docker.io",
		"registry-1.docker.io",
		"index.docker.io",
		"registry.hub.docker.com",
		"quay.io",
		"ghcr.io":
		return true
	}

	return false
}

// sanitizeRegistryHost removes protocol prefixes and paths from registry host.
// This ensures clean hostnames are passed to the registry implementation.
+// Examples: +// - "https://harbor.corp.io" -> "harbor.corp.io" +// - "HTTPS://harbor.corp.io" -> "harbor.corp.io" +// - "http://localhost:5000/path" -> "localhost:5000" +// - "registry.example.com" -> "registry.example.com" +func sanitizeRegistryHost(host string) string { + normalized, _ := normalizeRegistryHost(host) + return normalized +} + // isPreCommitHook detects if trufflehog is running as a pre-commit hook func isPreCommitHook() bool { // Pre-commit.com framework detection diff --git a/pkg/engine/docker.go b/pkg/engine/docker.go index c269d056467b..ea259df88520 100644 --- a/pkg/engine/docker.go +++ b/pkg/engine/docker.go @@ -19,6 +19,7 @@ func (e *Engine) ScanDocker(ctx context.Context, c sources.DockerConfig) (source ExcludePaths: c.ExcludePaths, Namespace: c.Namespace, RegistryToken: c.RegistryToken, + Registry: c.Registry, } switch { diff --git a/pkg/pb/sourcespb/sources.pb.go b/pkg/pb/sourcespb/sources.pb.go index 37424d7eee7c..6296c332712a 100644 --- a/pkg/pb/sourcespb/sources.pb.go +++ b/pkg/pb/sourcespb/sources.pb.go @@ -1182,6 +1182,7 @@ type Docker struct { ExcludePaths []string `protobuf:"bytes,6,rep,name=exclude_paths,json=excludePaths,proto3" json:"exclude_paths,omitempty"` Namespace string `protobuf:"bytes,7,opt,name=namespace,proto3" json:"namespace,omitempty"` RegistryToken string `protobuf:"bytes,8,opt,name=registry_token,json=registryToken,proto3" json:"registry_token,omitempty"` + Registry string `protobuf:"bytes,9,opt,name=registry,proto3" json:"registry,omitempty"` } func (x *Docker) Reset() { @@ -1279,6 +1280,13 @@ func (x *Docker) GetRegistryToken() string { return "" } +func (x *Docker) GetRegistry() string { + if x != nil { + return x.Registry + } + return "" +} + type isDocker_Credential interface { isDocker_Credential() } diff --git a/pkg/sources/docker/docker.go b/pkg/sources/docker/docker.go index 135bb10c40a1..c67e30862330 100644 --- a/pkg/sources/docker/docker.go +++ b/pkg/sources/docker/docker.go @@ -140,6 +140,21 @@ func (s 
*Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, _ . s.conn.Images = append(s.conn.Images, namespaceImages...) } + // if a registry host is set, enumerate all images from that registry via /v2/_catalog. + if registryHost := s.conn.GetRegistry(); registryHost != "" { + start := time.Now() + registry := MakeRegistryFromHost(registryHost) + if token := s.conn.GetRegistryToken(); token != "" { + registry.WithRegistryToken(token) + } + registryImages, err := registry.ListImages(ctx, "") + if err != nil { + return fmt.Errorf("failed to list registry %s images: %w", registryHost, err) + } + dockerListImagesAPIDuration.WithLabelValues(s.name).Observe(time.Since(start).Seconds()) + s.conn.Images = append(s.conn.Images, registryImages...) + } + for _, image := range s.conn.GetImages() { if common.IsDone(ctx) { return nil diff --git a/pkg/sources/docker/registries.go b/pkg/sources/docker/registries.go index b4402c50835b..a0ee6463c57b 100644 --- a/pkg/sources/docker/registries.go +++ b/pkg/sources/docker/registries.go @@ -413,3 +413,129 @@ func discardBody(resp *http.Response) { _ = resp.Body.Close() } } + +// === Generic OCI Registry === + +// GenericOCIRegistry implements the Registry interface for any OCI Distribution Spec +// compliant registry (Harbor, Nexus, Artifactory, etc.) using the /v2/_catalog endpoint. +type GenericOCIRegistry struct { + Host string + Token string + Client *http.Client + scheme string // defaults to "https"; overridable for testing +} + +// catalogResp models the JSON response from the /v2/_catalog endpoint. 
+type catalogResp struct { + Repositories []string `json:"repositories"` +} + +func (g *GenericOCIRegistry) Name() string { + return g.Host +} + +func (g *GenericOCIRegistry) WithRegistryToken(token string) { + g.Token = token +} + +func (g *GenericOCIRegistry) WithClient(client *http.Client) { + g.Client = client +} + +// ListImages enumerates all repositories from an OCI Distribution Spec compliant registry +// using the /v2/_catalog endpoint. The namespace parameter is unused. +// Pagination is handled via the Link response header. +func (g *GenericOCIRegistry) ListImages(ctx context.Context, _ string) ([]string, error) { + scheme := g.scheme + if scheme == "" { + scheme = "https" + } + + baseURL := &url.URL{ + Scheme: scheme, + Host: g.Host, + Path: "v2/_catalog", + } + + query := baseURL.Query() + query.Set("n", fmt.Sprint(maxRegistryPageSize)) + baseURL.RawQuery = query.Encode() + + allImages := []string{} + nextURL := baseURL.String() + + for nextURL != "" { + if err := registryRateLimiter.Wait(ctx); err != nil { + return nil, err + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, nextURL, http.NoBody) + if err != nil { + return nil, err + } + + if g.Token != "" { + req.Header.Set("Authorization", "Bearer "+g.Token) + } + + client := g.Client + if client == nil { + client = defaultHTTPClient + } + resp, err := client.Do(req) + if err != nil { + return nil, err + } + + body, err := io.ReadAll(resp.Body) + discardBody(resp) + if err != nil { + return nil, err + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to list registry images: unexpected status code: %d", resp.StatusCode) + } + + var page catalogResp + if err := json.Unmarshal(body, &page); err != nil { + return nil, err + } + + for _, repo := range page.Repositories { + allImages = append(allImages, fmt.Sprintf("%s/%s", g.Host, repo)) + } + + linkHeader := resp.Header.Get("Link") + if linkHeader != "" { + var err error + nextURL, err = 
resolveNextURL(baseURL, linkHeader) + if err != nil { + return nil, fmt.Errorf("pagination failed: %w", err) + } + } else { + nextURL = "" + } + } + + return allImages, nil +} + +func resolveNextURL(baseURL *url.URL, linkHeader string) (string, error) { + nextLink := parseNextLinkURL(linkHeader) + if nextLink == "" { + return "", nil + } + + parsedNext, err := url.Parse(nextLink) + if err != nil { + return "", fmt.Errorf("failed to parse next link URL %q: %w", nextLink, err) + } + + return baseURL.ResolveReference(parsedNext).String(), nil +} + +// MakeRegistryFromHost returns a GenericOCIRegistry for the given registry host. +func MakeRegistryFromHost(host string) Registry { + return &GenericOCIRegistry{Host: host} +} diff --git a/pkg/sources/docker/registries_test.go b/pkg/sources/docker/registries_test.go index f6f3c04047dc..ed14c789dff7 100644 --- a/pkg/sources/docker/registries_test.go +++ b/pkg/sources/docker/registries_test.go @@ -1,8 +1,10 @@ package docker import ( + "encoding/json" "fmt" "net/http" + "net/http/httptest" "slices" "testing" @@ -100,3 +102,139 @@ func TestGHCRListImages_RateLimitError(t *testing.T) { assert.Error(t, err) assert.Nil(t, ghcrImages) } + +func TestGenericOCIRegistryListImages(t *testing.T) { + t.Parallel() + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/v2/_catalog", r.URL.Path) + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(catalogResp{Repositories: []string{"myapp", "mydb"}}) + })) + defer srv.Close() + + reg := &GenericOCIRegistry{Host: srv.Listener.Addr().String()} + reg.WithClient(srv.Client()) + + // Override scheme to http for the test server. 
+ reg.scheme = "http" + + images, err := reg.ListImages(context.Background(), "") + assert.NoError(t, err) + + expected := []string{ + srv.Listener.Addr().String() + "/myapp", + srv.Listener.Addr().String() + "/mydb", + } + slices.Sort(images) + slices.Sort(expected) + assert.Equal(t, expected, images) +} + +func TestGenericOCIRegistryListImages_Pagination(t *testing.T) { + t.Parallel() + + page := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + if page == 0 { + w.Header().Set("Link", `; rel="next"`) + _ = json.NewEncoder(w).Encode(catalogResp{Repositories: []string{"repo1", "repo2"}}) + page++ + } else { + _ = json.NewEncoder(w).Encode(catalogResp{Repositories: []string{"repo3"}}) + } + })) + defer srv.Close() + + reg := &GenericOCIRegistry{Host: srv.Listener.Addr().String(), scheme: "http"} + reg.WithClient(srv.Client()) + + images, err := reg.ListImages(context.Background(), "") + assert.NoError(t, err) + assert.Len(t, images, 3) +} + +func TestGenericOCIRegistryListImages_PaginationAbsoluteURL(t *testing.T) { + t.Parallel() + + page := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + if page == 0 { + w.Header().Set("Link", fmt.Sprintf(`<%s/v2/_catalog?n=2&last=repo2>; rel="next"`, "http://"+r.Host)) + _ = json.NewEncoder(w).Encode(catalogResp{Repositories: []string{"repo1", "repo2"}}) + page++ + } else { + _ = json.NewEncoder(w).Encode(catalogResp{Repositories: []string{"repo3"}}) + } + })) + defer srv.Close() + + reg := &GenericOCIRegistry{Host: srv.Listener.Addr().String(), scheme: "http"} + reg.WithClient(srv.Client()) + + images, err := reg.ListImages(context.Background(), "") + assert.NoError(t, err) + assert.Len(t, images, 3) +} + +func TestGenericOCIRegistryListImages_AuthHeader(t *testing.T) { + t.Parallel() + + srv := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "Bearer mytoken", r.Header.Get("Authorization")) + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(catalogResp{Repositories: []string{"secured-app"}}) + })) + defer srv.Close() + + reg := &GenericOCIRegistry{Host: srv.Listener.Addr().String(), scheme: "http"} + reg.WithClient(srv.Client()) + reg.WithRegistryToken("mytoken") + + images, err := reg.ListImages(context.Background(), "") + assert.NoError(t, err) + assert.Equal(t, []string{srv.Listener.Addr().String() + "/secured-app"}, images) +} + +func TestGenericOCIRegistryListImages_ErrorStatus(t *testing.T) { + t.Parallel() + + reg := &GenericOCIRegistry{Host: "127.0.0.1:9"} + reg.WithClient(common.ConstantResponseHttpClient(http.StatusUnauthorized, "{}")) + reg.scheme = "http" + + images, err := reg.ListImages(context.Background(), "") + assert.Error(t, err) + assert.Nil(t, images) +} + +func TestGenericOCIRegistryListImages_MalformedLinkHeader(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Link", `; rel="next"`) + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"repositories":["repo1","repo2"]}`)) + })) + defer server.Close() + + reg := &GenericOCIRegistry{Host: server.URL[7:]} + reg.scheme = "http" + + images, err := reg.ListImages(context.Background(), "") + assert.Error(t, err) + assert.Contains(t, err.Error(), "pagination failed") + assert.Nil(t, images) +} + +func TestMakeRegistryFromHost(t *testing.T) { + t.Parallel() + + reg := MakeRegistryFromHost("registry.example.com") + assert.Equal(t, "registry.example.com", reg.Name()) + _, ok := reg.(*GenericOCIRegistry) + assert.True(t, ok) +} diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index 88b11b85a5ea..c44c67b4d114 100644 --- a/pkg/sources/sources.go +++ 
b/pkg/sources/sources.go @@ -233,6 +233,8 @@ type DockerConfig struct { Namespace string // RegistryToken is an optional authentication token used to access private images within the namespace. RegistryToken string + // Registry is the full registry host to enumerate all images from (e.g., registry.example.com). + Registry string } // GCSConfig defines the optional configuration for a GCS source. diff --git a/proto/sources.proto b/proto/sources.proto index cf64caca8687..40a1b8edf7df 100644 --- a/proto/sources.proto +++ b/proto/sources.proto @@ -168,6 +168,7 @@ message Docker { repeated string exclude_paths = 6; string namespace = 7; string registry_token = 8; + string registry = 9; } message ECR {