From c24e0dffe9b9ae7857079649726674b0fc6be116 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Sun, 3 May 2026 15:45:32 +0000 Subject: [PATCH 1/8] sync: add remote-SHA filter as a third layer of the upload pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sync runs in three layers: 1. Discovery (libs/git, libs/fileset): walk the local tree. 2. Snapshot diff (libs/sync/snapshot.go, diff.go): compare against the local mtime snapshot to produce an action plan (puts/deletes/etc). 3. Remote filter (new, libs/sync/remote_filter.go): pre-flight that bulk-fetches content SHAs from the workspace and drops puts whose remote SHA already matches the local SHA. Layer 3 only runs when the snapshot is fresh (no prior local state) — the case where Layer 2 produces false-positive puts at scale, e.g. on a CI runner that has just cloned the repo. With an existing snapshot, Layer 2 is precise; paying for a bulk remote list would be wasted work. The remote SHA list uses /api/2.0/workspace/list-repo with the return_wsfs_metadata=true flag, exposed via a new RemoteFileMetadata type and ListWithSHAs method on WorkspaceFilesClient. Errors and missing remote state degrade gracefully: the filter returns the unmodified diff and the worst case is the existing behavior (re-upload). Verified end-to-end against bundle-dev: a sync that deletes the local snapshot and re-runs produces zero uploads when contents are unchanged, and uploads only the edited file when one file changes. Notebooks (.py with the magic header and .ipynb) preserve raw uploaded bytes server-side, so their SHAs match local SHAs verbatim — no notebook-specific carve-out needed. Snapshot schema is unchanged; this is purely additive. Co-authored-by: Isaac --- libs/filer/workspace_files_client.go | 66 +++++++++++ libs/fileset/file.go | 6 + libs/sync/remote_filter.go | 133 +++++++++++++++++++++ libs/sync/remote_filter_test.go | 169 +++++++++++++++++++++++++++ libs/sync/sync.go | 46 +++++++- 5 files changed, 416 insertions(+), 4 deletions(-) create mode 100644 libs/sync/remote_filter.go create mode 100644 libs/sync/remote_filter_test.go diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index c6c62816bbc..99b41a02a47 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -338,6 +338,72 @@ func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error { }) } +// RemoteFileMetadata describes a single workspace object returned by the +// list-repo API with return_wsfs_metadata=true. +type RemoteFileMetadata struct { + // Absolute workspace path. Note that for notebooks, the extension is + // stripped (e.g. /Workspace/foo/bar.py -> /Workspace/foo/bar). + Path string + + // SHA-256 hex digest of the blob. Populated only for FILE and NOTEBOOK + // objects (not directories) when the workspace returns wsfs metadata. + ContentSHA256Hex string + + // "FILE", "NOTEBOOK", or "DIRECTORY". + ObjectType string +} + +// ListWithSHAs recursively lists all workspace objects under the given path +// and returns their content SHAs from the workspace's wsfs metadata. This uses +// /api/2.0/workspace/list-repo with the (currently undocumented in the SDK) +// return_wsfs_metadata=true flag, which causes the response to include a +// content_sha256_hex field for files and notebooks. +// +// Returns nil if the path does not exist; callers should treat that as "no +// remote state to merge" rather than an error. 
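+//
+// The decoded payload resembles the following sketch (field set taken from
+// the response struct decoded below; values are placeholders, not a verbatim
+// server response):
+//
+//	{"objects": [
+//	  {"object_type": "FILE", "path": "/Workspace/.../foo.txt",
+//	   "content_sha256_hex": "<64 hex chars>", "has_wsfs_metadata": true},
+//	  {"object_type": "DIRECTORY", "path": "/Workspace/.../dir"}
+//	]}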
+func (w *WorkspaceFilesClient) ListWithSHAs(ctx context.Context, dirPath string) ([]RemoteFileMetadata, error) { + type listObject struct { + ObjectType string `json:"object_type"` + Path string `json:"path"` + ContentSHA256Hex string `json:"content_sha256_hex"` + HasWsfsMetadata bool `json:"has_wsfs_metadata"` + } + type listResponse struct { + Objects []listObject `json:"objects"` + } + + var resp listResponse + err := w.apiClient.Do( + ctx, + http.MethodGet, + "/api/2.0/workspace/list-repo", + w.orgIDHeaders(), + nil, + map[string]any{ + "path": dirPath, + "return_wsfs_metadata": true, + }, + &resp, + ) + if err != nil { + var aerr *apierr.APIError + if errors.As(err, &aerr) && aerr.StatusCode == http.StatusNotFound { + return nil, nil + } + return nil, err + } + + out := make([]RemoteFileMetadata, 0, len(resp.Objects)) + for _, o := range resp.Objects { + out = append(out, RemoteFileMetadata{ + Path: o.Path, + ContentSHA256Hex: o.ContentSHA256Hex, + ObjectType: o.ObjectType, + }) + } + return out, nil +} + func (w *WorkspaceFilesClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { absPath, err := w.root.Join(name) if err != nil { diff --git a/libs/fileset/file.go b/libs/fileset/file.go index 0a27b294cf7..0d0f6440ab0 100644 --- a/libs/fileset/file.go +++ b/libs/fileset/file.go @@ -67,6 +67,12 @@ func (f File) Modified() (ts time.Time) { return info.ModTime() } +// Open returns a reader for the file contents. The caller is responsible +// for closing it. +func (f File) Open() (fs.File, error) { + return f.root.Open(f.Relative) +} + func (f *File) IsNotebook() (bool, error) { if f.fileType != Unknown { return f.fileType == Notebook, nil diff --git a/libs/sync/remote_filter.go b/libs/sync/remote_filter.go new file mode 100644 index 00000000000..dcf0427c2c8 --- /dev/null +++ b/libs/sync/remote_filter.go @@ -0,0 +1,133 @@ +package sync + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "io" + "path" + + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/log" +) + +// shaLister fetches content SHAs for a workspace directory in one bulk call. +// The interface lets tests stub the API call without spinning up a fake +// filer. +type shaLister interface { + ListWithSHAs(ctx context.Context, dirPath string) ([]filer.RemoteFileMetadata, error) +} + +// RemoteFilter is the third layer of the sync pipeline. It takes the action +// plan produced by the snapshot diff and drops puts whose remote SHA already +// matches the local SHA — files the workspace already has, byte-for-byte. +// +// The expensive work (one bulk list, one SHA per skipped put candidate) only +// pays off when the snapshot diff has produced false-positive puts at scale. +// The caller decides when to invoke Apply; today that's only on a fresh +// snapshot (no prior local state). +type RemoteFilter struct { + lister shaLister + remotePath string +} + +func NewRemoteFilter(lister shaLister, remotePath string) *RemoteFilter { + return &RemoteFilter{lister: lister, remotePath: remotePath} +} + +// Apply returns a copy of d with put entries removed for files whose local +// SHA already matches the remote SHA. Errors fetching or computing SHAs are +// logged and treated as "do not skip" — the worst case is an unnecessary +// upload, which is the existing behavior. 
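+//
+// A worked example (hypothetical paths; remotePath = "/Workspace/u/proj"):
+//
+//	d.put = ["a.py", "b.py"]
+//	remote SHA of /Workspace/u/proj/a.py == local SHA of a.py -> drop a.py
+//	remote SHA of /Workspace/u/proj/b.py != local SHA of b.py -> keep b.py
+//	returned put = ["b.py"]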
+func (f *RemoteFilter) Apply(ctx context.Context, d diff, files []fileset.File, localToRemote map[string]string) diff { + if len(d.put) == 0 || f == nil || f.lister == nil { + return d + } + + remote, err := f.lister.ListWithSHAs(ctx, f.remotePath) + if err != nil { + log.Warnf(ctx, "could not fetch remote content SHAs from %s; uploading all candidate files: %s", f.remotePath, err) + return d + } + if len(remote) == 0 { + return d + } + + remoteSHAByPath := make(map[string]string, len(remote)) + for _, e := range remote { + if e.ContentSHA256Hex == "" { + continue + } + remoteSHAByPath[e.Path] = e.ContentSHA256Hex + } + + localByRelative := make(map[string]*fileset.File, len(files)) + for i := range files { + localByRelative[files[i].Relative] = &files[i] + } + + keep := make([]string, 0, len(d.put)) + skipped := 0 + for _, p := range d.put { + if !f.canSkip(ctx, p, localByRelative, localToRemote, remoteSHAByPath) { + keep = append(keep, p) + continue + } + skipped++ + } + + if skipped > 0 { + log.Debugf(ctx, "remote-filter: skipped %d/%d uploads matching workspace SHAs", skipped, len(d.put)) + } + + return diff{ + delete: d.delete, + rmdir: d.rmdir, + mkdir: d.mkdir, + put: keep, + } +} + +// canSkip reports whether the put for relativePath can be safely dropped: +// the workspace already has a file at the corresponding remote path with the +// same SHA-256 as the local file. +func (f *RemoteFilter) canSkip( + ctx context.Context, + relativePath string, + localByRelative map[string]*fileset.File, + localToRemote map[string]string, + remoteSHAByPath map[string]string, +) bool { + local, ok := localByRelative[relativePath] + if !ok { + return false + } + remoteName, ok := localToRemote[relativePath] + if !ok { + return false + } + remoteSHA, ok := remoteSHAByPath[path.Join(f.remotePath, remoteName)] + if !ok { + return false + } + localSHA, err := computeFileSHA(local) + if err != nil { + log.Debugf(ctx, "remote-filter: hashing %s failed; will upload: %s", relativePath, err) + return false + } + return localSHA == remoteSHA +} + +func computeFileSHA(f *fileset.File) (string, error) { + rc, err := f.Open() + if err != nil { + return "", err + } + defer rc.Close() + h := sha256.New() + if _, err := io.Copy(h, rc); err != nil { + return "", err + } + return hex.EncodeToString(h.Sum(nil)), nil +} diff --git a/libs/sync/remote_filter_test.go b/libs/sync/remote_filter_test.go new file mode 100644 index 00000000000..33e74aecf5b --- /dev/null +++ b/libs/sync/remote_filter_test.go @@ -0,0 +1,169 @@ +package sync + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "path" + "testing" + + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type stubLister struct { + out []filer.RemoteFileMetadata + err error +} + +func (s *stubLister) ListWithSHAs(ctx context.Context, dirPath string) ([]filer.RemoteFileMetadata, error) { + return s.out, s.err +} + +func sha256OfFile(t *testing.T, root vfs.Path, relative string) string { + t.Helper() + r, err := root.Open(relative) + require.NoError(t, err) + defer r.Close() + h := sha256.New() + _, err = h.Write(mustReadAll(t, r)) + require.NoError(t, err) + return hex.EncodeToString(h.Sum(nil)) +} + +func mustReadAll(t *testing.T, r interface{ Read(p []byte) (int, error) }) []byte { + t.Helper() + buf := make([]byte, 0, 1024) + tmp := make([]byte, 256) + for { + n, err := r.Read(tmp) + if n > 0 { 
+ buf = append(buf, tmp[:n]...) + } + if err != nil { + break + } + } + return buf +} + +const remoteRoot = "/Workspace/Users/foo@databricks.com/proj" + +// loadTestFiles returns the test fileset and the mtime-based SnapshotState +// (which provides LocalToRemoteNames). All tests share the same on-disk +// fileset under testdata/sync-fileset. +func loadTestFiles(t *testing.T) ([]fileset.File, map[string]string) { + t.Helper() + fs := fileset.New(vfs.MustNew("./testdata/sync-fileset")) + files, err := fs.Files() + require.NoError(t, err) + state, err := NewSnapshotState(files) + require.NoError(t, err) + return files, state.LocalToRemoteNames +} + +func TestRemoteFilterReturnsUnchangedWhenNoPuts(t *testing.T) { + rf := NewRemoteFilter(&stubLister{}, remoteRoot) + d := diff{delete: []string{"old"}, rmdir: []string{"olddir"}} + got := rf.Apply(t.Context(), d, nil, nil) + assert.Equal(t, d.delete, got.delete) + assert.Equal(t, d.rmdir, got.rmdir) + assert.Empty(t, got.put) +} + +func TestRemoteFilterReturnsUnchangedWhenListErrors(t *testing.T) { + files, ltr := loadTestFiles(t) + rf := NewRemoteFilter(&stubLister{err: errors.New("boom")}, remoteRoot) + in := diff{put: []string{"my-script.py"}} + got := rf.Apply(t.Context(), in, files, ltr) + assert.Equal(t, []string{"my-script.py"}, got.put) +} + +func TestRemoteFilterReturnsUnchangedWhenRemoteEmpty(t *testing.T) { + files, ltr := loadTestFiles(t) + rf := NewRemoteFilter(&stubLister{out: nil}, remoteRoot) + in := diff{put: []string{"my-script.py"}} + got := rf.Apply(t.Context(), in, files, ltr) + assert.Equal(t, []string{"my-script.py"}, got.put) +} + +func TestRemoteFilterDropsPutWhenSHAMatches(t *testing.T) { + files, ltr := loadTestFiles(t) + root := vfs.MustNew("./testdata/sync-fileset") + wantSHA := sha256OfFile(t, root, "my-script.py") + + rf := NewRemoteFilter(&stubLister{out: []filer.RemoteFileMetadata{ + {Path: path.Join(remoteRoot, "my-script.py"), ContentSHA256Hex: wantSHA, ObjectType: "FILE"}, + }}, remoteRoot) + + in := diff{put: []string{"my-script.py"}} + got := rf.Apply(t.Context(), in, files, ltr) + assert.Empty(t, got.put) +} + +func TestRemoteFilterKeepsPutWhenSHAMismatches(t *testing.T) { + files, ltr := loadTestFiles(t) + rf := NewRemoteFilter(&stubLister{out: []filer.RemoteFileMetadata{ + {Path: path.Join(remoteRoot, "my-script.py"), ContentSHA256Hex: "deadbeef", ObjectType: "FILE"}, + }}, remoteRoot) + + in := diff{put: []string{"my-script.py"}} + got := rf.Apply(t.Context(), in, files, ltr) + assert.Equal(t, []string{"my-script.py"}, got.put) +} + +func TestRemoteFilterKeepsPutWhenRemoteMissing(t *testing.T) { + files, ltr := loadTestFiles(t) + rf := NewRemoteFilter(&stubLister{out: []filer.RemoteFileMetadata{ + {Path: path.Join(remoteRoot, "other-file"), ContentSHA256Hex: "x"}, + }}, remoteRoot) + + in := diff{put: []string{"my-script.py"}} + got := rf.Apply(t.Context(), in, files, ltr) + assert.Equal(t, []string{"my-script.py"}, got.put) +} + +func TestRemoteFilterDropsPutForNotebookWhenSHAMatches(t *testing.T) { + // Notebooks are stored without their .py extension on the workspace — + // LocalToRemoteNames maps "my-nb.py" -> "my-nb". The filter must use the + // remote name when looking up the SHA. 
+	files, ltr := loadTestFiles(t)
+	root := vfs.MustNew("./testdata/sync-fileset")
+	wantSHA := sha256OfFile(t, root, "my-nb.py")
+	require.Equal(t, "my-nb", ltr["my-nb.py"])
+
+	rf := NewRemoteFilter(&stubLister{out: []filer.RemoteFileMetadata{
+		{Path: path.Join(remoteRoot, "my-nb"), ContentSHA256Hex: wantSHA, ObjectType: "NOTEBOOK"},
+	}}, remoteRoot)
+
+	in := diff{put: []string{"my-nb.py"}}
+	got := rf.Apply(t.Context(), in, files, ltr)
+	assert.Empty(t, got.put)
+}
+
+func TestRemoteFilterMixedDiff(t *testing.T) {
+	files, ltr := loadTestFiles(t)
+	root := vfs.MustNew("./testdata/sync-fileset")
+	scriptSHA := sha256OfFile(t, root, "my-script.py")
+	validNbSHA := sha256OfFile(t, root, "valid-nb.ipynb")
+
+	rf := NewRemoteFilter(&stubLister{out: []filer.RemoteFileMetadata{
+		{Path: path.Join(remoteRoot, "my-script.py"), ContentSHA256Hex: scriptSHA, ObjectType: "FILE"},
+		{Path: path.Join(remoteRoot, "my-nb"), ContentSHA256Hex: "stale", ObjectType: "NOTEBOOK"},
+		{Path: path.Join(remoteRoot, "valid-nb"), ContentSHA256Hex: validNbSHA, ObjectType: "NOTEBOOK"},
+	}}, remoteRoot)
+
+	in := diff{
+		put:    []string{"my-script.py", "my-nb.py", "valid-nb.ipynb"},
+		delete: []string{"gone"},
+		mkdir:  []string{"new"},
+	}
+	got := rf.Apply(t.Context(), in, files, ltr)
+	assert.ElementsMatch(t, []string{"my-nb.py"}, got.put, "only the SHA-mismatched notebook should be re-uploaded")
+	assert.Equal(t, []string{"gone"}, got.delete)
+	assert.Equal(t, []string{"new"}, got.mkdir)
+}
diff --git a/libs/sync/sync.go b/libs/sync/sync.go
index c65b49eb775..9a6a7e52259 100644
--- a/libs/sync/sync.go
+++ b/libs/sync/sync.go
@@ -45,6 +45,24 @@ type SyncOptions struct {
 	DryRun bool
 }
 
+// Sync runs file synchronization in three layers:
+//
+//  1. Discovery (libs/git, libs/fileset): walks the local tree and produces a
+//     list of files to consider, honoring include/exclude rules.
+//
+//  2. Snapshot diff (libs/sync/snapshot.go, libs/sync/diff.go): compares the
+//     discovered files against a local snapshot of mtimes from the previous
+//     run and produces a diff (puts, deletes, mkdirs, rmdirs) — the action
+//     plan for this iteration. If no snapshot exists, every file becomes a
+//     put.
+//
+//  3. Remote filter (libs/sync/remote_filter.go): an optional pre-flight that
+//     fetches content SHAs from the workspace and drops puts whose remote SHA
+//     already matches the local SHA. We only run it when the snapshot is
+//     fresh (no prior state), which is the only case where Layer 2 produces
+//     false-positive puts at scale (e.g. on a CI runner). When a local
+//     snapshot exists, Layer 2 is already accurate enough; paying for a
+//     bulk remote list would be wasted work.
 type Sync struct {
 	*SyncOptions
 
@@ -52,8 +70,9 @@ type Sync struct {
 	includeFileSet *fileset.FileSet
 	excludeFileSet *fileset.FileSet
 
-	snapshot *Snapshot
-	filer    filer.Filer
+	snapshot     *Snapshot
+	filer        filer.Filer
+	remoteFilter *RemoteFilter
 
 	// Synchronization progress events are sent to this event notifier.
 	notifier EventNotifier
@@ -111,11 +130,19 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
 		}
 	}
 
-	filer, err := filer.NewWorkspaceFilesClient(opts.WorkspaceClient, opts.RemotePath)
+	filerImpl, err := filer.NewWorkspaceFilesClient(opts.WorkspaceClient, opts.RemotePath)
 	if err != nil {
 		return nil, err
 	}
 
+	// The remote SHA list call is not part of the Filer interface (it's a
+	// sync-only optimization, not a general filesystem op), so we type-assert
+	// the concrete client. Tests exercise the filter directly, constructing
+	// it with NewRemoteFilter and a stub shaLister.
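+	// If the assertion fails (some other Filer implementation), remoteFilter
+	// stays nil and RunOnce simply skips Layer 3.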
+ var remoteFilter *RemoteFilter + if wfc, ok := filerImpl.(*filer.WorkspaceFilesClient); ok { + remoteFilter = NewRemoteFilter(wfc, opts.RemotePath) + } + var notifier EventNotifier outputWaitGroup := &stdsync.WaitGroup{} if opts.OutputHandler != nil { @@ -135,7 +162,8 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { includeFileSet: includeFileSet, excludeFileSet: excludeFileSet, snapshot: snapshot, - filer: filer, + filer: filerImpl, + remoteFilter: remoteFilter, notifier: notifier, outputWaitGroup: outputWaitGroup, seq: 0, @@ -178,16 +206,26 @@ func (s *Sync) notifyComplete(ctx context.Context, d diff) { // Returns the list of files tracked (and synchronized) by the syncer during the run, // and an error if any occurred. func (s *Sync) RunOnce(ctx context.Context) ([]fileset.File, error) { + // Layer 1: discovery. files, err := s.GetFileList(ctx) if err != nil { return files, err } + // Layer 2: snapshot-driven action plan. change, err := s.snapshot.diff(ctx, files) if err != nil { return files, err } + // Layer 3: remote-state filter, only when the snapshot is fresh. + // With an existing snapshot, Layer 2 is precise; with no snapshot, every + // file is a put — so we ask the workspace what's already there and drop + // puts whose contents already match. + if s.snapshot.New && s.remoteFilter != nil { + change = s.remoteFilter.Apply(ctx, change, files, s.snapshot.LocalToRemoteNames) + } + s.notifyStart(ctx, change) if change.IsEmpty() { s.notifyComplete(ctx, change) From 087152b2758393089ccd7e221e99e79593449eb7 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 4 May 2026 08:57:58 +0000 Subject: [PATCH 2/8] testserver: handle /workspace/list-repo so Layer 3 doesn't error in acceptance Acceptance tests were failing on the direct engine path because Layer 3 of sync (the remote-SHA filter) calls /api/2.0/workspace/list-repo, but the testserver had no default handler. The CLI's filter logged a Warn line ("could not fetch remote content SHAs ... No stub found") which contaminated recorded output.txt diffs. Add WorkspaceListRepo to the FakeWorkspace: walks the in-memory files/directories maps, computes SHA-256 over each file's stored bytes, and returns objects in the same shape as the real list-repo response (path, object_type, content_sha256_hex, has_wsfs_metadata, size, language). On a workspace with no imported files, returns {"objects": []}, which causes Layer 3 to no-op cleanly. Co-authored-by: Isaac --- libs/testserver/fake_workspace.go | 52 +++++++++++++++++++++++++++++++ libs/testserver/handlers.go | 5 +++ 2 files changed, 57 insertions(+) diff --git a/libs/testserver/fake_workspace.go b/libs/testserver/fake_workspace.go index 5430c68cbcc..36596b58252 100644 --- a/libs/testserver/fake_workspace.go +++ b/libs/testserver/fake_workspace.go @@ -2,7 +2,9 @@ package testserver import ( "bytes" + "crypto/sha256" "encoding/binary" + "encoding/hex" "encoding/json" "fmt" "os" @@ -454,6 +456,56 @@ func (s *FakeWorkspace) WorkspaceFilesExportFile(path string) []byte { return s.files[path].Data } +// WorkspaceListRepo returns the recursive listing under root used by the +// /api/2.0/workspace/list-repo endpoint. The CLI calls this with +// return_wsfs_metadata=true, expecting a content_sha256_hex per file/notebook. 
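+//
+// For a fake workspace containing /root/a.py, the returned value marshals to
+// roughly (illustrative; shapes follow the listObject struct below):
+//
+//	{"objects": [
+//	  {"object_type": "DIRECTORY", "path": "/root"},
+//	  {"object_type": "FILE", "path": "/root/a.py",
+//	   "content_sha256_hex": "<sha256 of stored bytes>",
+//	   "has_wsfs_metadata": true, "size": 12}
+//	]}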
+func (s *FakeWorkspace) WorkspaceListRepo(root string) any { + if !strings.HasPrefix(root, "/") { + root = "/" + root + } + + defer s.LockUnlock()() + + type listObject struct { + ObjectType string `json:"object_type"` + Path string `json:"path"` + ContentSHA256Hex string `json:"content_sha256_hex,omitempty"` + HasWsfsMetadata bool `json:"has_wsfs_metadata,omitempty"` + Size int `json:"size,omitempty"` + Language string `json:"language,omitempty"` + } + + var objects []listObject + if _, ok := s.directories[root]; ok { + objects = append(objects, listObject{ObjectType: "DIRECTORY", Path: root}) + } + for p, dir := range s.directories { + if p == root || !strings.HasPrefix(p, root+"/") { + continue + } + objects = append(objects, listObject{ObjectType: "DIRECTORY", Path: dir.Path}) + } + for p, fe := range s.files { + if !strings.HasPrefix(p, root+"/") { + continue + } + sum := sha256.Sum256(fe.Data) + obj := listObject{ + ObjectType: string(fe.Info.ObjectType), + Path: fe.Info.Path, + ContentSHA256Hex: hex.EncodeToString(sum[:]), + HasWsfsMetadata: true, + Size: len(fe.Data), + } + if fe.Info.Language != "" { + obj.Language = string(fe.Info.Language) + } + objects = append(objects, obj) + } + + return map[string]any{"objects": objects} +} + // FileExists checks if a file exists at the given path. func (s *FakeWorkspace) FileExists(path string) bool { if !strings.HasPrefix(path, "/") { diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 8bd53391841..1ebab18051f 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -79,6 +79,11 @@ func AddDefaultHandlers(server *Server) { return req.Workspace.WorkspaceGetStatus(path) }) + server.Handle("GET", "/api/2.0/workspace/list-repo", func(req Request) any { + path := req.URL.Query().Get("path") + return req.Workspace.WorkspaceListRepo(path) + }) + server.Handle("POST", "/api/2.0/workspace/mkdirs", func(req Request) any { var request workspace.Mkdirs if err := json.Unmarshal(req.Body, &request); err != nil { From f2597a0f8e5e164cf2640f20f3344a9d51bb09a4 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 4 May 2026 09:16:29 +0000 Subject: [PATCH 3/8] acceptance: record list-repo call from Layer 3 in user_agent fixtures Layer 3 of sync issues a GET /api/2.0/workspace/list-repo with return_wsfs_metadata=true on cold-snapshot deploys (which is what acceptance tests do). Add the new request to the recorded fixtures for both engines so the diff comparison passes. The User-Agent on the new call inherits cmd/bundle_deploy and engine/ as expected. 
Co-authored-by: Isaac --- .../simple/out.requests.deploy.direct.json | 13 +++++++++++++ .../simple/out.requests.deploy.terraform.json | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/acceptance/bundle/user_agent/simple/out.requests.deploy.direct.json b/acceptance/bundle/user_agent/simple/out.requests.deploy.direct.json index cc39aad6e9b..a84d67497c6 100644 --- a/acceptance/bundle/user_agent/simple/out.requests.deploy.direct.json +++ b/acceptance/bundle/user_agent/simple/out.requests.deploy.direct.json @@ -123,6 +123,19 @@ "return_export_info": "true" } } +{ + "headers": { + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/bundle_deploy cmd-exec-id/[UUID] interactive/none engine/direct auth/pat" + ] + }, + "method": "GET", + "path": "/api/2.0/workspace/list-repo", + "q": { + "path": "/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files", + "return_wsfs_metadata": "true" + } +} { "headers": { "User-Agent": [ diff --git a/acceptance/bundle/user_agent/simple/out.requests.deploy.terraform.json b/acceptance/bundle/user_agent/simple/out.requests.deploy.terraform.json index 435b188af3b..9661fc629d1 100644 --- a/acceptance/bundle/user_agent/simple/out.requests.deploy.terraform.json +++ b/acceptance/bundle/user_agent/simple/out.requests.deploy.terraform.json @@ -123,6 +123,19 @@ "return_export_info": "true" } } +{ + "headers": { + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/bundle_deploy cmd-exec-id/[UUID] interactive/none engine/terraform auth/pat" + ] + }, + "method": "GET", + "path": "/api/2.0/workspace/list-repo", + "q": { + "path": "/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files", + "return_wsfs_metadata": "true" + } +} { "headers": { "User-Agent": [ From a72cd728245ec5c37cc526438acbbead5208c455 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 4 May 2026 09:29:49 +0000 Subject: [PATCH 4/8] acceptance: add list-repo entries to bundle/user_agent output expectations Companion to the previous fixture updates. The parent user_agent test aggregates User-Agent observations from per-engine recorded request logs and writes a shared output.txt; that needs the same list-repo entries (one per engine). 
Co-authored-by: Isaac --- acceptance/bundle/user_agent/output.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/acceptance/bundle/user_agent/output.txt b/acceptance/bundle/user_agent/output.txt index bf128624271..5cda322d3d8 100644 --- a/acceptance/bundle/user_agent/output.txt +++ b/acceptance/bundle/user_agent/output.txt @@ -8,6 +8,7 @@ OK deploy.direct /api/2.0/workspace/get-status engine/direct OK deploy.direct /api/2.0/workspace/get-status engine/direct OK deploy.direct /api/2.0/workspace/get-status engine/direct OK deploy.direct /api/2.0/workspace/get-status engine/direct +OK deploy.direct /api/2.0/workspace/list-repo engine/direct OK deploy.direct /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/empty.py engine/direct OK deploy.direct /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deploy.lock engine/direct OK deploy.direct /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deployment.json engine/direct @@ -28,6 +29,7 @@ OK deploy.terraform /api/2.0/workspace/get-status engine/terraform OK deploy.terraform /api/2.0/workspace/get-status engine/terraform OK deploy.terraform /api/2.0/workspace/get-status engine/terraform OK deploy.terraform /api/2.0/workspace/get-status engine/terraform +OK deploy.terraform /api/2.0/workspace/list-repo engine/terraform OK deploy.terraform /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/empty.py engine/terraform OK deploy.terraform /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deploy.lock engine/terraform OK deploy.terraform /api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deployment.json engine/terraform From 0083c16a48c6584033c5f896dfed96e52f2a743a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 4 May 2026 13:58:34 +0000 Subject: [PATCH 5/8] sync: add reusable workspace benchmarks for the list-repo + Layer 3 paths Behind a benchworkspace build tag so they don't run in CI. Three groups: - BenchmarkListRepoByCount: list-repo cost vs N (10/100/500/1000). - BenchmarkListRepoByContent: list-repo cost across plain files / notebooks (py/sql/ipynb) / dashboards / mixed at fixed N=200. - BenchmarkSyncRunOnceColdSnapshot: end-to-end Sync.RunOnce against a pre-warmed remote, with and without Layer 3, at N=20/100/500. The "without" variant zeroes out s.remoteFilter to bypass the new path. Each run creates and tears down a unique /Users/$USER/.tmp/sync-bench-X tree on the configured workspace. Run instructions are in the file's top comment. Required env: DATABRICKS_BENCH_PROFILE= DATABRICKS_BENCH_USER= go test -tags benchworkspace -bench=. -benchtime=5x -timeout=60m ./libs/sync/... Co-authored-by: Isaac --- libs/sync/workspace_bench_test.go | 309 ++++++++++++++++++++++++++++++ 1 file changed, 309 insertions(+) create mode 100644 libs/sync/workspace_bench_test.go diff --git a/libs/sync/workspace_bench_test.go b/libs/sync/workspace_bench_test.go new file mode 100644 index 00000000000..45301e43320 --- /dev/null +++ b/libs/sync/workspace_bench_test.go @@ -0,0 +1,309 @@ +//go:build benchworkspace + +// Workspace benchmarks for libs/sync. Real-network — they hit a Databricks +// workspace and import / list / delete files there. 
+// +// Quickstart: +// +// export DATABRICKS_BENCH_PROFILE=my-profile # required; profile from .databrickscfg +// export DATABRICKS_BENCH_USER=me@example.com # required; remote dirs go under /Users/$DATABRICKS_BENCH_USER/.tmp/ +// +// go test -tags benchworkspace -bench=. -benchtime=5x -timeout=60m ./libs/sync/... +// +// To run a single benchmark group: +// +// go test -tags benchworkspace -bench=BenchmarkListRepoByCount -benchtime=10x -timeout=20m ./libs/sync/... +// go test -tags benchworkspace -bench=BenchmarkListRepoByContent -benchtime=10x -timeout=20m ./libs/sync/... +// go test -tags benchworkspace -bench=BenchmarkSyncRunOnceColdSnapshot -benchtime=5x -timeout=30m ./libs/sync/... +// +// Use a scratch profile / scratch user — every run creates and deletes a +// fresh /Users/$DATABRICKS_BENCH_USER/.tmp/sync-bench-/ tree. + +package sync + +import ( + "bytes" + "context" + "crypto/rand" + "encoding/hex" + "fmt" + "net/http" + "net/url" + "os" + "path" + "path/filepath" + "strings" + "sync" + "sync/atomic" + "testing" + + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/vfs" + "github.com/databricks/databricks-sdk-go" + apiclient "github.com/databricks/databricks-sdk-go/client" + "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/databricks-sdk-go/service/workspace" +) + +// benchEnv groups the workspace state a benchmark needs. +type benchEnv struct { + wc *databricks.WorkspaceClient + apiClient *apiclient.DatabricksClient + profile string + username string + root string // absolute, no trailing slash +} + +func newBenchEnv(tb testing.TB) *benchEnv { + tb.Helper() + profile := os.Getenv("DATABRICKS_BENCH_PROFILE") + user := os.Getenv("DATABRICKS_BENCH_USER") + if profile == "" || user == "" { + tb.Skip("DATABRICKS_BENCH_PROFILE and DATABRICKS_BENCH_USER must be set") + } + wc, err := databricks.NewWorkspaceClient(&databricks.Config{Profile: profile}) + if err != nil { + tb.Fatalf("workspace client: %v", err) + } + c, err := apiclient.New(&config.Config{Profile: profile}) + if err != nil { + tb.Fatalf("api client: %v", err) + } + tag := make([]byte, 4) + _, _ = rand.Read(tag) + root := fmt.Sprintf("/Users/%s/.tmp/sync-bench-%s", user, hex.EncodeToString(tag)) + if err := wc.Workspace.MkdirsByPath(context.Background(), root); err != nil { + tb.Fatalf("mkdir root: %v", err) + } + tb.Cleanup(func() { + _ = wc.Workspace.Delete(context.Background(), workspace.Delete{Path: root, Recursive: true}) + }) + return &benchEnv{wc: wc, apiClient: c, profile: profile, username: user, root: root} +} + +// importRaw uploads body bytes to the given absolute workspace path via the +// legacy /workspace-files/import-file endpoint. Used during benchmark setup +// (we want population to be cheap and predictable; what we're benchmarking +// is the listing / sync, not the upload). +func importRaw(ctx context.Context, c *apiclient.DatabricksClient, absPath string, body []byte) error { + urlPath := fmt.Sprintf("/api/2.0/workspace-files/import-file/%s?overwrite=true", + url.PathEscape(strings.TrimLeft(absPath, "/"))) + return c.Do(ctx, http.MethodPost, urlPath, nil, nil, body, nil) +} + +// populate uploads the given (relative path -> body) map under remoteDir, +// using a fixed-size worker pool. Returns once all uploads finish. 
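+//
+// Usage sketch (illustrative):
+//
+//	populate(b, env, path.Join(env.root, "cell"), map[string][]byte{
+//		"file-00000.txt": []byte("plain file 0"),
+//	})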
+func populate(tb testing.TB, env *benchEnv, remoteDir string, items map[string][]byte) { + tb.Helper() + ctx := context.Background() + if err := env.wc.Workspace.MkdirsByPath(ctx, remoteDir); err != nil { + tb.Fatalf("mkdir %s: %v", remoteDir, err) + } + type job struct { + rel string + body []byte + } + jobs := make(chan job, len(items)) + for r, b := range items { + jobs <- job{r, b} + } + close(jobs) + var wg sync.WaitGroup + var failed atomic.Int64 + for i := 0; i < 16; i++ { + wg.Go(func() { + for j := range jobs { + if err := importRaw(ctx, env.apiClient, path.Join(remoteDir, j.rel), j.body); err != nil { + failed.Add(1) + } + } + }) + } + wg.Wait() + if failed.Load() > 0 { + tb.Logf("warning: %d uploads failed during populate", failed.Load()) + } +} + +// generators returns a body-by-kind table. The bodies are sized to roughly +// 200 bytes so list-repo response time isn't dominated by per-file content. +func generators() map[string]func(i int) (suffix string, body []byte) { + pad := func(s string, n int) []byte { + if n <= len(s) { + return []byte(s[:n]) + } + buf := make([]byte, n) + copy(buf, s) + for i := len(s); i < n; i++ { + buf[i] = byte('a' + (i % 26)) + } + return buf + } + return map[string]func(i int) (string, []byte){ + "file": func(i int) (string, []byte) { + return ".txt", pad(fmt.Sprintf("plain file %d\n", i), 200) + }, + "py-notebook": func(i int) (string, []byte) { + return ".py", pad(fmt.Sprintf("# Databricks notebook source\nprint('%d')\n", i), 200) + }, + "sql-notebook": func(i int) (string, []byte) { + return ".sql", pad(fmt.Sprintf("-- Databricks notebook source\nSELECT %d;\n", i), 200) + }, + "ipynb": func(i int) (string, []byte) { + return ".ipynb", pad(fmt.Sprintf(`{"cells":[{"cell_type":"code","source":["# Databricks notebook source\n","print(%d)"],"outputs":[],"execution_count":null,"metadata":{}}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"}},"nbformat":4,"nbformat_minor":4}`, i), 200) + }, + "dashboard": func(i int) (string, []byte) { + return ".lvdash.json", pad(`{"datasets":[],"pages":[{"name":"p","displayName":"P","layout":[]}]}`, 200) + }, + } +} + +// BenchmarkListRepoByCount measures /workspace/list-repo cost at varying file +// counts (all plain files), to characterize how the call scales with N. +func BenchmarkListRepoByCount(b *testing.B) { + env := newBenchEnv(b) + wfc, err := filer.NewWorkspaceFilesClient(env.wc, env.root) + if err != nil { + b.Fatalf("filer: %v", err) + } + lister := wfc.(*filer.WorkspaceFilesClient) + + gen := generators()["file"] + for _, n := range []int{10, 100, 500, 1000} { + b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) { + dir := path.Join(env.root, fmt.Sprintf("count-%d", n)) + items := make(map[string][]byte, n) + for i := 0; i < n; i++ { + suf, body := gen(i) + items[fmt.Sprintf("file-%05d%s", i, suf)] = body + } + populate(b, env, dir, items) + // Warm-up + _, _ = lister.ListWithSHAs(context.Background(), dir) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { + b.Fatalf("list: %v", err) + } + } + }) + } +} + +// BenchmarkListRepoByContent measures /workspace/list-repo cost when the +// directory has different mixes of object types. Each scenario has a fixed +// number of files; only the content mix varies. 
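+//
+// go test reports one sub-benchmark per scenario in the map below, e.g.
+// BenchmarkListRepoByContent/all-files and BenchmarkListRepoByContent/mixed
+// (plus the usual -GOMAXPROCS suffix).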
+func BenchmarkListRepoByContent(b *testing.B) { + const N = 200 + env := newBenchEnv(b) + wfc, err := filer.NewWorkspaceFilesClient(env.wc, env.root) + if err != nil { + b.Fatalf("filer: %v", err) + } + lister := wfc.(*filer.WorkspaceFilesClient) + + gen := generators() + scenarios := map[string]func(i int) (string, []byte){ + "all-files": gen["file"], + "all-py-notebooks": gen["py-notebook"], + "all-ipynb": gen["ipynb"], + "all-sql-notebooks": gen["sql-notebook"], + "all-dashboards": gen["dashboard"], + } + mixedKinds := []string{"file", "py-notebook", "ipynb", "dashboard", "sql-notebook"} + scenarios["mixed"] = func(i int) (string, []byte) { + return gen[mixedKinds[i%len(mixedKinds)]](i) + } + + for name, g := range scenarios { + b.Run(name, func(b *testing.B) { + dir := path.Join(env.root, "content-"+name) + items := make(map[string][]byte, N) + for i := 0; i < N; i++ { + suf, body := g(i) + items[fmt.Sprintf("item-%05d%s", i, suf)] = body + } + populate(b, env, dir, items) + _, _ = lister.ListWithSHAs(context.Background(), dir) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { + b.Fatalf("list: %v", err) + } + } + }) + } +} + +// BenchmarkSyncRunOnceColdSnapshot times an end-to-end Sync.RunOnce against +// a workspace that already has the bundle's files (the CI-cold-runner case). +// Sub-benchmarks compare with-Layer-3 vs without-Layer-3 to show the speedup +// the remote-SHA filter delivers when nothing has actually changed. +func BenchmarkSyncRunOnceColdSnapshot(b *testing.B) { + env := newBenchEnv(b) + user, err := env.wc.CurrentUser.Me(context.Background()) + if err != nil { + b.Fatalf("Me: %v", err) + } + + for _, n := range []int{20, 100, 500} { + b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) { + localDir := b.TempDir() + gen := generators()["file"] + for i := 0; i < n; i++ { + suf, body := gen(i) + p := filepath.Join(localDir, fmt.Sprintf("file-%05d%s", i, suf)) + if err := os.WriteFile(p, body, 0o644); err != nil { + b.Fatalf("write %s: %v", p, err) + } + } + remoteDir := path.Join(env.root, fmt.Sprintf("sync-N%d", n)) + if err := env.wc.Workspace.MkdirsByPath(context.Background(), remoteDir); err != nil { + b.Fatalf("mkdir: %v", err) + } + + runOnce := func(b *testing.B, withLayer3 bool) { + _ = os.RemoveAll(filepath.Join(localDir, ".databricks")) + localRoot := vfs.MustNew(localDir) + snapBase := b.TempDir() + s, err := New(context.Background(), SyncOptions{ + WorktreeRoot: localRoot, + LocalRoot: localRoot, + Paths: []string{"."}, + RemotePath: remoteDir, + SnapshotBasePath: snapBase, + WorkspaceClient: env.wc, + CurrentUser: user, + }) + if err != nil { + b.Fatalf("Sync.New: %v", err) + } + if !withLayer3 { + s.remoteFilter = nil + } + if _, err := s.RunOnce(context.Background()); err != nil { + b.Fatalf("RunOnce: %v", err) + } + s.Close() + } + + // Pre-warm: one initial sync so the workspace has the files. 
+			runOnce(b, true)
+			b.ResetTimer()
+
+			b.Run("with-layer3", func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					runOnce(b, true)
+				}
+			})
+			b.Run("without-layer3", func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					runOnce(b, false)
+				}
+			})
+		})
+	}
+}
+
+// keep tests in this file linkable even without the bench tag
+var _ = bytes.NewReader

From e90c69e9b00ac58caf233be849f5ed8d908416ee Mon Sep 17 00:00:00 2001
From: Shreyas Goenka 
Date: Mon, 4 May 2026 14:51:05 +0000
Subject: [PATCH 6/8] sync(bench): add tree-shape axis + parallel-walk runner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Expand the benchworkspace benchmarks so every cell can be combined with
a tree shape: flat (no nesting), small (depth 2, branch 2), medium
(depth 4, branch 2), large (depth 6, branch 2). All four list-repo and
sync benchmarks now parameterize over (shape × N).

Add a test-only parallelWalk runner that lists everything under a path
by issuing non-recursive /api/2.0/workspace/list calls and recursing
client-side with a configurable worker pool. Add BenchmarkListWalkers
that compares list-repo vs parallel-walk (workers=8 and workers=32)
head to head across (shape × N).

The runner is deliberately not exported and not wired into Sync —
production code uses list-repo. The bench is here to make the
comparison reproducible and to back the claim that list-repo dominates
parallel-walk at any non-flat shape.

BenchmarkListRepoByContent now also varies shape, so content × shape is
fully covered.

Run all four groups with:

  go test -tags benchworkspace -bench=. -benchtime=5x -timeout=90m ./libs/sync/...

Co-authored-by: Isaac 
---
 libs/sync/workspace_bench_test.go | 458 +++++++++++++++++++++---------
 1 file changed, 331 insertions(+), 127 deletions(-)

diff --git a/libs/sync/workspace_bench_test.go b/libs/sync/workspace_bench_test.go
index 45301e43320..562989674e5 100644
--- a/libs/sync/workspace_bench_test.go
+++ b/libs/sync/workspace_bench_test.go
@@ -8,21 +8,24 @@
 // export DATABRICKS_BENCH_PROFILE=my-profile  # required; profile from .databrickscfg
 // export DATABRICKS_BENCH_USER=me@example.com # required; remote dirs go under /Users/$DATABRICKS_BENCH_USER/.tmp/
 //
-// go test -tags benchworkspace -bench=. -benchtime=5x -timeout=60m ./libs/sync/...
+// go test -tags benchworkspace -bench=. -benchtime=5x -timeout=90m ./libs/sync/...
 //
-// To run a single benchmark group:
+// Run a single group:
 //
-// go test -tags benchworkspace -bench=BenchmarkListRepoByCount -benchtime=10x -timeout=20m ./libs/sync/...
-// go test -tags benchworkspace -bench=BenchmarkListRepoByContent -benchtime=10x -timeout=20m ./libs/sync/...
-// go test -tags benchworkspace -bench=BenchmarkSyncRunOnceColdSnapshot -benchtime=5x -timeout=30m ./libs/sync/...
+// go test -tags benchworkspace -bench=BenchmarkListRepo$ -benchtime=10x -timeout=20m ./libs/sync/...
+// go test -tags benchworkspace -bench=BenchmarkListRepoByContent -benchtime=10x -timeout=20m ./libs/sync/...
+// go test -tags benchworkspace -bench=BenchmarkListWalkers -benchtime=5x -timeout=30m ./libs/sync/...
+// go test -tags benchworkspace -bench=BenchmarkSyncRunOnceColdSnapshot -benchtime=5x -timeout=30m ./libs/sync/...
 //
-// Use a scratch profile / scratch user — every run creates and deletes a
-// fresh /Users/$DATABRICKS_BENCH_USER/.tmp/sync-bench-/ tree.
+// Filter further with -bench='BenchmarkListWalkers/shape=medium/N=100'.
+// +// Each benchmark group creates and tears down a unique remote tree under +// /Users/$DATABRICKS_BENCH_USER/.tmp/sync-bench-/ on the configured +// workspace. Use a scratch profile. package sync import ( - "bytes" "context" "crypto/rand" "encoding/hex" @@ -32,9 +35,10 @@ import ( "os" "path" "path/filepath" - "strings" - "sync" + "sort" + stdsync "sync" "sync/atomic" + "strings" "testing" "github.com/databricks/cli/libs/filer" @@ -45,13 +49,69 @@ import ( "github.com/databricks/databricks-sdk-go/service/workspace" ) -// benchEnv groups the workspace state a benchmark needs. +// ----- shape: tree depth and branching used to spread files across dirs ---- + +type treeShape struct { + name string + depth int + branch int // unused when depth == 0 (flat) +} + +// shapes ranges from "flat" (no nesting) to "large" (deep tree, many dirs). +// Tweak these values if you want to explore broader/narrower mixes. +var shapes = []treeShape{ + {"flat", 0, 0}, // 1 leaf dir + {"small", 2, 2}, // 4 leaf dirs + {"medium", 4, 2}, // 16 leaf dirs + {"large", 6, 2}, // 64 leaf dirs +} + +// generatePaths returns n relative file paths arranged into a tree of the +// requested shape. Filenames are unique; no extension is added (callers add +// the right one for the content kind they're using). +func generatePaths(shape treeShape, n int) []string { + if shape.depth == 0 { + out := make([]string, n) + for i := 0; i < n; i++ { + out[i] = fmt.Sprintf("f-%05d", i) + } + return out + } + leaves := 1 + for i := 0; i < shape.depth; i++ { + leaves *= shape.branch + } + filesPerLeaf := (n + leaves - 1) / leaves + if filesPerLeaf < 1 { + filesPerLeaf = 1 + } + var paths []string + var walk func(prefix string, d int) + walk = func(prefix string, d int) { + if d == 0 { + for f := 0; f < filesPerLeaf && len(paths) < n; f++ { + paths = append(paths, path.Join(prefix, fmt.Sprintf("f-%d", f))) + } + return + } + for b := 0; b < shape.branch; b++ { + if len(paths) >= n { + return + } + walk(path.Join(prefix, fmt.Sprintf("d%d", b)), d-1) + } + } + walk("", shape.depth) + return paths +} + +// ----- environment + helpers ------------------------------------------------ + type benchEnv struct { wc *databricks.WorkspaceClient apiClient *apiclient.DatabricksClient - profile string username string - root string // absolute, no trailing slash + root string // absolute workspace path, no trailing slash } func newBenchEnv(tb testing.TB) *benchEnv { @@ -78,27 +138,41 @@ func newBenchEnv(tb testing.TB) *benchEnv { tb.Cleanup(func() { _ = wc.Workspace.Delete(context.Background(), workspace.Delete{Path: root, Recursive: true}) }) - return &benchEnv{wc: wc, apiClient: c, profile: profile, username: user, root: root} + return &benchEnv{wc: wc, apiClient: c, username: user, root: root} } -// importRaw uploads body bytes to the given absolute workspace path via the -// legacy /workspace-files/import-file endpoint. Used during benchmark setup -// (we want population to be cheap and predictable; what we're benchmarking -// is the listing / sync, not the upload). +// importRaw uploads body bytes via the legacy import-file endpoint. Used for +// setup only — what we benchmark is the listing / sync path, not the upload. 
func importRaw(ctx context.Context, c *apiclient.DatabricksClient, absPath string, body []byte) error { urlPath := fmt.Sprintf("/api/2.0/workspace-files/import-file/%s?overwrite=true", url.PathEscape(strings.TrimLeft(absPath, "/"))) return c.Do(ctx, http.MethodPost, urlPath, nil, nil, body, nil) } -// populate uploads the given (relative path -> body) map under remoteDir, -// using a fixed-size worker pool. Returns once all uploads finish. +// populate creates all parent dirs under remoteDir, then uploads (relPath → +// body) entries via a 16-worker pool. Setup helper for benchmarks. func populate(tb testing.TB, env *benchEnv, remoteDir string, items map[string][]byte) { tb.Helper() ctx := context.Background() if err := env.wc.Workspace.MkdirsByPath(ctx, remoteDir); err != nil { tb.Fatalf("mkdir %s: %v", remoteDir, err) } + parents := map[string]struct{}{} + for rel := range items { + if d := path.Dir(rel); d != "." && d != "/" { + parents[d] = struct{}{} + } + } + parentList := make([]string, 0, len(parents)) + for d := range parents { + parentList = append(parentList, d) + } + sort.Strings(parentList) + for _, d := range parentList { + if err := env.wc.Workspace.MkdirsByPath(ctx, path.Join(remoteDir, d)); err != nil { + tb.Fatalf("mkdir %s: %v", d, err) + } + } type job struct { rel string body []byte @@ -108,7 +182,7 @@ func populate(tb testing.TB, env *benchEnv, remoteDir string, items map[string][ jobs <- job{r, b} } close(jobs) - var wg sync.WaitGroup + var wg stdsync.WaitGroup var failed atomic.Int64 for i := 0; i < 16; i++ { wg.Go(func() { @@ -120,12 +194,12 @@ func populate(tb testing.TB, env *benchEnv, remoteDir string, items map[string][ }) } wg.Wait() - if failed.Load() > 0 { - tb.Logf("warning: %d uploads failed during populate", failed.Load()) + if f := failed.Load(); f > 0 { + tb.Logf("warning: %d uploads failed during populate", f) } } -// generators returns a body-by-kind table. The bodies are sized to roughly +// generators returns body generators for each content kind, sized roughly to // 200 bytes so list-repo response time isn't dominated by per-file content. func generators() map[string]func(i int) (suffix string, body []byte) { pad := func(s string, n int) []byte { @@ -158,9 +232,105 @@ func generators() map[string]func(i int) (suffix string, body []byte) { } } -// BenchmarkListRepoByCount measures /workspace/list-repo cost at varying file -// counts (all plain files), to characterize how the call scales with N. -func BenchmarkListRepoByCount(b *testing.B) { +// itemsForCell builds the (relPath → body) map for a (shape, n, kindKey) cell. +func itemsForCell(shape treeShape, n int, kindKey string) map[string][]byte { + gen := generators()[kindKey] + paths := generatePaths(shape, n) + out := make(map[string][]byte, len(paths)) + for i, rel := range paths { + suf, body := gen(i) + out[rel+suf] = body + } + return out +} + +// itemsMixed builds a map where each file is a different kind in a fixed +// rotation. Used by the content-mix benchmark. 
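+//
+// For example (illustrative), consecutive files cycle through the kinds:
+//
+//	i=0 file, i=1 py-notebook, i=2 ipynb, i=3 dashboard, i=4 sql-notebook,
+//	i=5 file, ...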
+func itemsMixed(shape treeShape, n int) map[string][]byte { + gens := generators() + kinds := []string{"file", "py-notebook", "ipynb", "dashboard", "sql-notebook"} + paths := generatePaths(shape, n) + out := make(map[string][]byte, len(paths)) + for i, rel := range paths { + suf, body := gens[kinds[i%len(kinds)]](i) + out[rel+suf] = body + } + return out +} + +// ----- parallel-walk runner (test-only alternative to list-repo) ------------ + +// parallelWalk lists everything under root by issuing non-recursive +// /api/2.0/workspace/list calls and recursing into directories from the +// client side, with up to `workers` outstanding calls in flight. It serves as +// a baseline to compare against the recursive list-repo endpoint. +// +// Test-only: we do not ship this in production. The list-repo path is +// strictly faster for any nested tree (see benchmark numbers). +func parallelWalk(ctx context.Context, c *apiclient.DatabricksClient, root string, workers int) ([]filer.RemoteFileMetadata, error) { + type listObject struct { + ObjectType string `json:"object_type"` + Path string `json:"path"` + ContentSHA256Hex string `json:"content_sha256_hex"` + HasWsfsMetadata bool `json:"has_wsfs_metadata"` + } + type listResp struct { + Objects []listObject `json:"objects"` + } + + sem := make(chan struct{}, workers) + var ( + mu stdsync.Mutex + results []filer.RemoteFileMetadata + firstErr error + wg stdsync.WaitGroup + ) + + var walk func(dir string) + walk = func(dir string) { + defer wg.Done() + sem <- struct{}{} + defer func() { <-sem }() + + var resp listResp + body := map[string]any{"path": dir, "return_wsfs_metadata": true} + if err := c.Do(ctx, http.MethodGet, "/api/2.0/workspace/list", nil, nil, body, &resp); err != nil { + mu.Lock() + if firstErr == nil { + firstErr = err + } + mu.Unlock() + return + } + for _, o := range resp.Objects { + if o.ObjectType == "DIRECTORY" { + wg.Add(1) + go walk(o.Path) + continue + } + if o.ContentSHA256Hex == "" { + continue + } + mu.Lock() + results = append(results, filer.RemoteFileMetadata{ + Path: o.Path, + ContentSHA256Hex: o.ContentSHA256Hex, + ObjectType: o.ObjectType, + }) + mu.Unlock() + } + } + wg.Add(1) + go walk(root) + wg.Wait() + return results, firstErr +} + +// ----- benchmarks ---------------------------------------------------------- + +// BenchmarkListRepo measures /workspace/list-repo cost vs (shape, N) at fixed +// content (plain files). 
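+//
+// Cells are named shape=<shape>/N=<count>, e.g.
+// BenchmarkListRepo/shape=flat/N=10 and BenchmarkListRepo/shape=large/N=500.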
+func BenchmarkListRepo(b *testing.B) { env := newBenchEnv(b) wfc, err := filer.NewWorkspaceFilesClient(env.wc, env.root) if err != nil { @@ -168,31 +338,26 @@ func BenchmarkListRepoByCount(b *testing.B) { } lister := wfc.(*filer.WorkspaceFilesClient) - gen := generators()["file"] - for _, n := range []int{10, 100, 500, 1000} { - b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) { - dir := path.Join(env.root, fmt.Sprintf("count-%d", n)) - items := make(map[string][]byte, n) - for i := 0; i < n; i++ { - suf, body := gen(i) - items[fmt.Sprintf("file-%05d%s", i, suf)] = body - } - populate(b, env, dir, items) - // Warm-up - _, _ = lister.ListWithSHAs(context.Background(), dir) - b.ResetTimer() - for i := 0; i < b.N; i++ { - if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { - b.Fatalf("list: %v", err) + counts := []int{10, 100, 500} + for _, shape := range shapes { + for _, n := range counts { + b.Run(fmt.Sprintf("shape=%s/N=%d", shape.name, n), func(b *testing.B) { + dir := path.Join(env.root, fmt.Sprintf("listrepo-%s-%d", shape.name, n)) + populate(b, env, dir, itemsForCell(shape, n, "file")) + _, _ = lister.ListWithSHAs(context.Background(), dir) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { + b.Fatalf("list: %v", err) + } } - } - }) + }) + } } } -// BenchmarkListRepoByContent measures /workspace/list-repo cost when the -// directory has different mixes of object types. Each scenario has a fixed -// number of files; only the content mix varies. +// BenchmarkListRepoByContent measures /workspace/list-repo cost across +// content mixes, at every shape, fixed N=200. func BenchmarkListRepoByContent(b *testing.B) { const N = 200 env := newBenchEnv(b) @@ -202,28 +367,24 @@ func BenchmarkListRepoByContent(b *testing.B) { } lister := wfc.(*filer.WorkspaceFilesClient) - gen := generators() - scenarios := map[string]func(i int) (string, []byte){ - "all-files": gen["file"], - "all-py-notebooks": gen["py-notebook"], - "all-ipynb": gen["ipynb"], - "all-sql-notebooks": gen["sql-notebook"], - "all-dashboards": gen["dashboard"], - } - mixedKinds := []string{"file", "py-notebook", "ipynb", "dashboard", "sql-notebook"} - scenarios["mixed"] = func(i int) (string, []byte) { - return gen[mixedKinds[i%len(mixedKinds)]](i) - } - - for name, g := range scenarios { - b.Run(name, func(b *testing.B) { - dir := path.Join(env.root, "content-"+name) - items := make(map[string][]byte, N) - for i := 0; i < N; i++ { - suf, body := g(i) - items[fmt.Sprintf("item-%05d%s", i, suf)] = body - } - populate(b, env, dir, items) + contents := []string{"file", "py-notebook", "ipynb", "sql-notebook", "dashboard"} + for _, shape := range shapes { + for _, kind := range contents { + b.Run(fmt.Sprintf("shape=%s/content=%s", shape.name, kind), func(b *testing.B) { + dir := path.Join(env.root, fmt.Sprintf("content-%s-%s", shape.name, kind)) + populate(b, env, dir, itemsForCell(shape, N, kind)) + _, _ = lister.ListWithSHAs(context.Background(), dir) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { + b.Fatalf("list: %v", err) + } + } + }) + } + b.Run(fmt.Sprintf("shape=%s/content=mixed", shape.name), func(b *testing.B) { + dir := path.Join(env.root, fmt.Sprintf("content-%s-mixed", shape.name)) + populate(b, env, dir, itemsMixed(shape, N)) _, _ = lister.ListWithSHAs(context.Background(), dir) b.ResetTimer() for i := 0; i < b.N; i++ { @@ -235,75 +396,118 @@ func 
BenchmarkListRepoByContent(b *testing.B) { } } -// BenchmarkSyncRunOnceColdSnapshot times an end-to-end Sync.RunOnce against -// a workspace that already has the bundle's files (the CI-cold-runner case). -// Sub-benchmarks compare with-Layer-3 vs without-Layer-3 to show the speedup -// the remote-SHA filter delivers when nothing has actually changed. -func BenchmarkSyncRunOnceColdSnapshot(b *testing.B) { +// BenchmarkListWalkers compares /workspace/list-repo (recursive, server-side) +// against the test-only parallel client-side walk, across (shape, N, workers). +// Plain-file content; the goal is to characterize the listing strategies +// themselves. +func BenchmarkListWalkers(b *testing.B) { env := newBenchEnv(b) - user, err := env.wc.CurrentUser.Me(context.Background()) + wfc, err := filer.NewWorkspaceFilesClient(env.wc, env.root) if err != nil { - b.Fatalf("Me: %v", err) + b.Fatalf("filer: %v", err) } + lister := wfc.(*filer.WorkspaceFilesClient) + + counts := []int{100, 500} + workerCounts := []int{8, 32} + for _, shape := range shapes { + for _, n := range counts { + dir := path.Join(env.root, fmt.Sprintf("walkers-%s-%d", shape.name, n)) + populate(b, env, dir, itemsForCell(shape, n, "file")) + _, _ = lister.ListWithSHAs(context.Background(), dir) - for _, n := range []int{20, 100, 500} { - b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) { - localDir := b.TempDir() - gen := generators()["file"] - for i := 0; i < n; i++ { - suf, body := gen(i) - p := filepath.Join(localDir, fmt.Sprintf("file-%05d%s", i, suf)) - if err := os.WriteFile(p, body, 0o644); err != nil { - b.Fatalf("write %s: %v", p, err) + b.Run(fmt.Sprintf("shape=%s/N=%d/impl=list-repo", shape.name, n), func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil { + b.Fatalf("list-repo: %v", err) + } } + }) + for _, w := range workerCounts { + w := w + b.Run(fmt.Sprintf("shape=%s/N=%d/impl=parallel-w%d", shape.name, n, w), func(b *testing.B) { + for i := 0; i < b.N; i++ { + if _, err := parallelWalk(context.Background(), env.apiClient, dir, w); err != nil { + b.Fatalf("parallel-walk: %v", err) + } + } + }) } - remoteDir := path.Join(env.root, fmt.Sprintf("sync-N%d", n)) - if err := env.wc.Workspace.MkdirsByPath(context.Background(), remoteDir); err != nil { - b.Fatalf("mkdir: %v", err) - } + } + } +} - runOnce := func(b *testing.B, withLayer3 bool) { - _ = os.RemoveAll(filepath.Join(localDir, ".databricks")) - localRoot := vfs.MustNew(localDir) - snapBase := b.TempDir() - s, err := New(context.Background(), SyncOptions{ - WorktreeRoot: localRoot, - LocalRoot: localRoot, - Paths: []string{"."}, - RemotePath: remoteDir, - SnapshotBasePath: snapBase, - WorkspaceClient: env.wc, - CurrentUser: user, - }) - if err != nil { - b.Fatalf("Sync.New: %v", err) +// BenchmarkSyncRunOnceColdSnapshot times an end-to-end Sync.RunOnce against a +// pre-warmed remote (the CI-cold-runner case). Sub-benchmarks compare with- +// and without-Layer-3 across (shape, N) cells. Plain-file content. 
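+//
+// Each cell reports two sub-benchmarks, e.g.:
+//
+//	BenchmarkSyncRunOnceColdSnapshot/shape=medium/N=100/with-layer3
+//	BenchmarkSyncRunOnceColdSnapshot/shape=medium/N=100/without-layer3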
+func BenchmarkSyncRunOnceColdSnapshot(b *testing.B) {
+	env := newBenchEnv(b)
+	user, err := env.wc.CurrentUser.Me(context.Background())
+	if err != nil {
+		b.Fatalf("Me: %v", err)
+	}
+
+	counts := []int{20, 100}
+	for _, shape := range shapes {
+		for _, n := range counts {
+			b.Run(fmt.Sprintf("shape=%s/N=%d", shape.name, n), func(b *testing.B) {
+				localDir := b.TempDir()
+				gen := generators()["file"]
+				for i, rel := range generatePaths(shape, n) {
+					suf, body := gen(i)
+					p := filepath.Join(localDir, filepath.FromSlash(rel)+suf)
+					if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
+						b.Fatalf("mkdir: %v", err)
+					}
+					if err := os.WriteFile(p, body, 0o644); err != nil {
+						b.Fatalf("write %s: %v", p, err)
+					}
 				}
-			if !withLayer3 {
-				s.remoteFilter = nil
+				remoteDir := path.Join(env.root, fmt.Sprintf("sync-%s-%d", shape.name, n))
+				if err := env.wc.Workspace.MkdirsByPath(context.Background(), remoteDir); err != nil {
+					b.Fatalf("mkdir: %v", err)
 				}
-			if _, err := s.RunOnce(context.Background()); err != nil {
-				b.Fatalf("RunOnce: %v", err)
+
+				runOnce := func(b *testing.B, withLayer3 bool) {
+					_ = os.RemoveAll(filepath.Join(localDir, ".databricks"))
+					localRoot := vfs.MustNew(localDir)
+					snapBase := b.TempDir()
+					s, err := New(context.Background(), SyncOptions{
+						WorktreeRoot:     localRoot,
+						LocalRoot:        localRoot,
+						Paths:            []string{"."},
+						RemotePath:       remoteDir,
+						SnapshotBasePath: snapBase,
+						WorkspaceClient:  env.wc,
+						CurrentUser:      user,
+					})
+					if err != nil {
+						b.Fatalf("Sync.New: %v", err)
+					}
+					if !withLayer3 {
+						s.remoteFilter = nil
+					}
+					if _, err := s.RunOnce(context.Background()); err != nil {
+						b.Fatalf("RunOnce: %v", err)
+					}
+					s.Close()
 				}
-			s.Close()
-		}
 
-		// Pre-warm: one initial sync so the workspace has the files.
-		runOnce(b, true)
-		b.ResetTimer()
+				// Pre-warm so the workspace already has every file.
+				runOnce(b, true)
+				b.ResetTimer()
 
-		b.Run("with-layer3", func(b *testing.B) {
-			for i := 0; i < b.N; i++ {
-				runOnce(b, true)
-			}
-		})
-		b.Run("without-layer3", func(b *testing.B) {
-			for i := 0; i < b.N; i++ {
-				runOnce(b, false)
+				b.Run("with-layer3", func(b *testing.B) {
+					for i := 0; i < b.N; i++ {
+						runOnce(b, true)
+					}
+				})
+				b.Run("without-layer3", func(b *testing.B) {
+					for i := 0; i < b.N; i++ {
+						runOnce(b, false)
+					}
+				})
 			})
-		})
+		}
 	}
 }
-
-// keep tests in this file linkable even without the bench tag
-var _ = bytes.NewReader

From 4acd80d8bd4c3b4ed775f67fbcad210e18d21d3d Mon Sep 17 00:00:00 2001
From: Shreyas Goenka
Date: Wed, 6 May 2026 10:41:22 +0000
Subject: [PATCH 7/8] sync(bench): add minimal walker comparison

BenchmarkWalkers compares /workspace/list-repo against the parallel
list-walker across four leaf counts (1, 4, 16, 64) at full occupancy
(one file per leaf). Reuses helpers from workspace_bench_test.go.

Run:

  go test -tags benchworkspace -bench=BenchmarkWalkers$ -benchtime=3x -timeout=15m ./libs/sync/

Co-authored-by: Isaac
---
 libs/sync/walkers_bench_test.go | 68 +++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 libs/sync/walkers_bench_test.go
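The parallelWalk helper exercised here is defined alongside the other bench
helpers in workspace_bench_test.go. For orientation, a minimal sketch of a
walker in this style, using the SDK's public Workspace.ListAll call rather
than the benchmarks' raw API client (the package name and the names
parallelWalkSketch/visit are illustrative assumptions, not code from this
series):

	package bench // hypothetical package, for illustration only

	import (
		"context"
		"sync"

		"github.com/databricks/databricks-sdk-go"
		"github.com/databricks/databricks-sdk-go/service/workspace"
	)

	// parallelWalkSketch lists every object under root. Each directory is one
	// non-recursive /api/2.0/workspace/list call; discovered subdirectories are
	// walked by freshly spawned goroutines, with a semaphore capping the number
	// of in-flight list calls at `workers`.
	func parallelWalkSketch(ctx context.Context, wc *databricks.WorkspaceClient, root string, workers int) ([]workspace.ObjectInfo, error) {
		var (
			mu       sync.Mutex
			out      []workspace.ObjectInfo
			wg       sync.WaitGroup
			sem      = make(chan struct{}, workers)
			errOnce  sync.Once
			firstErr error
		)
		var visit func(dir string)
		visit = func(dir string) {
			defer wg.Done()
			sem <- struct{}{} // acquire a worker slot
			objs, err := wc.Workspace.ListAll(ctx, workspace.ListWorkspaceRequest{Path: dir})
			<-sem // release before recursing, so children don't deadlock
			if err != nil {
				errOnce.Do(func() { firstErr = err })
				return
			}
			mu.Lock()
			out = append(out, objs...)
			mu.Unlock()
			for _, o := range objs {
				if o.ObjectType == workspace.ObjectTypeDirectory {
					wg.Add(1)
					go visit(o.Path)
				}
			}
		}
		wg.Add(1)
		go visit(root)
		wg.Wait()
		return out, firstErr
	}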
diff --git a/libs/sync/walkers_bench_test.go b/libs/sync/walkers_bench_test.go
new file mode 100644
index 00000000000..2fa8c1958df
--- /dev/null
+++ b/libs/sync/walkers_bench_test.go
@@ -0,0 +1,68 @@
+//go:build benchworkspace
+
+// BenchmarkWalkers — minimal head-to-head comparison of /workspace/list-repo
+// (recursive, server-side) vs a client-side parallel walker over the
+// non-recursive /workspace/list endpoint.
+//
+// One cell per leaf-count: 1, 4, 16, 64. Full occupancy — exactly one file
+// per leaf directory, so total files equals leaf count and every leaf is
+// populated. Two sub-benches per cell: list-repo vs parallel-walk (8 workers).
+//
+// Run with 3 iterations per cell:
+//
+//	export DATABRICKS_BENCH_PROFILE=
+//	export DATABRICKS_BENCH_USER=
+//	go test -tags benchworkspace -bench=BenchmarkWalkers$ -benchtime=3x -timeout=15m ./libs/sync/
+
+package sync
+
+import (
+	"context"
+	"path"
+	"testing"
+
+	"github.com/databricks/cli/libs/filer"
+)
+
+func BenchmarkWalkers(b *testing.B) {
+	env := newBenchEnv(b)
+	wfc, err := filer.NewWorkspaceFilesClient(env.wc, env.root)
+	if err != nil {
+		b.Fatalf("filer: %v", err)
+	}
+	lister := wfc.(*filer.WorkspaceFilesClient)
+
+	// Shapes are sized so every leaf dir gets exactly one file at full
+	// occupancy (files = leaves).
+	cases := []struct {
+		name   string
+		shape  treeShape
+		leaves int
+	}{
+		{"leaves=1", treeShape{"flat", 0, 0}, 1},
+		{"leaves=4", treeShape{"small", 2, 2}, 4},
+		{"leaves=16", treeShape{"medium", 4, 2}, 16},
+		{"leaves=64", treeShape{"large", 6, 2}, 64},
+	}
+
+	for _, c := range cases {
+		dir := path.Join(env.root, "walkers-"+c.name)
+		populate(b, env, dir, itemsForCell(c.shape, c.leaves, "file"))
+		_, _ = lister.ListWithSHAs(context.Background(), dir)
+
+		b.Run(c.name+"/list-repo", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				if _, err := lister.ListWithSHAs(context.Background(), dir); err != nil {
+					b.Fatalf("list-repo: %v", err)
+				}
+			}
+		})
+		b.Run(c.name+"/parallel-walk", func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				if _, err := parallelWalk(context.Background(), env.apiClient, dir, 8); err != nil {
+					b.Fatalf("parallel-walk: %v", err)
+				}
+			}
+		})
+	}
+}

From 1f89453ad8b5be598f0ecfa249c6f427499ff27e Mon Sep 17 00:00:00 2001
From: Shreyas Goenka
Date: Wed, 6 May 2026 10:49:34 +0000
Subject: [PATCH 8/8] sync(bench): round-robin walker bench with persistent
 fixture; parallelize mkdirs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BenchmarkWalkers now plants exactly 100 files in every cell (1, 4, 16,
64 leaves), distributing them round-robin: file 0 → leaf 0, file 1 →
leaf 1, …, rolling over once every leaf has one file. Total file count
is constant; only depth/breadth changes.

The scaffold path is hardcoded at
/Users/$DATABRICKS_BENCH_USER/.tmp/sync-bench-walkers-fixture and is
not torn down between runs. ensureWalkersFixture skips the scaffold
call when the per-cell directory already has the expected file count,
so a second run reuses the workspace state. Cold run is ~80 s; warm
re-run is ~45 s for the same 24 measurements.

Also parallelize the mkdir loop in populate() — the directory
pre-creation was the serial bottleneck for deep shapes (depth=6 with
64 leaves did 64 sequential mkdirs at ~150 ms each). Now uses the same
16-worker pool the upload loop already used.

Co-authored-by: Isaac
---
 libs/sync/walkers_bench_test.go   | 134 ++++++++++++++++++++++++++----
 libs/sync/workspace_bench_test.go |  37 +++++++--
 2 files changed, 144 insertions(+), 27 deletions(-)
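A quick sanity check on the per-leaf counts quoted above (derived from the
i%leaves round-robin assignment, not measured): with 100 files, leaves=4
gives exactly 25 per leaf; leaves=16 gives 7 files in the first 4 leaves and
6 in the other 12 (100 = 6×16 + 4); leaves=64 gives 2 files in the first 36
leaves and 1 in the remaining 28 (100 = 64 + 36).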
diff --git a/libs/sync/walkers_bench_test.go b/libs/sync/walkers_bench_test.go
index 2fa8c1958df..d6103587117 100644
--- a/libs/sync/walkers_bench_test.go
+++ b/libs/sync/walkers_bench_test.go
@@ -4,9 +4,21 @@
 // (recursive, server-side) vs a client-side parallel walker over the
 // non-recursive /workspace/list endpoint.
 //
-// One cell per leaf-count: 1, 4, 16, 64. Full occupancy — exactly one file
-// per leaf directory, so total files equals leaf count and every leaf is
-// populated.
+// Always 100 files, distributed round-robin across the available leaves of
+// each tree shape. Total file count is constant; only depth/breadth changes:
+//
+//	leaves=1  → 100 files in one dir (flat)
+//	leaves=4  → ~25 files per dir, depth 2
+//	leaves=16 → ~6-7 files per dir, depth 4
+//	leaves=64 → ~1-2 files per dir, depth 6
+//
+// The scaffold lives at a hardcoded persistent path:
+//
+//	/Users/$DATABRICKS_BENCH_USER/.tmp/sync-bench-walkers-fixture/
+//
+// First run populates it (~one-time cost). Subsequent runs reuse it, so
+// repeated bench iterations are fast. To force a fresh scaffold, delete the
+// fixture dir from the workspace.
 //
 // Run with 3 iterations per cell:
 //
@@ -18,37 +30,63 @@
 package sync
 
 import (
 	"context"
+	"fmt"
+	"os"
 	"path"
 	"testing"
 
 	"github.com/databricks/cli/libs/filer"
+	"github.com/databricks/databricks-sdk-go"
+	apiclient "github.com/databricks/databricks-sdk-go/client"
+	"github.com/databricks/databricks-sdk-go/config"
+)
+
+const (
+	walkersFilesPerCell = 100
+	walkersFixtureName  = "sync-bench-walkers-fixture"
 )
 
 func BenchmarkWalkers(b *testing.B) {
-	env := newBenchEnv(b)
-	wfc, err := filer.NewWorkspaceFilesClient(env.wc, env.root)
+	profile := os.Getenv("DATABRICKS_BENCH_PROFILE")
+	user := os.Getenv("DATABRICKS_BENCH_USER")
+	if profile == "" || user == "" {
+		b.Skip("DATABRICKS_BENCH_PROFILE and DATABRICKS_BENCH_USER must be set")
+	}
+	wc, err := databricks.NewWorkspaceClient(&databricks.Config{Profile: profile})
+	if err != nil {
+		b.Fatalf("workspace client: %v", err)
+	}
+	apiC, err := apiclient.New(&config.Config{Profile: profile})
+	if err != nil {
+		b.Fatalf("api client: %v", err)
+	}
+
+	fixtureRoot := fmt.Sprintf("/Users/%s/.tmp/%s", user, walkersFixtureName)
+	if err := wc.Workspace.MkdirsByPath(context.Background(), fixtureRoot); err != nil {
+		b.Fatalf("mkdir fixture root: %v", err)
+	}
+	env := &benchEnv{wc: wc, apiClient: apiC, username: user, root: fixtureRoot}
+
+	wfc, err := filer.NewWorkspaceFilesClient(wc, fixtureRoot)
 	if err != nil {
 		b.Fatalf("filer: %v", err)
 	}
 	lister := wfc.(*filer.WorkspaceFilesClient)
 
-	// Shapes are sized so every leaf dir gets exactly one file at full
-	// occupancy (files = leaves).
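+	// Leaf counts are implied by the shape itself (branch^depth leaves):
+	// {2,2} gives 4, {4,2} gives 16, {6,2} gives 64, and flat has one leaf.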
 	cases := []struct {
-		name   string
-		shape  treeShape
-		leaves int
+		name  string
+		shape treeShape
 	}{
-		{"leaves=1", treeShape{"flat", 0, 0}, 1},
-		{"leaves=4", treeShape{"small", 2, 2}, 4},
-		{"leaves=16", treeShape{"medium", 4, 2}, 16},
-		{"leaves=64", treeShape{"large", 6, 2}, 64},
+		{"leaves=1", treeShape{"flat", 0, 0}},
+		{"leaves=4", treeShape{"small", 2, 2}},
+		{"leaves=16", treeShape{"medium", 4, 2}},
+		{"leaves=64", treeShape{"large", 6, 2}},
 	}
 
 	for _, c := range cases {
-		dir := path.Join(env.root, "walkers-"+c.name)
-		populate(b, env, dir, itemsForCell(c.shape, c.leaves, "file"))
-		_, _ = lister.ListWithSHAs(context.Background(), dir)
+		dir := path.Join(fixtureRoot, c.name)
+		items := walkersItemsRoundRobin(c.shape, walkersFilesPerCell)
+		ensureWalkersFixture(b, env, lister, dir, items)
 
 		b.Run(c.name+"/list-repo", func(b *testing.B) {
 			for i := 0; i < b.N; i++ {
@@ -59,10 +97,70 @@ func BenchmarkWalkers(b *testing.B) {
 		})
 		b.Run(c.name+"/parallel-walk", func(b *testing.B) {
 			for i := 0; i < b.N; i++ {
-				if _, err := parallelWalk(context.Background(), env.apiClient, dir, 8); err != nil {
+				if _, err := parallelWalk(context.Background(), apiC, dir, 8); err != nil {
 					b.Fatalf("parallel-walk: %v", err)
 				}
 			}
 		})
 	}
 }
+
+// walkersItemsRoundRobin generates n file paths distributed round-robin
+// across the leaves of the given shape (file 0 → leaf 0, file 1 → leaf 1, …,
+// rolling over once every leaf has one file). Returns a (relPath → body)
+// map suitable for passing to populate().
+func walkersItemsRoundRobin(shape treeShape, n int) map[string][]byte {
+	gen := generators()["file"]
+	leaves := walkersLeafPaths(shape)
+	out := make(map[string][]byte, n)
+	for i := 0; i < n; i++ {
+		leaf := leaves[i%len(leaves)]
+		suf, body := gen(i)
+		rel := path.Join(leaf, fmt.Sprintf("f-%d%s", i, suf))
+		out[rel] = body
+	}
+	return out
+}
+
+// walkersLeafPaths returns every leaf-dir path for the given shape, in DFS
+// order. For shape=flat it returns [""], so files land directly under the
+// cell root.
+func walkersLeafPaths(shape treeShape) []string {
+	if shape.depth == 0 {
+		return []string{""}
+	}
+	var paths []string
+	var walk func(prefix string, d int)
+	walk = func(prefix string, d int) {
+		if d == 0 {
+			paths = append(paths, prefix)
+			return
+		}
+		for b := 0; b < shape.branch; b++ {
+			walk(path.Join(prefix, fmt.Sprintf("d%d", b)), d-1)
+		}
+	}
+	walk("", shape.depth)
+	return paths
+}
+
+// ensureWalkersFixture populates dir with items only if the existing fixture
+// doesn't already match. The existence check is by file count under dir
+// (using list-repo). This lets repeated bench runs reuse the scaffolding.
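+//
+// The check is deliberately count-only, so content drift inside an existing
+// fixture goes undetected; per the header comment, delete the fixture dir
+// from the workspace to force a clean scaffold.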
+func ensureWalkersFixture(b *testing.B, env *benchEnv, lister *filer.WorkspaceFilesClient, dir string, items map[string][]byte) {
+	existing, err := lister.ListWithSHAs(context.Background(), dir)
+	if err == nil {
+		fileCount := 0
+		for _, o := range existing {
+			if o.ContentSHA256Hex != "" {
+				fileCount++
+			}
+		}
+		if fileCount == len(items) {
+			b.Logf("fixture %s reused (%d files)", dir, fileCount)
+			return
+		}
+	}
+	b.Logf("scaffolding fixture %s (%d files)", dir, len(items))
+	populate(b, env, dir, items)
+}
diff --git a/libs/sync/workspace_bench_test.go b/libs/sync/workspace_bench_test.go
index 562989674e5..c70bf3d99d4 100644
--- a/libs/sync/workspace_bench_test.go
+++ b/libs/sync/workspace_bench_test.go
@@ -36,9 +36,9 @@ import (
 	"path"
 	"path/filepath"
 	"sort"
+	"strings"
 	stdsync "sync"
 	"sync/atomic"
-	"strings"
 	"testing"
 
 	"github.com/databricks/cli/libs/filer"
@@ -60,10 +60,10 @@ type treeShape struct {
 // shapes ranges from "flat" (no nesting) to "large" (deep tree, many dirs).
 // Tweak these values if you want to explore broader/narrower mixes.
 var shapes = []treeShape{
-	{"flat", 0, 0}, // 1 leaf dir
-	{"small", 2, 2}, // 4 leaf dirs
-	{"medium", 4, 2}, // 16 leaf dirs
-	{"large", 6, 2}, // 64 leaf dirs
+	{"flat", 0, 0},   // 1 leaf dir
+	{"small", 2, 2},  // 4 leaf dirs
+	{"medium", 4, 2}, // 16 leaf dirs
+	{"large", 6, 2},  // 64 leaf dirs
 }
 
 // generatePaths returns n relative file paths arranged into a tree of the
@@ -168,10 +168,29 @@ func populate(tb testing.TB, env *benchEnv, remoteDir string, items map[string][
 		parentList = append(parentList, d)
 	}
 	sort.Strings(parentList)
+
+	// Parallelize the mkdirs — for deep shapes the serial loop dominates
+	// fixture setup time.
+	mkdirJobs := make(chan string, len(parentList))
 	for _, d := range parentList {
-		if err := env.wc.Workspace.MkdirsByPath(ctx, path.Join(remoteDir, d)); err != nil {
-			tb.Fatalf("mkdir %s: %v", d, err)
-		}
+		mkdirJobs <- d
+	}
+	close(mkdirJobs)
+	var mkdirWG stdsync.WaitGroup
+	var mkdirErr atomic.Pointer[error]
+	for i := 0; i < 16; i++ {
+		mkdirWG.Go(func() {
+			for d := range mkdirJobs {
+				if err := env.wc.Workspace.MkdirsByPath(ctx, path.Join(remoteDir, d)); err != nil {
+					e := fmt.Errorf("mkdir %s: %w", d, err)
+					mkdirErr.CompareAndSwap(nil, &e)
+				}
+			}
+		})
+	}
+	mkdirWG.Wait()
+	if e := mkdirErr.Load(); e != nil {
+		tb.Fatalf("%v", *e)
 	}
 
 	type job struct {
 		rel string
@@ -338,7 +357,7 @@ func BenchmarkListRepo(b *testing.B) {
 	}
 	lister := wfc.(*filer.WorkspaceFilesClient)
 
-	counts := []int{10, 100, 500}
+	counts := []int{10, 50}
 	for _, shape := range shapes {
 		for _, n := range counts {
 			b.Run(fmt.Sprintf("shape=%s/N=%d", shape.name, n), func(b *testing.B) {