diff --git a/backup/run.go b/backup/run.go index 51e4e64716d..b430f4a46c4 100644 --- a/backup/run.go +++ b/backup/run.go @@ -13,6 +13,7 @@ import ( "net/url" "os" "path/filepath" + "strings" "time" "github.com/golang/glog" @@ -49,7 +50,11 @@ var opt struct { destination string format string verbose bool - upgrade bool // used by export backup command. + upgrade bool // used by export backup command + sinceDate string // date-range filter lower bound (YYYY-MM-DD or RFC3339) + untilDate string // date-range filter upper bound + lastNDays int // shorthand: last N calendar days + summary bool // print summary stats after the listing } func init() { @@ -158,7 +163,15 @@ func initBackupLs() { flag.StringVarP(&opt.location, "location", "l", "", "Sets the source location URI (required).") flag.BoolVar(&opt.verbose, "verbose", false, - "Outputs additional info in backup list.") + "Show groups and drop operations. Reads the full manifest.json instead of the lightweight summary.") + flag.StringVar(&opt.sinceDate, "since-date", "", + "Only show backups on or after this date (YYYY-MM-DD or RFC3339).") + flag.StringVar(&opt.untilDate, "until-date", "", + "Only show backups on or before this date (YYYY-MM-DD or RFC3339).") + flag.IntVar(&opt.lastNDays, "last-n-days", 0, + "Only show backups from the last N calendar days.") + flag.BoolVar(&opt.summary, "summary", false, + "Print summary statistics after the backup listing.") _ = LsBackup.Cmd.MarkFlagRequired("location") } @@ -247,11 +260,70 @@ func runRestoreCmd() error { return nil } +// parseFilterDate parses a date string in YYYY-MM-DD or RFC3339 format. +func parseFilterDate(s string) (time.Time, error) { + if s == "" { + return time.Time{}, fmt.Errorf("date string must not be empty") + } + if t, err := time.Parse("2006-01-02", s); err == nil { + return t.UTC(), nil + } + t, err := time.Parse(time.RFC3339, s) + return t.UTC(), err +} + +// buildLsFilter constructs a BackupDateFilter from lsbackup flag values. 
+// It validates flag combinations and returns an error for invalid inputs. +func buildLsFilter(sinceDate, untilDate string, lastNDays int) (worker.BackupDateFilter, error) { + filter := worker.BackupDateFilter{} + if lastNDays < 0 { + return filter, fmt.Errorf("--last-n-days must be a positive integer, got %d", lastNDays) + } + if lastNDays > 0 && sinceDate != "" { + return filter, fmt.Errorf("--last-n-days and --since-date are mutually exclusive") + } + if lastNDays > 0 { + since := time.Now().UTC().AddDate(0, 0, -lastNDays).Truncate(24 * time.Hour) + filter.Since = &since + } + if sinceDate != "" { + t, err := parseFilterDate(sinceDate) + if err != nil { + return filter, fmt.Errorf("invalid --since-date value %q: %w", sinceDate, err) + } + filter.Since = &t + } + if untilDate != "" { + t, err := parseFilterDate(untilDate) + if err != nil { + return filter, fmt.Errorf("invalid --until-date value %q: %w", untilDate, err) + } + var end time.Time + if strings.Contains(untilDate, "T") { + // RFC3339 datetime: user gave an exact timestamp, respect it. + end = t + } else { + // Plain date (YYYY-MM-DD): extend to end of that calendar day. + end = t.Add(24*time.Hour - time.Millisecond) + } + filter.Until = &end + } + return filter, nil +} + func runLsbackupCmd() error { - manifests, err := worker.ListBackupManifests(opt.location, nil) + // Validate and build the date filter before doing any I/O. + filter, err := buildLsFilter(opt.sinceDate, opt.untilDate, opt.lastNDays) + if err != nil { + return err + } + + // --verbose reads the full manifest.json to include groups and drop-ops. 
+ manifests, err := worker.ListBackupManifests(opt.location, nil, opt.verbose) if err != nil { return fmt.Errorf("while listing manifests: %w", err) } + manifests = worker.FilterManifestsByDate(manifests, filter) type backupEntry struct { Path string `json:"path"` @@ -269,7 +341,6 @@ func runLsbackupCmd() error { var output backupOutput for _, manifest := range manifests { - be := backupEntry{ Path: manifest.Path, Since: manifest.SinceTsDeprecated, @@ -291,6 +362,29 @@ func runLsbackupCmd() error { } _, _ = os.Stdout.Write(b) fmt.Println() + + if opt.summary { + stats := worker.ComputeBackupListStats(manifests) + if opt.sinceDate != "" || opt.untilDate != "" || opt.lastNDays > 0 { + fmt.Fprintf(os.Stderr, "\n--- Backup Summary (filtered) ---\n") + } else { + fmt.Fprintf(os.Stderr, "\n--- Backup Summary ---\n") + } + fmt.Fprintf(os.Stderr, "Total backups listed : %d\n", stats.Total) + fmt.Fprintf(os.Stderr, "Backup series : %d\n", stats.BackupSeriesCount) + if stats.OldestBackup != nil { + fmt.Fprintf(os.Stderr, "Oldest backup : %s\n", stats.OldestBackup.Format(time.RFC3339)) + } + if stats.NewestBackup != nil { + fmt.Fprintf(os.Stderr, "Newest backup : %s\n", stats.NewestBackup.Format(time.RFC3339)) + } + if stats.LastFullBackup != nil { + fmt.Fprintf(os.Stderr, "Last full backup : %s\n", stats.LastFullBackup.Format(time.RFC3339)) + } + if stats.LastIncrBackup != nil { + fmt.Fprintf(os.Stderr, "Last incr backup : %s\n", stats.LastIncrBackup.Format(time.RFC3339)) + } + } return nil } diff --git a/backup/run_test.go b/backup/run_test.go new file mode 100644 index 00000000000..603a4866336 --- /dev/null +++ b/backup/run_test.go @@ -0,0 +1,203 @@ +/* + * SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +package backup + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestParseFilterDate(t *testing.T) { + tests := []struct { + name string + input string + wantErr bool + wantYear int + wantMonth time.Month + wantDay int + wantHour int + wantMin int + }{ + { + name: "YYYY-MM-DD", + input: "2026-04-15", + wantYear: 2026, + wantMonth: time.April, + wantDay: 15, + }, + { + name: "RFC3339 UTC", + input: "2026-01-01T00:00:00Z", + wantYear: 2026, + wantMonth: time.January, + wantDay: 1, + }, + { + name: "RFC3339 with positive offset normalised to UTC", + input: "2026-04-15T06:30:00+05:30", // 06:30 IST == 01:00 UTC + wantYear: 2026, + wantMonth: time.April, + wantDay: 15, + wantHour: 1, + wantMin: 0, + }, + { + name: "RFC3339 with negative offset normalised to UTC", + input: "2026-04-15T00:00:00-05:00", // midnight EST == 05:00 UTC + wantYear: 2026, + wantMonth: time.April, + wantDay: 15, + wantHour: 5, + }, + { + name: "empty string returns error", + input: "", + wantErr: true, + }, + { + name: "invalid string", + input: "not-a-date", + wantErr: true, + }, + { + name: "partial date without day", + input: "2026-04", + wantErr: true, + }, + { + name: "RFC3339 without timezone is invalid", + input: "2026-04-15T10:00:00", + wantErr: true, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := parseFilterDate(tc.input) + if tc.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + require.Equal(t, time.UTC, got.Location(), "result must be UTC") + require.Equal(t, tc.wantYear, got.Year()) + require.Equal(t, tc.wantMonth, got.Month()) + require.Equal(t, tc.wantDay, got.Day()) + require.Equal(t, tc.wantHour, got.Hour()) + require.Equal(t, tc.wantMin, got.Minute()) + }) + } +} + +func TestBuildLsFilter(t *testing.T) { + t.Run("negative lastNDays returns error", func(t *testing.T) { + _, err := buildLsFilter("", "", -1) + require.Error(t, err) + 
require.Contains(t, err.Error(), "--last-n-days") + require.Contains(t, err.Error(), "-1") + }) + + t.Run("lastNDays and sinceDate together return error", func(t *testing.T) { + _, err := buildLsFilter("2026-01-01", "", 7) + require.Error(t, err) + require.Contains(t, err.Error(), "--last-n-days") + require.Contains(t, err.Error(), "--since-date") + }) + + t.Run("invalid sinceDate returns error naming the flag", func(t *testing.T) { + _, err := buildLsFilter("not-a-date", "", 0) + require.Error(t, err) + require.Contains(t, err.Error(), "--since-date") + }) + + t.Run("invalid untilDate returns error naming the flag", func(t *testing.T) { + _, err := buildLsFilter("", "not-a-date", 0) + require.Error(t, err) + require.Contains(t, err.Error(), "--until-date") + }) + + t.Run("all empty returns filter with nil Since and Until", func(t *testing.T) { + f, err := buildLsFilter("", "", 0) + require.NoError(t, err) + require.Nil(t, f.Since) + require.Nil(t, f.Until) + }) + + t.Run("lastNDays=0 does not set Since", func(t *testing.T) { + f, err := buildLsFilter("", "", 0) + require.NoError(t, err) + require.Nil(t, f.Since) + }) + + t.Run("lastNDays=7 sets Since to 7 days ago midnight UTC", func(t *testing.T) { + expected := time.Now().UTC().AddDate(0, 0, -7).Truncate(24 * time.Hour) + f, err := buildLsFilter("", "", 7) + require.NoError(t, err) + require.NotNil(t, f.Since) + require.Nil(t, f.Until) + require.WithinDuration(t, expected, *f.Since, time.Second) + require.Equal(t, 0, f.Since.Hour(), "Since must be at midnight") + }) + + t.Run("sinceDate YYYY-MM-DD sets Since to midnight UTC of that date", func(t *testing.T) { + f, err := buildLsFilter("2026-03-01", "", 0) + require.NoError(t, err) + require.NotNil(t, f.Since) + require.Nil(t, f.Until) + require.Equal(t, 2026, f.Since.Year()) + require.Equal(t, time.March, f.Since.Month()) + require.Equal(t, 1, f.Since.Day()) + require.Equal(t, 0, f.Since.Hour()) + }) + + t.Run("sinceDate RFC3339 uses exact time without midnight 
forcing", func(t *testing.T) { + f, err := buildLsFilter("2026-03-01T08:30:00Z", "", 0) + require.NoError(t, err) + require.NotNil(t, f.Since) + require.Equal(t, 8, f.Since.Hour()) + require.Equal(t, 30, f.Since.Minute()) + require.Equal(t, 0, f.Since.Second()) + require.Equal(t, time.UTC, f.Since.Location()) + }) + + t.Run("untilDate YYYY-MM-DD extends to end of that calendar day", func(t *testing.T) { + f, err := buildLsFilter("", "2026-03-31", 0) + require.NoError(t, err) + require.Nil(t, f.Since) + require.NotNil(t, f.Until) + // Should be 2026-03-31 23:59:59.999 UTC + require.Equal(t, 2026, f.Until.Year()) + require.Equal(t, time.March, f.Until.Month()) + require.Equal(t, 31, f.Until.Day()) + require.Equal(t, 23, f.Until.Hour()) + require.Equal(t, 59, f.Until.Minute()) + require.Equal(t, 59, f.Until.Second()) + }) + + t.Run("untilDate RFC3339 uses exact time without end-of-day extension", func(t *testing.T) { + f, err := buildLsFilter("", "2026-03-31T12:00:00Z", 0) + require.NoError(t, err) + require.NotNil(t, f.Until) + // Must be exactly 12:00:00, not extended to 23:59:59 + require.Equal(t, 12, f.Until.Hour()) + require.Equal(t, 0, f.Until.Minute()) + require.Equal(t, 0, f.Until.Second()) + }) + + t.Run("sinceDate and untilDate both set", func(t *testing.T) { + f, err := buildLsFilter("2026-01-01", "2026-12-31", 0) + require.NoError(t, err) + require.NotNil(t, f.Since) + require.NotNil(t, f.Until) + require.Equal(t, time.January, f.Since.Month()) + require.Equal(t, 1, f.Since.Day()) + require.Equal(t, time.December, f.Until.Month()) + require.Equal(t, 31, f.Until.Day()) + require.Equal(t, 23, f.Until.Hour()) + }) + +} diff --git a/graphql/admin/endpoints.go b/graphql/admin/endpoints.go index f82853400a8..5e81dba97e9 100644 --- a/graphql/admin/endpoints.go +++ b/graphql/admin/endpoints.go @@ -188,6 +188,27 @@ const adminTypes = ` """ anonymous: Boolean + """ + Only return backups taken on or after this date (YYYY-MM-DD or RFC3339). 
+ """ + sinceDate: String + + """ + Only return backups on or before this date. For YYYY-MM-DD the entire day is + included (up to 23:59:59 UTC). For RFC3339 the exact timestamp is used as the cutoff. + """ + untilDate: String + + """ + Only return backups from the last N calendar days. Cannot be combined with sinceDate. + """ + lastNDays: Int + + """ + When true, reads the full manifest.json and includes predicate groups and drop + operations. Defaults to false, which reads the lightweight manifest_summary.json. + """ + fullManifest: Boolean } type BackupGroup { diff --git a/graphql/admin/list_backups.go b/graphql/admin/list_backups.go index d4e10dcf91a..16ec76b40d1 100644 --- a/graphql/admin/list_backups.go +++ b/graphql/admin/list_backups.go @@ -8,6 +8,8 @@ package admin import ( "context" "encoding/json" + "strings" + "time" "github.com/pkg/errors" @@ -25,6 +27,10 @@ type lsBackupInput struct { SessionToken pb.Sensitive Anonymous bool ForceFull bool + SinceDate string `json:"sinceDate"` + UntilDate string `json:"untilDate"` + LastNDays int `json:"lastNDays"` + FullManifest bool `json:"fullManifest"` } type group struct { @@ -43,22 +49,96 @@ type manifest struct { Encrypted bool `json:"encrypted,omitempty"` } +// parseGraphQLDate parses a YYYY-MM-DD or RFC3339 date string. +func parseGraphQLDate(s string) (time.Time, error) { + if s == "" { + return time.Time{}, errors.Errorf("date string must not be empty") + } + if t, err := time.Parse("2006-01-02", s); err == nil { + return t.UTC(), nil + } + t, err := time.Parse(time.RFC3339, s) + return t.UTC(), err +} + +// buildBackupDateFilter constructs a BackupDateFilter from GraphQL input fields. 
+func buildBackupDateFilter(input *lsBackupInput) (worker.BackupDateFilter, error) { + filter := worker.BackupDateFilter{} + if input.LastNDays < 0 { + return filter, errors.Errorf("lastNDays must be a positive integer, got %d", input.LastNDays) + } + if input.LastNDays > 0 && input.SinceDate != "" { + return filter, errors.Errorf("lastNDays and sinceDate are mutually exclusive") + } + if input.LastNDays > 0 { + since := time.Now().UTC().AddDate(0, 0, -input.LastNDays).Truncate(24 * time.Hour) + filter.Since = &since + } + if input.SinceDate != "" { + t, err := parseGraphQLDate(input.SinceDate) + if err != nil { + return filter, errors.Errorf("invalid sinceDate %q: %v", input.SinceDate, err) + } + filter.Since = &t + } + if input.UntilDate != "" { + t, err := parseGraphQLDate(input.UntilDate) + if err != nil { + return filter, errors.Errorf("invalid untilDate %q: %v", input.UntilDate, err) + } + var end time.Time + if strings.Contains(input.UntilDate, "T") { + // RFC3339 datetime: user gave an exact timestamp, respect it. + end = t + } else { + // Plain date (YYYY-MM-DD): extend to end of that calendar day. + end = t.Add(24*time.Hour - time.Millisecond) + } + filter.Until = &end + } + return filter, nil +} + +// needsFullManifest returns true when the full manifest.json must be read. +// It is true when the caller explicitly sets fullManifest OR when the query +// selection set includes "groups", so existing queries that request groups +// continue to receive populated data without any input change. 
+func needsFullManifest(fullManifestFlag bool, selectionSet []schema.Field) bool { + if fullManifestFlag { + return true + } + for _, f := range selectionSet { + if f.Name() == "groups" { + return true + } + } + return false +} + func resolveListBackups(ctx context.Context, q schema.Query) *resolve.Resolved { input, err := getLsBackupInput(q) if err != nil { return resolve.EmptyResult(q, err) } + filter, err := buildBackupDateFilter(input) + if err != nil { + return resolve.EmptyResult(q, err) + } + creds := &x.MinioCredentials{ AccessKey: input.AccessKey, SecretKey: input.SecretKey, SessionToken: input.SessionToken, Anonymous: input.Anonymous, } - manifests, err := worker.ProcessListBackups(ctx, input.Location, creds) + manifests, err := worker.ProcessListBackups(ctx, input.Location, creds, + needsFullManifest(input.FullManifest, q.SelectionSet())) if err != nil { return resolve.EmptyResult(q, errors.Errorf("%s: %s", x.Error, err.Error())) } + manifests = worker.FilterManifestsByDate(manifests, filter) + convertedManifests := convertManifests(manifests) results := make([]map[string]interface{}, 0) diff --git a/graphql/admin/list_backups_test.go b/graphql/admin/list_backups_test.go new file mode 100644 index 00000000000..dec9b69f8bc --- /dev/null +++ b/graphql/admin/list_backups_test.go @@ -0,0 +1,374 @@ +/* + * SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package admin + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/dgraph-io/dgraph/v25/graphql/schema" + "github.com/dgraph-io/dgraph/v25/worker" +) + +// stubField implements schema.Field with only Name() defined. +// All other methods panic — they must not be called by needsFullManifest. 
+type stubField struct { + name string + schema.Field +} + +func (s stubField) Name() string { return s.name } + +func fields(names ...string) []schema.Field { + out := make([]schema.Field, len(names)) + for i, n := range names { + out[i] = stubField{name: n} + } + return out +} + +func TestParseGraphQLDate(t *testing.T) { + tests := []struct { + name string + input string + wantErr bool + wantYear int + wantMonth time.Month + wantDay int + wantHour int + wantMin int + }{ + { + name: "YYYY-MM-DD format", + input: "2026-04-15", + wantYear: 2026, + wantMonth: time.April, + wantDay: 15, + }, + { + name: "RFC3339 UTC", + input: "2026-01-01T00:00:00Z", + wantYear: 2026, + wantMonth: time.January, + wantDay: 1, + }, + { + name: "RFC3339 with positive offset normalised to UTC", + input: "2026-04-15T06:30:00+05:30", // 06:30 IST == 01:00 UTC + wantYear: 2026, + wantMonth: time.April, + wantDay: 15, + wantHour: 1, + wantMin: 0, + }, + { + name: "RFC3339 with negative offset normalised to UTC", + input: "2026-04-15T00:00:00-05:00", // midnight EST == 05:00 UTC + wantYear: 2026, + wantMonth: time.April, + wantDay: 15, + wantHour: 5, + }, + { + name: "invalid date string", + input: "not-a-date", + wantErr: true, + }, + { + name: "partial date without day", + input: "2026-04", + wantErr: true, + }, + { + name: "RFC3339 without timezone is invalid", + input: "2026-04-15T10:00:00", + wantErr: true, + }, + { + name: "empty string", + input: "", + wantErr: true, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := parseGraphQLDate(tc.input) + if tc.wantErr { + require.Error(t, err) + if tc.input == "" { + require.Contains(t, err.Error(), "empty") + } + return + } + require.NoError(t, err) + require.Equal(t, time.UTC, got.Location(), "result must be UTC") + require.Equal(t, tc.wantYear, got.Year()) + require.Equal(t, tc.wantMonth, got.Month()) + require.Equal(t, tc.wantDay, got.Day()) + require.Equal(t, tc.wantHour, got.Hour()) + require.Equal(t, 
tc.wantMin, got.Minute()) + }) + } +} + +func TestBuildBackupDateFilter(t *testing.T) { + t.Run("no fields set returns empty filter", func(t *testing.T) { + filter, err := buildBackupDateFilter(&lsBackupInput{}) + require.NoError(t, err) + require.Nil(t, filter.Since) + require.Nil(t, filter.Until) + }) + + t.Run("sinceDate YYYY-MM-DD sets Since to midnight UTC", func(t *testing.T) { + filter, err := buildBackupDateFilter(&lsBackupInput{SinceDate: "2026-03-01"}) + require.NoError(t, err) + require.NotNil(t, filter.Since) + require.Equal(t, 2026, filter.Since.Year()) + require.Equal(t, time.March, filter.Since.Month()) + require.Equal(t, 1, filter.Since.Day()) + require.Equal(t, 0, filter.Since.Hour(), "sinceDate must be at midnight UTC") + require.Nil(t, filter.Until) + }) + + t.Run("sinceDate RFC3339 uses exact time", func(t *testing.T) { + filter, err := buildBackupDateFilter(&lsBackupInput{SinceDate: "2026-03-01T08:00:00Z"}) + require.NoError(t, err) + require.NotNil(t, filter.Since) + require.Equal(t, 8, filter.Since.Hour()) + require.Equal(t, 0, filter.Since.Minute()) + }) + + t.Run("untilDate YYYY-MM-DD sets Until to end of that day", func(t *testing.T) { + filter, err := buildBackupDateFilter(&lsBackupInput{UntilDate: "2026-03-31"}) + require.NoError(t, err) + require.Nil(t, filter.Since) + require.NotNil(t, filter.Until) + // Until = 2026-03-31 00:00:00 + 24h - 1ms = 2026-03-31 23:59:59.999 + require.Equal(t, 2026, filter.Until.Year()) + require.Equal(t, time.March, filter.Until.Month()) + require.Equal(t, 31, filter.Until.Day()) + require.Equal(t, 23, filter.Until.Hour()) + require.Equal(t, 59, filter.Until.Minute()) + require.Equal(t, 59, filter.Until.Second()) + }) + + t.Run("untilDate RFC3339 uses exact time without end-of-day extension", func(t *testing.T) { + filter, err := buildBackupDateFilter(&lsBackupInput{UntilDate: "2026-03-31T12:00:00Z"}) + require.NoError(t, err) + require.NotNil(t, filter.Until) + require.Equal(t, 12, filter.Until.Hour()) + 
require.Equal(t, 0, filter.Until.Minute()) + require.Equal(t, 0, filter.Until.Second()) + }) + + t.Run("lastNDays=7 sets Since to 7 days ago midnight UTC", func(t *testing.T) { + expected := time.Now().UTC().AddDate(0, 0, -7).Truncate(24 * time.Hour) + filter, err := buildBackupDateFilter(&lsBackupInput{LastNDays: 7}) + require.NoError(t, err) + require.NotNil(t, filter.Since) + require.Nil(t, filter.Until) + require.WithinDuration(t, expected, *filter.Since, time.Second) + require.Equal(t, 0, filter.Since.Hour(), "Since must be at midnight UTC") + }) + + t.Run("lastNDays=0 does not set Since", func(t *testing.T) { + filter, err := buildBackupDateFilter(&lsBackupInput{LastNDays: 0}) + require.NoError(t, err) + require.Nil(t, filter.Since) + }) + + t.Run("lastNDays and sinceDate together return error", func(t *testing.T) { + _, err := buildBackupDateFilter(&lsBackupInput{ + LastNDays: 30, + SinceDate: "2026-01-01", + }) + require.Error(t, err) + require.Contains(t, err.Error(), "sinceDate") + require.Contains(t, err.Error(), "lastNDays") + }) + + t.Run("negative lastNDays returns error", func(t *testing.T) { + _, err := buildBackupDateFilter(&lsBackupInput{LastNDays: -1}) + require.Error(t, err) + require.Contains(t, err.Error(), "lastNDays") + }) + + t.Run("sinceDate and untilDate both set", func(t *testing.T) { + filter, err := buildBackupDateFilter(&lsBackupInput{ + SinceDate: "2026-01-01", + UntilDate: "2026-01-31", + }) + require.NoError(t, err) + require.NotNil(t, filter.Since) + require.NotNil(t, filter.Until) + require.Equal(t, time.January, filter.Since.Month()) + require.Equal(t, 1, filter.Since.Day()) + // Until is 2026-01-31 + 24h - 1ms = 2026-01-31 23:59:59.999 + require.Equal(t, time.January, filter.Until.Month()) + require.Equal(t, 31, filter.Until.Day()) + require.Equal(t, 23, filter.Until.Hour()) + require.Equal(t, 59, filter.Until.Second()) + }) + + t.Run("invalid sinceDate returns error with field name", func(t *testing.T) { + _, err := 
buildBackupDateFilter(&lsBackupInput{SinceDate: "not-a-date"}) + require.Error(t, err) + require.Contains(t, err.Error(), "sinceDate") + }) + + t.Run("invalid untilDate returns error with field name", func(t *testing.T) { + _, err := buildBackupDateFilter(&lsBackupInput{UntilDate: "not-a-date"}) + require.Error(t, err) + require.Contains(t, err.Error(), "untilDate") + }) +} + +func TestConvertManifests(t *testing.T) { + t.Run("nil input returns empty slice", func(t *testing.T) { + result := convertManifests(nil) + require.Empty(t, result) + }) + + t.Run("empty input returns empty slice", func(t *testing.T) { + result := convertManifests([]*worker.Manifest{}) + require.NotNil(t, result) + require.Equal(t, 0, len(result)) + }) + + t.Run("single full manifest without groups", func(t *testing.T) { + m := &worker.Manifest{ + ManifestBase: worker.ManifestBase{ + Type: "full", + BackupId: "backup-abc", + BackupNum: 1, + Path: "dgraph.20260101.000000.000", + ReadTs: 500, + SinceTsDeprecated: 0, + Encrypted: true, + }, + } + result := convertManifests([]*worker.Manifest{m}) + require.Equal(t, 1, len(result)) + r := result[0] + require.Equal(t, "full", r.Type) + require.Equal(t, "backup-abc", r.BackupId) + require.Equal(t, uint64(1), r.BackupNum) + require.Equal(t, "dgraph.20260101.000000.000", r.Path) + require.Equal(t, uint64(500), r.ReadTs) + require.Equal(t, uint64(0), r.Since) + require.True(t, r.Encrypted) + require.Empty(t, r.Groups) + }) + + t.Run("Since maps from SinceTsDeprecated", func(t *testing.T) { + m := &worker.Manifest{ + ManifestBase: worker.ManifestBase{ + Type: "incremental", + BackupId: "id1", + BackupNum: 2, + ReadTs: 800, + SinceTsDeprecated: 500, + }, + } + result := convertManifests([]*worker.Manifest{m}) + require.Equal(t, 1, len(result)) + require.Equal(t, uint64(500), result[0].Since) + require.Equal(t, uint64(800), result[0].ReadTs) + }) + + t.Run("manifest with multiple groups", func(t *testing.T) { + m := &worker.Manifest{ + ManifestBase: 
worker.ManifestBase{ + Type: "incremental", + BackupId: "backup-def", + BackupNum: 2, + Path: "dgraph.20260115.000000.000", + ReadTs: 800, + }, + Groups: map[uint32][]string{ + 1: {"0-name", "0-age"}, + 2: {"0-email"}, + }, + } + result := convertManifests([]*worker.Manifest{m}) + require.Equal(t, 1, len(result)) + r := result[0] + require.Equal(t, 2, len(r.Groups)) + + groupMap := make(map[uint32][]string) + for _, g := range r.Groups { + groupMap[g.GroupId] = g.Predicates + } + require.ElementsMatch(t, []string{"0-name", "0-age"}, groupMap[1]) + require.ElementsMatch(t, []string{"0-email"}, groupMap[2]) + }) + + t.Run("manifest with nil groups yields empty Groups slice", func(t *testing.T) { + m := &worker.Manifest{ + ManifestBase: worker.ManifestBase{ + Type: "full", BackupId: "id2", BackupNum: 1, + }, + Groups: nil, + } + result := convertManifests([]*worker.Manifest{m}) + require.Equal(t, 1, len(result)) + require.Empty(t, result[0].Groups) + }) + + t.Run("multiple manifests preserves order and fields", func(t *testing.T) { + + manifests := []*worker.Manifest{ + { + ManifestBase: worker.ManifestBase{ + Type: "full", BackupId: "id1", BackupNum: 1, ReadTs: 100, + Path: "dgraph.20260101.000000.000", + }, + }, + { + ManifestBase: worker.ManifestBase{ + Type: "incremental", BackupId: "id1", BackupNum: 2, ReadTs: 200, + SinceTsDeprecated: 100, + Path: "dgraph.20260115.000000.000", + }, + }, + } + result := convertManifests(manifests) + require.Equal(t, 2, len(result)) + require.Equal(t, "full", result[0].Type) + require.Equal(t, uint64(100), result[0].ReadTs) + require.Equal(t, "incremental", result[1].Type) + require.Equal(t, uint64(200), result[1].ReadTs) + require.Equal(t, uint64(100), result[1].Since) + }) +} + +func TestNeedsFullManifest(t *testing.T) { + t.Run("false when flag is false and groups not in selection", func(t *testing.T) { + require.False(t, needsFullManifest(false, fields("backupId", "type", "path"))) + }) + + t.Run("true when fullManifest flag 
is true regardless of selection", func(t *testing.T) { + require.True(t, needsFullManifest(true, fields("backupId", "type"))) + }) + + t.Run("true when groups is in selection even without fullManifest flag", func(t *testing.T) { + require.True(t, needsFullManifest(false, fields("backupId", "type", "groups"))) + }) + + t.Run("true when groups is the only selected field", func(t *testing.T) { + require.True(t, needsFullManifest(false, fields("groups"))) + }) + + t.Run("false when selection is empty and flag is false", func(t *testing.T) { + require.False(t, needsFullManifest(false, fields())) + }) + + t.Run("false when selection has similar but not exact name", func(t *testing.T) { + require.False(t, needsFullManifest(false, fields("groupId", "backupGroups"))) + }) +} diff --git a/systest/backup/encryption/backup_test.go b/systest/backup/encryption/backup_test.go index 9d064707b13..fa7e72683a5 100644 --- a/systest/backup/encryption/backup_test.go +++ b/systest/backup/encryption/backup_test.go @@ -375,7 +375,10 @@ func copyToLocalFs(t *testing.T) { minio.ListObjectsOptions{Prefix: "", Recursive: false}) for object := range objectCh1 { require.NoError(t, object.Err) - if object.Key != "manifest.json" { + // Only create local dirs for Minio directory prefixes (keys ending with "/"). + // Root-level files like manifest.json and manifest_summary.json must not be + // treated as directories. 
+ if strings.HasSuffix(object.Key, "/") { dstDir := backupDir + "/" + object.Key require.NoError(t, os.MkdirAll(dstDir, os.ModePerm)) } diff --git a/systest/backup/filesystem/backup_test.go b/systest/backup/filesystem/backup_test.go index 5a9534050c6..894878116b7 100644 --- a/systest/backup/filesystem/backup_test.go +++ b/systest/backup/filesystem/backup_test.go @@ -449,6 +449,701 @@ func runBackupInternal(t *testing.T, forceFull bool, numExpectedFiles, return dirs } +// TestBackupSummaryManifest verifies that a backup writes manifest_summary.json +// alongside manifest.json, that the summary omits predicate groups, and that +// ListBackupManifests uses the summary when it is available. +func TestBackupSummaryManifest(t *testing.T) { + common.DirSetup(t) + defer common.DirCleanup(t) + + conn, err := grpc.NewClient(testutil.GetSockAddr(), + grpc.WithTransportCredentials(credentials.NewTLS(testutil.GetAlphaClientConfig(t)))) + require.NoError(t, err) + dg := dgo.NewDgraphClient(api.NewDgraphClient(conn)) + + ctx := context.Background() + require.NoError(t, dg.Alter(ctx, &api.Operation{DropAll: true})) + require.NoError(t, dg.Alter(ctx, &api.Operation{ + Schema: `name: string @index(hash) .`, + })) + _, err = dg.NewTxn().Mutate(ctx, &api.Mutation{ + CommitNow: true, + SetNquads: []byte(`<_:x1> "summary-test" .`), + }) + require.NoError(t, err) + + // Take a single full backup. + _ = runBackup(t, 3, 1) + + // manifest_summary.json must be present alongside manifest.json. 
+ summaryFile := filepath.Join(copyBackupDir, "manifest_summary.json") + require.FileExists(t, summaryFile, "manifest_summary.json was not created by backup") + + raw, err := os.ReadFile(summaryFile) + require.NoError(t, err) + + var summary worker.MasterManifestSummary + require.NoError(t, json.Unmarshal(raw, &summary), "manifest_summary.json is not valid JSON") + require.Equal(t, 1, len(summary.Manifests)) + require.Equal(t, "full", summary.Manifests[0].Type) + + // Raw JSON must not contain the groups or drop_operations keys. + rawStr := string(raw) + require.NotContains(t, rawStr, `"groups"`) + require.NotContains(t, rawStr, `"drop_operations"`) + + // ListBackupManifests should prefer the summary and return manifests without groups. + manifests, err := worker.ListBackupManifests(copyBackupDir, nil, false) + require.NoError(t, err) + require.Equal(t, 1, len(manifests)) + require.Nil(t, manifests[0].Groups, + "ListBackupManifests should return nil Groups when using summary manifest") + + // The full manifest.json must still be intact and readable (restore path unchanged). + fullManifestFile := filepath.Join(copyBackupDir, "manifest.json") + require.FileExists(t, fullManifestFile) + rawFull, err := os.ReadFile(fullManifestFile) + require.NoError(t, err) + var fullMaster worker.MasterManifest + require.NoError(t, json.Unmarshal(rawFull, &fullMaster)) + require.Equal(t, 1, len(fullMaster.Manifests)) + require.NotNil(t, fullMaster.Manifests[0].Groups, "full manifest.json must retain Groups for restore") +} + +// listBackupsEntry mirrors the GraphQL Manifest type fields used in filter tests. 
+type listBackupsEntry struct { + BackupId string `json:"backupId"` + BackupNum float64 `json:"backupNum"` + Type string `json:"type"` + Path string `json:"path"` + Encrypted bool `json:"encrypted"` + Since float64 `json:"since"` + Groups []struct { + GroupId float64 `json:"groupId"` + Predicates []string `json:"predicates"` + } `json:"groups"` +} + +// setupListBackupsFixture copies the pre-built testdata manifest files into the +// alpha container's backup directory. The test does not need to take any real backups. +func setupListBackupsFixture(t *testing.T) { + t.Helper() + common.DirSetup(t) + t.Cleanup(func() { common.DirCleanup(t) }) + + alpha := testutil.DockerPrefix + "_alpha1_1" + require.NoError(t, testutil.DockerCp("testdata/manifest.json", + alpha+":/data/backups/manifest.json")) + require.NoError(t, testutil.DockerCp("testdata/manifest_summary.json", + alpha+":/data/backups/manifest_summary.json")) +} + +// makeListBackupsRunner returns a closure that posts a listBackups query with the +// given extra input fields and returns the decoded entries. 
+func makeListBackupsRunner(t *testing.T) func(extraInput string) []listBackupsEntry { + t.Helper() + adminUrl := "https://" + testutil.GetSockAddrHttp() + "/admin" + client := testutil.GetHttpsClient(t) + + return func(extraInput string) []listBackupsEntry { + t.Helper() + query := fmt.Sprintf(`query { + listBackups(input: {location: "%s"%s}) { + backupId backupNum type path encrypted since + groups { groupId predicates } + } + }`, alphaBackupDir, extraInput) + params := testutil.GraphQLParams{Query: query} + b, err := json.Marshal(params) + require.NoError(t, err) + resp, err := client.Post(adminUrl, "application/json", bytes.NewBuffer(b)) + require.NoError(t, err) + defer resp.Body.Close() + + var result struct { + Data struct { + ListBackups []listBackupsEntry `json:"listBackups"` + } `json:"data"` + Errors []struct{ Message string } `json:"errors"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&result)) + require.Empty(t, result.Errors, "GraphQL errors: %v", result.Errors) + return result.Data.ListBackups + } +} + +// makeListBackupsErrorRunner returns a closure that posts a listBackups query and +// returns the GraphQL error messages. Use this for inputs that should be rejected. 
+func makeListBackupsErrorRunner(t *testing.T) func(extraInput string) []string { + t.Helper() + adminUrl := "https://" + testutil.GetSockAddrHttp() + "/admin" + client := testutil.GetHttpsClient(t) + + return func(extraInput string) []string { + t.Helper() + query := fmt.Sprintf(`query { + listBackups(input: {location: "%s"%s}) { + backupId + } + }`, alphaBackupDir, extraInput) + params := testutil.GraphQLParams{Query: query} + b, err := json.Marshal(params) + require.NoError(t, err) + resp, err := client.Post(adminUrl, "application/json", bytes.NewBuffer(b)) + require.NoError(t, err) + defer resp.Body.Close() + var result struct { + Errors []struct{ Message string } `json:"errors"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&result)) + msgs := make([]string, len(result.Errors)) + for i, e := range result.Errors { + msgs[i] = e.Message + } + return msgs + } +} + +// TestListBackupsInputErrors exercises GraphQL-level validation errors for listBackups. +func TestListBackupsInputErrors(t *testing.T) { + setupListBackupsFixture(t) + runExpectError := makeListBackupsErrorRunner(t) + + t.Run("lastNDays and sinceDate together return error", func(t *testing.T) { + errs := runExpectError(`, lastNDays: 7, sinceDate: "2024-01-01"`) + require.NotEmpty(t, errs, "expected a GraphQL error for lastNDays+sinceDate") + require.Contains(t, errs[0], "sinceDate") + require.Contains(t, errs[0], "lastNDays") + }) + + t.Run("negative lastNDays returns error", func(t *testing.T) { + errs := runExpectError(`, lastNDays: -1`) + require.NotEmpty(t, errs, "expected a GraphQL error for negative lastNDays") + require.Contains(t, errs[0], "lastNDays") + }) + + t.Run("invalid sinceDate returns error", func(t *testing.T) { + errs := runExpectError(`, sinceDate: "not-a-date"`) + require.NotEmpty(t, errs, "expected a GraphQL error for invalid sinceDate") + require.Contains(t, errs[0], "sinceDate") + }) +} + +// TestListBackupsGraphQL is an E2E smoke test: take a real backup, then 
verify
+// it appears in the listBackups query response.
+func TestListBackupsGraphQL(t *testing.T) {
+	common.DirSetup(t)
+	defer common.DirCleanup(t)
+
+	conn, err := grpc.NewClient(testutil.GetSockAddr(),
+		grpc.WithTransportCredentials(credentials.NewTLS(testutil.GetAlphaClientConfig(t))))
+	require.NoError(t, err)
+	dg := dgo.NewDgraphClient(api.NewDgraphClient(conn))
+	ctx := context.Background()
+
+	require.NoError(t, dg.Alter(ctx, &api.Operation{DropAll: true}))
+	require.NoError(t, dg.Alter(ctx, &api.Operation{Schema: `movie: string @index(hash) .`}))
+	_, err = dg.NewTxn().Mutate(ctx, &api.Mutation{
+		CommitNow: true,
+		SetNquads: []byte(`<_:x1> <movie> "Inception" .`),
+	})
+	require.NoError(t, err)
+
+	_ = runBackup(t, 3, 1)
+
+	_, err = dg.NewTxn().Mutate(ctx, &api.Mutation{
+		CommitNow: true,
+		SetNquads: []byte(`<_:x2> <movie> "Interstellar" .`),
+	})
+	require.NoError(t, err)
+	_ = runBackupInternal(t, false, 6, 2)
+
+	runListBackups := makeListBackupsRunner(t)
+	entries := runListBackups("")
+	require.Equal(t, 2, len(entries), "expected one full and one incremental backup")
+
+	types := make(map[string]bool)
+	for _, e := range entries {
+		types[e.Type] = true
+		require.NotEmpty(t, e.BackupId)
+		require.NotEmpty(t, e.Path)
+	}
+	require.True(t, types["full"])
+	require.True(t, types["incremental"])
+}
+
+// TestListBackupsGroupsAutoDetect verifies that a listBackups query which
+// selects the "groups" field returns populated predicate data even without
+// setting fullManifest: true. This exercises the auto-detection in the resolver
+// that reads manifest.json whenever "groups" appears in the selection set,
+// preserving backward compatibility for existing queries.
+func TestListBackupsGroupsAutoDetect(t *testing.T) {
+	common.DirSetup(t)
+	defer common.DirCleanup(t)
+
+	conn, err := grpc.NewClient(testutil.GetSockAddr(),
+		grpc.WithTransportCredentials(credentials.NewTLS(testutil.GetAlphaClientConfig(t))))
+	require.NoError(t, err)
+	dg := dgo.NewDgraphClient(api.NewDgraphClient(conn))
+	ctx := context.Background()
+
+	require.NoError(t, dg.Alter(ctx, &api.Operation{DropAll: true}))
+	require.NoError(t, dg.Alter(ctx, &api.Operation{Schema: `movie: string @index(hash) .`}))
+	_, err = dg.NewTxn().Mutate(ctx, &api.Mutation{
+		CommitNow: true,
+		SetNquads: []byte(`<_:x1> <movie> "Inception" .`),
+	})
+	require.NoError(t, err)
+	_ = runBackup(t, 3, 1)
+
+	// Query with groups in the selection but NO fullManifest: true.
+	// The resolver must auto-detect the "groups" field and read the full manifest.
+	adminUrl := "https://" + testutil.GetSockAddrHttp() + "/admin"
+	client := testutil.GetHttpsClient(t)
+	query := fmt.Sprintf(`query {
+		listBackups(input: {location: "%s"}) {
+			backupId type
+			groups { groupId predicates }
+		}
+	}`, alphaBackupDir)
+	params := testutil.GraphQLParams{Query: query}
+	b, err := json.Marshal(params)
+	require.NoError(t, err)
+	resp, err := client.Post(adminUrl, "application/json", bytes.NewBuffer(b))
+	require.NoError(t, err)
+	defer resp.Body.Close()
+
+	var result struct {
+		Data struct {
+			ListBackups []listBackupsEntry `json:"listBackups"`
+		} `json:"data"`
+		Errors []struct{ Message string } `json:"errors"`
+	}
+	require.NoError(t, json.NewDecoder(resp.Body).Decode(&result))
+	require.Empty(t, result.Errors, "unexpected GraphQL errors: %v", result.Errors)
+
+	entries := result.Data.ListBackups
+	require.Equal(t, 1, len(entries), "expected exactly one backup")
+	require.NotEmpty(t, entries[0].Groups,
+		"groups must be populated when 'groups' is in the selection set, even without fullManifest: true")
+
+	// Confirm that at least one group contains the 'movie' predicate.
+ found := false + for _, g := range entries[0].Groups { + for _, p := range g.Predicates { + if strings.Contains(p, "movie") { + found = true + } + } + } + require.True(t, found, "expected 'movie' predicate in groups") +} + +// TestListBackupsFilters loads pre-built fixture manifests (testdata/manifest*.json) +// into the alpha container and exercises the full matrix of date-filter inputs. +// +// Fixture layout — 7 entries across 3 series: +// +// series-alpha (2023): dgraph.20230115, dgraph.20230201 +// series-beta (2024): dgraph.20240601, dgraph.20240615, dgraph.20240701 +// series-gamma (2026): dgraph.20260101, dgraph.20260315 +func TestListBackupsFilters(t *testing.T) { + setupListBackupsFixture(t) + runListBackups := makeListBackupsRunner(t) + + pathsOf := func(entries []listBackupsEntry) map[string]bool { + m := make(map[string]bool, len(entries)) + for _, e := range entries { + m[e.Path] = true + } + return m + } + + tests := []struct { + name string + input string // extra fields appended after location in the GraphQL input + wantCount int + mustHave []string // paths that must appear + mustAbsent []string // paths that must NOT appear + }{ + { + name: "no filter returns all 7 entries", + wantCount: 7, + mustHave: []string{ + "dgraph.20230115.120000.000", + "dgraph.20230201.080000.000", + "dgraph.20240601.000000.000", + "dgraph.20240615.000000.000", + "dgraph.20240701.000000.000", + "dgraph.20260101.000000.000", + "dgraph.20260315.000000.000", + }, + }, + { + name: "sinceDate=2024-01-01 excludes 2023 series", + input: `, sinceDate: "2024-01-01"`, + wantCount: 5, + mustHave: []string{ + "dgraph.20240601.000000.000", + "dgraph.20240615.000000.000", + "dgraph.20240701.000000.000", + "dgraph.20260101.000000.000", + "dgraph.20260315.000000.000", + }, + mustAbsent: []string{ + "dgraph.20230115.120000.000", + "dgraph.20230201.080000.000", + }, + }, + { + // dgraph.20240601.000000.000 is at midnight exactly; one second later excludes it. 
+ // Contrast with sinceDate="2024-06-01" (YYYY-MM-DD) which would include it. + name: "sinceDate RFC3339 one second past midnight excludes the midnight entry", + input: `, sinceDate: "2024-06-01T00:00:01Z"`, + wantCount: 4, + mustHave: []string{ + "dgraph.20240615.000000.000", + "dgraph.20240701.000000.000", + "dgraph.20260101.000000.000", + "dgraph.20260315.000000.000", + }, + mustAbsent: []string{ + "dgraph.20240601.000000.000", + "dgraph.20230115.120000.000", + }, + }, + { + name: "untilDate=2023-12-31 returns 2023 series only", + input: `, untilDate: "2023-12-31"`, + wantCount: 2, + mustHave: []string{ + "dgraph.20230115.120000.000", + "dgraph.20230201.080000.000", + }, + mustAbsent: []string{ + "dgraph.20240601.000000.000", + "dgraph.20260101.000000.000", + }, + }, + { + name: "sinceDate=2024-06-01 untilDate=2024-06-30 returns June-2024 entries only", + input: `, sinceDate: "2024-06-01", untilDate: "2024-06-30"`, + wantCount: 2, + mustHave: []string{ + "dgraph.20240601.000000.000", + "dgraph.20240615.000000.000", + }, + mustAbsent: []string{ + "dgraph.20240701.000000.000", + "dgraph.20230115.120000.000", + "dgraph.20260101.000000.000", + }, + }, + { + name: "untilDate=2024-06-30 returns 2023 series and June-2024", + input: `, untilDate: "2024-06-30"`, + wantCount: 4, + mustHave: []string{ + "dgraph.20230115.120000.000", + "dgraph.20230201.080000.000", + "dgraph.20240601.000000.000", + "dgraph.20240615.000000.000", + }, + mustAbsent: []string{ + "dgraph.20240701.000000.000", + "dgraph.20260101.000000.000", + }, + }, + { + name: "sinceDate=2024-06-15 returns from Jun-15 onward", + input: `, sinceDate: "2024-06-15"`, + wantCount: 4, + mustHave: []string{ + "dgraph.20240615.000000.000", + "dgraph.20240701.000000.000", + "dgraph.20260101.000000.000", + "dgraph.20260315.000000.000", + }, + mustAbsent: []string{ + "dgraph.20230115.120000.000", + "dgraph.20240601.000000.000", + }, + }, + { + name: "sinceDate=2026-01-01 returns 2026 series only", + input: `, 
sinceDate: "2026-01-01"`, + wantCount: 2, + mustHave: []string{ + "dgraph.20260101.000000.000", + "dgraph.20260315.000000.000", + }, + mustAbsent: []string{ + "dgraph.20230115.120000.000", + "dgraph.20240601.000000.000", + }, + }, + { + name: "sinceDate=2026-01-01 untilDate=2026-01-31 returns only Jan-2026 full backup", + input: `, sinceDate: "2026-01-01", untilDate: "2026-01-31"`, + wantCount: 1, + mustHave: []string{"dgraph.20260101.000000.000"}, + mustAbsent: []string{ + "dgraph.20260315.000000.000", + "dgraph.20240601.000000.000", + }, + }, + { + name: "2025 gap returns 0 (no backups between 2024 and 2026)", + input: `, sinceDate: "2025-01-01", untilDate: "2025-12-31"`, + wantCount: 0, + }, + { + name: "sinceDate far in future returns 0", + input: `, sinceDate: "2099-01-01"`, + wantCount: 0, + }, + { + name: "untilDate before all backups returns 0", + input: `, untilDate: "2022-12-31"`, + wantCount: 0, + }, + // untilDate RFC3339: the fixture entry dgraph.20230115.120000.000 is at noon UTC. + // A cutoff of 11:59:59Z is before noon so it is excluded; 12:00:00Z is not after noon so it is included. + { + name: "untilDate RFC3339 strict cutoff excludes entry at exact noon", + input: `, untilDate: "2023-01-15T11:59:59Z"`, + wantCount: 0, + mustAbsent: []string{"dgraph.20230115.120000.000"}, + }, + { + name: "untilDate RFC3339 at noon includes the noon entry", + input: `, untilDate: "2023-01-15T12:00:00Z"`, + wantCount: 1, + mustHave: []string{"dgraph.20230115.120000.000"}, + mustAbsent: []string{ + "dgraph.20230201.080000.000", + "dgraph.20240601.000000.000", + }, + }, + // lastNDays cases — fixture newest entry is 2026-03-15, oldest is 2023-01-15. + // lastNDays: 1 → since yesterday; no fixture entry is that recent → 0. + // lastNDays: 36500 ≈ 100 years back; safely covers all fixture entries regardless of when CI runs → 7. 
+ { + name: "lastNDays=1 returns 0 (no fixture entry within last day)", + input: `, lastNDays: 1`, + wantCount: 0, + }, + { + name: "lastNDays=36500 returns all 7 entries", + input: `, lastNDays: 36500`, + wantCount: 7, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + entries := runListBackups(tc.input) + require.Equal(t, tc.wantCount, len(entries), + "wrong entry count for input %q", tc.input) + + paths := pathsOf(entries) + for _, p := range tc.mustHave { + require.True(t, paths[p], "expected path %q in result", p) + } + for _, p := range tc.mustAbsent { + require.False(t, paths[p], "unexpected path %q in result", p) + } + }) + } +} + +// TestListBackupsPrecommittedManifest exercises date filters against the real +// master manifest that is already committed to the repo at +// data/to_restore/3/manifest.json. That file contains 3 entries, all dated +// 2021-05-17, from backup series "quirky_kapitsa4". No live backup is required. +// +// All queries use fullManifest:true so the alpha reads manifest.json directly +// (no manifest_summary.json is uploaded for this test). +func TestListBackupsPrecommittedManifest(t *testing.T) { + common.DirSetup(t) + defer common.DirCleanup(t) + + alpha := testutil.DockerPrefix + "_alpha1_1" + require.NoError(t, testutil.DockerCp( + "./data/to_restore/3/manifest.json", + alpha+":/data/backups/manifest.json")) + + runListBackups := makeListBackupsRunner(t) + // Wrap to always pass fullManifest:true — no summary file is present. + run := func(extra string) []listBackupsEntry { + return runListBackups(`, fullManifest: true` + extra) + } + + tests := []struct { + name string + extra string + wantCount int + wantBackupId string // if non-empty, all entries must have this backupId + }{ + { + name: "no date filter returns all 3 entries", + wantCount: 3, + wantBackupId: "quirky_kapitsa4", + }, + { + // 2021-05-17 is exactly the date of all entries; sinceDate is inclusive. 
+ name: "sinceDate=2021-05-17 (exact boundary) returns all 3", + extra: `, sinceDate: "2021-05-17"`, + wantCount: 3, + }, + { + // The day after all entries — should exclude everything. + name: "sinceDate=2021-05-18 excludes all entries", + extra: `, sinceDate: "2021-05-18"`, + wantCount: 0, + }, + { + // untilDate end-of-day covers all three entries from 2021-05-17. + name: "untilDate=2021-05-17 (end-of-day) returns all 3", + extra: `, untilDate: "2021-05-17"`, + wantCount: 3, + }, + { + // The day before all entries — should exclude everything. + name: "untilDate=2021-05-16 excludes all entries", + extra: `, untilDate: "2021-05-16"`, + wantCount: 0, + }, + { + name: "sinceDate=2021-01-01 untilDate=2021-12-31 returns all 3 (within 2021)", + extra: `, sinceDate: "2021-01-01", untilDate: "2021-12-31"`, + wantCount: 3, + }, + { + name: "sinceDate=2022-01-01 returns 0 (all entries are from 2021)", + extra: `, sinceDate: "2022-01-01"`, + wantCount: 0, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + entries := run(tc.extra) + require.Equal(t, tc.wantCount, len(entries)) + for _, e := range entries { + if tc.wantBackupId != "" { + require.Equal(t, tc.wantBackupId, e.BackupId) + } + // fullManifest:true — groups must be present on entries that have them. + // The to_restore/3 manifest has groups populated on all 3 entries. + require.NotEmpty(t, e.Groups, "fullManifest:true must return groups for path %s", e.Path) + } + }) + } + + // Sanity-check type distribution: 1 full + 2 incremental. + t.Run("type distribution is 1 full and 2 incremental", func(t *testing.T) { + entries := run("") + require.Equal(t, 3, len(entries)) + types := make(map[string]int) + for _, e := range entries { + types[e.Type]++ + } + require.Equal(t, 1, types["full"]) + require.Equal(t, 2, types["incremental"]) + }) +} + +// TestListBackupsFullManifest verifies the fullManifest input parameter. 
+// It takes a real full backup (which causes the alpha to write both manifest.json
+// and manifest_summary.json), then checks:
+// - default / fullManifest:false → uses manifest_summary.json, groups are empty
+// - fullManifest:true → uses manifest.json, groups are populated
+func TestListBackupsFullManifest(t *testing.T) {
+	common.DirSetup(t)
+	defer common.DirCleanup(t)
+
+	conn, err := grpc.NewClient(testutil.GetSockAddr(),
+		grpc.WithTransportCredentials(credentials.NewTLS(testutil.GetAlphaClientConfig(t))))
+	require.NoError(t, err)
+	dg := dgo.NewDgraphClient(api.NewDgraphClient(conn))
+	ctx := context.Background()
+
+	require.NoError(t, dg.Alter(ctx, &api.Operation{DropAll: true}))
+	require.NoError(t, dg.Alter(ctx, &api.Operation{Schema: `name: string @index(hash) .`}))
+	_, err = dg.NewTxn().Mutate(ctx, &api.Mutation{
+		CommitNow: true,
+		SetNquads: []byte(`<_:x1> <name> "test-fullmanifest" .`),
+	})
+	require.NoError(t, err)
+
+	_ = runBackup(t, 3, 1)
+
+	// runListBackups includes "groups" in the selection set, which triggers the
+	// server-side auto-escalation to the full manifest.json. Use it only when
+	// we actually want group data (fullManifest: true subtest).
+	runListBackups := makeListBackupsRunner(t)
+
+	// runSummaryOnly omits "groups" from the selection so the server uses the
+	// lightweight manifest_summary.json and returns manifests with empty groups.
+ adminURL := "https://" + testutil.GetSockAddrHttp() + "/admin" + httpClient := testutil.GetHttpsClient(t) + runSummaryOnly := func(extraInput string) []listBackupsEntry { + t.Helper() + query := fmt.Sprintf(`query { + listBackups(input: {location: "%s"%s}) { + backupId backupNum type path encrypted since + } + }`, alphaBackupDir, extraInput) + params := testutil.GraphQLParams{Query: query} + b, err := json.Marshal(params) + require.NoError(t, err) + resp, err := httpClient.Post(adminURL, "application/json", bytes.NewBuffer(b)) + require.NoError(t, err) + defer resp.Body.Close() + var result struct { + Data struct { + ListBackups []listBackupsEntry `json:"listBackups"` + } `json:"data"` + Errors []struct{ Message string } `json:"errors"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&result)) + require.Empty(t, result.Errors, "GraphQL errors: %v", result.Errors) + return result.Data.ListBackups + } + + t.Run("default uses summary manifest and returns empty groups", func(t *testing.T) { + entries := runSummaryOnly("") + require.Equal(t, 1, len(entries)) + // manifest_summary.json omits groups; the response contains no group data. + require.Empty(t, entries[0].Groups, + "summary path must not populate groups") + }) + + t.Run("fullManifest=false explicit same as default", func(t *testing.T) { + entries := runSummaryOnly(`, fullManifest: false`) + require.Equal(t, 1, len(entries)) + require.Empty(t, entries[0].Groups) + }) + + t.Run("fullManifest=true reads manifest.json and returns populated groups", func(t *testing.T) { + entries := runListBackups(`, fullManifest: true`) + require.Equal(t, 1, len(entries)) + require.NotEmpty(t, entries[0].Groups, + "full manifest path must populate groups") + // At least one group must carry predicates (the schema we applied above). 
+ hasPredicates := false + for _, g := range entries[0].Groups { + if len(g.Predicates) > 0 { + hasPredicates = true + break + } + } + require.True(t, hasPredicates, + "at least one group must contain predicates from the applied schema") + }) +} + func runRestore(t *testing.T, backupLocation, lastDir string, commitTs uint64) map[string]string { // Recreate the restore directory to make sure there's no previous data when // calling restore. diff --git a/systest/backup/filesystem/testdata/manifest.json b/systest/backup/filesystem/testdata/manifest.json new file mode 100644 index 00000000000..d80a6ad41c6 --- /dev/null +++ b/systest/backup/filesystem/testdata/manifest.json @@ -0,0 +1,95 @@ +{ + "Manifests": [ + { + "type": "full", + "since": 0, + "read_ts": 100, + "groups": { "1": ["0-name", "0-age"] }, + "backup_id": "series-alpha", + "backup_num": 1, + "version": 2105, + "path": "dgraph.20230115.120000.000", + "encrypted": false, + "drop_operations": null, + "compression": "snappy" + }, + { + "type": "incremental", + "since": 100, + "read_ts": 150, + "groups": { "1": ["0-name"] }, + "backup_id": "series-alpha", + "backup_num": 2, + "version": 2105, + "path": "dgraph.20230201.080000.000", + "encrypted": false, + "drop_operations": null, + "compression": "snappy" + }, + { + "type": "full", + "since": 0, + "read_ts": 200, + "groups": { "1": ["0-name", "0-email", "0-age"] }, + "backup_id": "series-beta", + "backup_num": 1, + "version": 2105, + "path": "dgraph.20240601.000000.000", + "encrypted": true, + "drop_operations": null, + "compression": "snappy" + }, + { + "type": "incremental", + "since": 200, + "read_ts": 250, + "groups": { "1": ["0-email"] }, + "backup_id": "series-beta", + "backup_num": 2, + "version": 2105, + "path": "dgraph.20240615.000000.000", + "encrypted": true, + "drop_operations": null, + "compression": "snappy" + }, + { + "type": "incremental", + "since": 250, + "read_ts": 300, + "groups": { "1": ["0-name", "0-age", "0-email"] }, + "backup_id": 
"series-beta", + "backup_num": 3, + "version": 2105, + "path": "dgraph.20240701.000000.000", + "encrypted": true, + "drop_operations": null, + "compression": "snappy" + }, + { + "type": "full", + "since": 0, + "read_ts": 400, + "groups": { "1": ["0-name", "0-age", "0-email", "0-score"] }, + "backup_id": "series-gamma", + "backup_num": 1, + "version": 2105, + "path": "dgraph.20260101.000000.000", + "encrypted": false, + "drop_operations": null, + "compression": "snappy" + }, + { + "type": "incremental", + "since": 400, + "read_ts": 450, + "groups": { "1": ["0-score"] }, + "backup_id": "series-gamma", + "backup_num": 2, + "version": 2105, + "path": "dgraph.20260315.000000.000", + "encrypted": false, + "drop_operations": null, + "compression": "snappy" + } + ] +} diff --git a/systest/backup/filesystem/testdata/manifest_summary.json b/systest/backup/filesystem/testdata/manifest_summary.json new file mode 100644 index 00000000000..f9be09d0bfb --- /dev/null +++ b/systest/backup/filesystem/testdata/manifest_summary.json @@ -0,0 +1,81 @@ +{ + "Manifests": [ + { + "type": "full", + "since": 0, + "read_ts": 100, + "backup_id": "series-alpha", + "backup_num": 1, + "version": 2105, + "path": "dgraph.20230115.120000.000", + "encrypted": false, + "compression": "snappy" + }, + { + "type": "incremental", + "since": 100, + "read_ts": 150, + "backup_id": "series-alpha", + "backup_num": 2, + "version": 2105, + "path": "dgraph.20230201.080000.000", + "encrypted": false, + "compression": "snappy" + }, + { + "type": "full", + "since": 0, + "read_ts": 200, + "backup_id": "series-beta", + "backup_num": 1, + "version": 2105, + "path": "dgraph.20240601.000000.000", + "encrypted": true, + "compression": "snappy" + }, + { + "type": "incremental", + "since": 200, + "read_ts": 250, + "backup_id": "series-beta", + "backup_num": 2, + "version": 2105, + "path": "dgraph.20240615.000000.000", + "encrypted": true, + "compression": "snappy" + }, + { + "type": "incremental", + "since": 250, + 
"read_ts": 300, + "backup_id": "series-beta", + "backup_num": 3, + "version": 2105, + "path": "dgraph.20240701.000000.000", + "encrypted": true, + "compression": "snappy" + }, + { + "type": "full", + "since": 0, + "read_ts": 400, + "backup_id": "series-gamma", + "backup_num": 1, + "version": 2105, + "path": "dgraph.20260101.000000.000", + "encrypted": false, + "compression": "snappy" + }, + { + "type": "incremental", + "since": 400, + "read_ts": 450, + "backup_id": "series-gamma", + "backup_num": 2, + "version": 2105, + "path": "dgraph.20260315.000000.000", + "encrypted": false, + "compression": "snappy" + } + ] +} diff --git a/systest/backup/minio-large/backup_test.go b/systest/backup/minio-large/backup_test.go index c69334d2dd8..cbc87c6a57d 100644 --- a/systest/backup/minio-large/backup_test.go +++ b/systest/backup/minio-large/backup_test.go @@ -224,7 +224,10 @@ func copyToLocalFs(t *testing.T) { minio.ListObjectsOptions{Prefix: "", Recursive: false}) for object := range objectCh1 { require.NoError(t, object.Err) - if object.Key != "manifest.json" { + // Skip root-level JSON manifest files (manifest.json, manifest_summary.json, + // manifest_summary_tmp.json). The inner recursive loop downloads them as files; + // MkdirAll on a path that already exists as a file fails with "not a directory". + if !strings.HasSuffix(object.Key, ".json") { dstDir := backupDir + "/" + object.Key require.NoError(t, os.MkdirAll(dstDir, os.ModePerm)) } diff --git a/systest/backup/minio/backup_test.go b/systest/backup/minio/backup_test.go index 61bcaae36eb..28185e18387 100644 --- a/systest/backup/minio/backup_test.go +++ b/systest/backup/minio/backup_test.go @@ -387,8 +387,10 @@ func copyToLocalFs(t *testing.T) { minio.ListObjectsOptions{Prefix: "", Recursive: false}) for object := range objectCh1 { require.NoError(t, object.Err) - if object.Key != "manifest.json" { - dstDir := backupDir + "/" + object.Key + // Only create local directories for backup sub-folder keys (end with "/"). 
+ // Root-level files like manifest.json and manifest_summary.json are not directories. + if strings.HasSuffix(object.Key, "/") { + dstDir := backupDir + "/" + strings.TrimSuffix(object.Key, "/") require.NoError(t, os.MkdirAll(dstDir, os.ModePerm)) } diff --git a/systest/integration2/backup_summary_compat_test.go b/systest/integration2/backup_summary_compat_test.go new file mode 100644 index 00000000000..36b96845f1d --- /dev/null +++ b/systest/integration2/backup_summary_compat_test.go @@ -0,0 +1,234 @@ +//go:build integration2 + +/* + * SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc. + * SPDX-License-Identifier: Apache-2.0 + */ + +package main + +import ( + "encoding/json" + "fmt" + "sort" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/dgraph-io/dgo/v250/protos/api" + "github.com/dgraph-io/dgraph/v25/dgraphapi" + "github.com/dgraph-io/dgraph/v25/dgraphtest" +) + +// manifestEntry is a minimal representation of one entry in manifest.json. +type manifestEntry struct { + Type string `json:"type"` + BackupId string `json:"backup_id"` + BackupNum uint64 `json:"backup_num"` + ReadTs uint64 `json:"read_ts"` +} + +// masterManifest mirrors the top-level shape of manifest.json (capital "Manifests"). +type masterManifest struct { + Manifests []manifestEntry `json:"Manifests"` +} + +// masterManifestSummary mirrors the top-level shape of manifest_summary.json +// (capital "Manifests", matching Go's default JSON encoding for MasterManifestSummary). +type masterManifestSummary struct { + Manifests []manifestEntry `json:"Manifests"` +} + +// queryNames runs {q(func: has(name)) {name}} and returns a sorted list of +// name values. 
+func queryNames(t *testing.T, gc *dgraphapi.GrpcClient) []string {
+	t.Helper()
+	resp, err := gc.Query(`{q(func: has(name)) {name}}`)
+	require.NoError(t, err)
+
+	var result struct {
+		Q []struct {
+			Name string `json:"name"`
+		} `json:"q"`
+	}
+	require.NoError(t, json.Unmarshal(resp.Json, &result))
+
+	names := make([]string, 0, len(result.Q))
+	for _, r := range result.Q {
+		names = append(names, r.Name)
+	}
+	sort.Strings(names)
+	return names
+}
+
+// insertName adds a single triple and commits.
+func insertName(t *testing.T, gc *dgraphapi.GrpcClient, name string) {
+	t.Helper()
+	_, err := gc.Mutate(&api.Mutation{
+		SetNquads: []byte(fmt.Sprintf(`_:x <name> %q .`, name)),
+		CommitNow: true,
+	})
+	require.NoError(t, err)
+}
+
+// restoreAndWait triggers an online restore and blocks until it finishes.
+func restoreAndWait(t *testing.T, hc *dgraphapi.HTTPClient, c *dgraphtest.LocalCluster,
+	backupId string, backupNum int) {
+	t.Helper()
+	require.NoError(t, hc.Restore(c, dgraphtest.DefaultBackupDir, backupId, 0, backupNum))
+	require.NoError(t, dgraphapi.WaitForRestore(c))
+}
+
+// TestManifestSummaryBackwardCompat verifies two things in one scenario:
+//
+// 1. manifest_summary.json accumulates entries from backups taken with an
+// older Dgraph binary (no summary feature) alongside new-binary backups.
+//
+// 2. All four backups (2 old + 2 new) are fully restorable and produce the
+// correct data at each checkpoint, confirming that adding the summary
+// file does not corrupt or alter the backup/restore pipeline.
+// +// Timeline: +// +// [old binary v24.0.0] +// schema setup → insert "alice" +// backup-1 (full) → only manifest.json written +// insert "bob" +// backup-2 (incr) → only manifest.json written +// [in-place upgrade to local build] +// insert "carol" +// backup-3 (full) → manifest.json + manifest_summary.json (3 entries) +// insert "dave" +// backup-4 (incr) → manifest.json + manifest_summary.json (4 entries) +// +// Restore checkpoints verified: +// +// old series backup-2 → {alice, bob} +// new series backup-3 → {alice, bob, carol} +// new series backup-4 → {alice, bob, carol, dave} +func TestManifestSummaryBackwardCompat(t *testing.T) { + conf := dgraphtest.NewClusterConfig(). + WithNumAlphas(1).WithNumZeros(1).WithReplicas(1). + WithVersion("v24.0.0") + c, err := dgraphtest.NewLocalCluster(conf) + require.NoError(t, err) + defer func() { c.Cleanup(t.Failed()) }() + require.NoError(t, c.Start()) + + gc, cleanup, err := c.Client() + require.NoError(t, err) + defer cleanup() + + hc, err := c.HTTPClient() + require.NoError(t, err) + + // ── Old-binary phase ───────────────────────────────────────────────────── + + require.NoError(t, gc.SetupSchema(`name: string @index(exact) .`)) + insertName(t, gc, "alice") + + // backup-1: full backup with old binary; creates manifest.json only. + require.NoError(t, hc.Backup(c, true, dgraphtest.DefaultBackupDir)) + + insertName(t, gc, "bob") + + // backup-2: incremental backup with old binary. + require.NoError(t, hc.Backup(c, false, dgraphtest.DefaultBackupDir)) + + // Sanity: no manifest_summary.json should exist yet. + _, err = c.ReadFileFromContainer(dgraphtest.DefaultBackupDir + "/manifest_summary.json") + require.Error(t, err, "manifest_summary.json must not exist after old-binary backups") + + // ── In-place upgrade ───────────────────────────────────────────────────── + + // InPlace stops containers, swaps the binary, and restarts. The backup + // volume and alpha posting data are both preserved. 
+ require.NoError(t, c.Upgrade("local", dgraphtest.InPlace)) + + // Re-create clients — container ports may have been remapped. + cleanup() + gc, cleanup, err = c.Client() + require.NoError(t, err) + defer cleanup() + + hc, err = c.HTTPClient() + require.NoError(t, err) + + // ── New-binary phase ───────────────────────────────────────────────────── + + insertName(t, gc, "carol") + + // backup-3: full backup; CompleteBackup reads 2 old entries + new = 3 total; + // writes both manifest.json and manifest_summary.json. + require.NoError(t, hc.Backup(c, true, dgraphtest.DefaultBackupDir)) + + insertName(t, gc, "dave") + + // backup-4: incremental backup; summary grows to 4 entries. + require.NoError(t, hc.Backup(c, false, dgraphtest.DefaultBackupDir)) + + // ── Verify manifest_summary.json ───────────────────────────────────────── + + summaryRaw, err := c.ReadFileFromContainer( + dgraphtest.DefaultBackupDir + "/manifest_summary.json") + require.NoError(t, err, "manifest_summary.json must exist after new-binary backups") + + var summary masterManifestSummary + require.NoError(t, json.Unmarshal(summaryRaw, &summary)) + require.Equal(t, 4, len(summary.Manifests), + "summary must contain all 4 entries: 2 old-binary + 2 new-binary") + + types := make(map[string]int) + for _, m := range summary.Manifests { + types[m.Type]++ + } + require.Equal(t, 2, types["full"], "expected 2 full-backup entries in summary") + require.Equal(t, 2, types["incremental"], "expected 2 incremental-backup entries in summary") + + // ── Identify backup series IDs ──────────────────────────────────────────── + // Read manifest.json (has backup_id per entry) to identify the two series. 
+ + manifestRaw, err := c.ReadFileFromContainer( + dgraphtest.DefaultBackupDir + "/manifest.json") + require.NoError(t, err) + + var master masterManifest + require.NoError(t, json.Unmarshal(manifestRaw, &master)) + require.Equal(t, 4, len(master.Manifests)) + + // Collect unique backup_ids in order of appearance (oldest first). + seen := make(map[string]bool) + var seriesIds []string + for _, m := range master.Manifests { + if !seen[m.BackupId] { + seen[m.BackupId] = true + seriesIds = append(seriesIds, m.BackupId) + } + } + require.Equal(t, 2, len(seriesIds), + "expected exactly 2 distinct backup series") + + oldSeriesId := seriesIds[0] // taken with v24.0.0 + newSeriesId := seriesIds[1] // taken with local build + + t.Run("restore old-series backup-2 yields alice and bob", func(t *testing.T) { + restoreAndWait(t, hc, c, oldSeriesId, 2) + + names := queryNames(t, gc) + require.ElementsMatch(t, []string{"alice", "bob"}, names) + }) + + t.Run("restore new-series backup-3 (full) yields alice bob carol", func(t *testing.T) { + restoreAndWait(t, hc, c, newSeriesId, 1) + + names := queryNames(t, gc) + require.ElementsMatch(t, []string{"alice", "bob", "carol"}, names) + }) + + t.Run("restore new-series backup-4 (full+incr) yields all four names", func(t *testing.T) { + restoreAndWait(t, hc, c, newSeriesId, 2) + + names := queryNames(t, gc) + require.ElementsMatch(t, []string{"alice", "bob", "carol", "dave"}, names) + }) +} diff --git a/worker/backup.go b/worker/backup.go index 9887b9433cf..6c8863b036d 100644 --- a/worker/backup.go +++ b/worker/backup.go @@ -38,57 +38,72 @@ import ( // predicateSet is a map whose keys are predicates. It is meant to be used as a set. type predicateSet map[string]struct{} -// Manifest records backup details, these are values used during restore. -// Since is the timestamp from which the next incremental backup should start (it's set -// to the readTs of the current backup). -// Groups are the IDs of the groups involved. 
-type Manifest struct { - // Type is the type of backup, either full or incremental. +// ManifestBase holds per-backup metadata that is common to both the full Manifest +// (used for restore) and the lightweight ManifestSummary (used for listing). +// JSON keys are intentionally stable — they must not change because they are stored on disk. +type ManifestBase struct { + // Type is the type of backup: "full" or "incremental". Type string `json:"type"` - // SinceTsDeprecated is kept for backward compatibility. Use readTs instead of sinceTs. + // SinceTsDeprecated is the read timestamp of the previous backup and the start point + // for the next incremental backup. The field name is kept for Go API backward compatibility; + // the JSON key "since" is kept for on-disk backward compatibility with older Dgraph versions. + // New code should use ReadTs instead. SinceTsDeprecated uint64 `json:"since"` - // ReadTs is the timestamp at which this backup was taken. This would be - // the since timestamp for the next incremental backup. + // ReadTs is the timestamp at which this backup was taken. ReadTs uint64 `json:"read_ts"` - // Groups is the map of valid groups to predicates at the time the backup was created. - Groups map[uint32][]string `json:"groups"` - // BackupId is a unique ID assigned to all the backups in the same series - // (from the first full backup to the last incremental backup). + // BackupId is a unique ID assigned to all backups in the same series + // (from the first full backup through the last incremental backup). BackupId string `json:"backup_id"` - // BackupNum is a monotonically increasing number assigned to each backup in - // a series. The full backup as BackupNum equal to one and each incremental - // backup gets assigned the next available number. Used to verify the integrity - // of the data during a restore. + // BackupNum is the 1-based position of this backup within its series; 1 = full backup. 
BackupNum uint64 `json:"backup_num"` - // Version specifies the Dgraph version, the backup was taken on. For the backup taken on older - // versions (<= 20.11), the predicates in Group map do not have namespace. Version will be zero - // for older versions. + // Version specifies the Dgraph predicate-encoding version in use at backup time. + // 0 means pre-21.03 (no namespace prefix). The restore path reads this to trigger upgrades. Version int `json:"version"` - // Path is the name of the backup directory to which this manifest belongs to. + // Path is the name of the backup directory that holds this backup's data files. Path string `json:"path"` - // Encrypted indicates whether this backup was encrypted or not. + // Encrypted indicates whether this backup was encrypted. Encrypted bool `json:"encrypted"` - // DropOperations lists the various DROP operations that took place since the last backup. - // These are used during restore to redo those operations before applying the backup. - DropOperations []*pb.DropOperation `json:"drop_operations"` - // Compression keeps track of the compression that was used for the data. + // Compression records the codec used to compress backup data files. Compression string `json:"compression"` } -// ValidReadTs function returns the valid read timestamp. The backup can have -// the readTs=0 if the backup was done on an older version of dgraph. The -// SinceTsDecprecated is kept for backward compatibility. -func (m *Manifest) ValidReadTs() uint64 { +// ValidReadTs returns the effective read timestamp for this backup entry. +// Pre-21.03 backups stored the value in Since instead of ReadTs; this method +// handles both cases transparently. +func (m *ManifestBase) ValidReadTs() uint64 { if m.ReadTs == 0 { return m.SinceTsDeprecated } return m.ReadTs } +// Manifest is the full per-backup record used during restore. It embeds ManifestBase +// and adds the two heavy restore-only fields: Groups and DropOperations. 
+type Manifest struct { + ManifestBase + // Groups maps group IDs to their predicate lists at backup time. + // Required by restore but omitted from the summary manifest to keep listing fast. + Groups map[uint32][]string `json:"groups"` + // DropOperations records DROP operations that occurred since the previous backup. + // Required by restore but omitted from the summary manifest. + DropOperations []*pb.DropOperation `json:"drop_operations"` +} + type MasterManifest struct { Manifests []*Manifest } +// ManifestSummary is a lightweight listing view of ManifestBase. It embeds +// ManifestBase but deliberately excludes Groups and DropOperations, keeping +// manifest_summary.json small even on clusters with large vector schemas. +type ManifestSummary struct { + ManifestBase +} + +type MasterManifestSummary struct { + Manifests []*ManifestSummary +} + func (m *Manifest) getPredsInGroup(gid uint32) predicateSet { preds, ok := m.Groups[gid] if !ok { @@ -354,12 +369,14 @@ func ProcessBackupRequest(ctx context.Context, req *pb.BackupRequest) error { dir := fmt.Sprintf(backupPathFmt, req.UnixTs) m := Manifest{ - ReadTs: req.ReadTs, + ManifestBase: ManifestBase{ + ReadTs: req.ReadTs, + Version: x.ManifestVersion, + Path: dir, + Compression: "snappy", + }, Groups: predMap, - Version: x.ManifestVersion, DropOperations: dropOperations, - Path: dir, - Compression: "snappy", } if req.SinceTs == 0 { m.Type = "full" @@ -384,17 +401,15 @@ func ProcessBackupRequest(ctx context.Context, req *pb.BackupRequest) error { return nil } -func ProcessListBackups(ctx context.Context, location string, creds *x.MinioCredentials) ( - []*Manifest, error) { +func ProcessListBackups(ctx context.Context, location string, creds *x.MinioCredentials, + fullManifest bool) ([]*Manifest, error) { - manifests, err := ListBackupManifests(location, creds) + manifests, err := ListBackupManifests(location, creds, fullManifest) if err != nil { return nil, errors.Wrapf(err, "cannot read manifests at location 
%s", location) } - res := make([]*Manifest, 0, len(manifests)) - res = append(res, manifests...) - return res, nil + return manifests, nil } // BackupProcessor handles the different stages of the backup process. @@ -688,6 +703,10 @@ func (pr *BackupProcessor) CompleteBackup(ctx context.Context, m *Manifest) erro if err := CreateManifest(handler, uri, manifest); err != nil { return errors.Wrap(err, "complete backup failed") } + // Best-effort: write summary manifest. Failure does not abort the backup. + if err := CreateManifestSummary(handler, manifest); err != nil { + glog.Warningf("Failed to write backup summary manifest (non-fatal): %v", err) + } glog.Infof("Backup completed OK.") return nil } diff --git a/worker/backup_handler.go b/worker/backup_handler.go index 45198d92d9c..fcb2776eb82 100644 --- a/worker/backup_handler.go +++ b/worker/backup_handler.go @@ -51,6 +51,14 @@ const ( backupManifest = `manifest.json` tmpManifest = `manifest_tmp.json` + + // backupManifestSummary is the lightweight summary manifest written alongside + // manifest.json. It holds all per-backup metadata but omits Groups and + // DropOperations, keeping listing fast even for 500 MB+ manifests on clusters + // with many vector predicates. Old tools that do not know about this file continue + // to use manifest.json without any change. 
+ backupManifestSummary = `manifest_summary.json` + tmpManifestSummary = `manifest_summary_tmp.json` ) func createBackupFile(h UriHandler, uri *url.URL, req *pb.BackupRequest) (io.WriteCloser, error) { diff --git a/worker/backup_manifest.go b/worker/backup_manifest.go index 194262ad776..8e5024cc686 100644 --- a/worker/backup_manifest.go +++ b/worker/backup_manifest.go @@ -12,6 +12,7 @@ import ( "path/filepath" "sort" "strings" + "time" "github.com/golang/glog" "github.com/pkg/errors" @@ -293,8 +294,147 @@ func CreateManifest(h UriHandler, uri *url.URL, manifest *MasterManifest) error "fix the backup manifest.", tmpManifest, backupManifest) } +// BackupDateFilter holds optional date-range criteria for listing backups. +type BackupDateFilter struct { + Since *time.Time + Until *time.Time +} + +// parseBackupTime extracts the UTC timestamp from a backup directory path like +// "dgraph.20260101.120000.000". +func parseBackupTime(path string) (time.Time, error) { + const prefix = "dgraph." + if !strings.HasPrefix(path, prefix) { + return time.Time{}, fmt.Errorf("unexpected backup path format: %s", path) + } + t, err := time.Parse("20060102.150405.000", strings.TrimPrefix(path, prefix)) + return t.UTC(), err +} + +// FilterManifestsByDate returns only the manifests whose path timestamps fall +// within the inclusive [filter.Since, filter.Until] window. Manifests whose +// path cannot be parsed are always included (fail-open for old path formats). 
+func FilterManifestsByDate(manifests []*Manifest, filter BackupDateFilter) []*Manifest { + if filter.Since == nil && filter.Until == nil { + return manifests + } + var out []*Manifest + for _, m := range manifests { + t, err := parseBackupTime(m.Path) + if err != nil { + glog.Warningf("FilterManifestsByDate: cannot parse backup path %q, including it: %v", m.Path, err) + out = append(out, m) + continue + } + if filter.Since != nil && t.Before(*filter.Since) { + continue + } + if filter.Until != nil && t.After(*filter.Until) { + continue + } + out = append(out, m) + } + return out +} + +// BackupListStats holds summary statistics derived from a manifest list. +type BackupListStats struct { + Total int + LastFullBackup *time.Time + LastIncrBackup *time.Time + OldestBackup *time.Time + NewestBackup *time.Time + BackupSeriesCount int +} + +// ComputeBackupListStats derives statistics from a list of manifests. +func ComputeBackupListStats(manifests []*Manifest) BackupListStats { + stats := BackupListStats{Total: len(manifests)} + ids := make(map[string]struct{}) + for _, m := range manifests { + if m.BackupId != "" { + ids[m.BackupId] = struct{}{} + } + t, err := parseBackupTime(m.Path) + if err != nil { + continue + } + tc := t + if stats.OldestBackup == nil || t.Before(*stats.OldestBackup) { + stats.OldestBackup = &tc + } + if stats.NewestBackup == nil || t.After(*stats.NewestBackup) { + stats.NewestBackup = &tc + } + if m.Type == "full" && (stats.LastFullBackup == nil || t.After(*stats.LastFullBackup)) { + stats.LastFullBackup = &tc + } + if m.Type == "incremental" && (stats.LastIncrBackup == nil || t.After(*stats.LastIncrBackup)) { + stats.LastIncrBackup = &tc + } + } + stats.BackupSeriesCount = len(ids) + return stats +} + +// readMasterManifestSummary reads the lightweight manifest_summary.json file. 
+func readMasterManifestSummary(h UriHandler) (*MasterManifestSummary, error) {
+	var ms MasterManifestSummary
+	b, err := h.Read(backupManifestSummary)
+	if err != nil {
+		return nil, errors.Wrap(err, "readMasterManifestSummary failed to read")
+	}
+	if err := json.Unmarshal(b, &ms); err != nil {
+		return nil, errors.Wrap(err, "readMasterManifestSummary failed to unmarshal")
+	}
+	return &ms, nil
+}
+
+// summariesToManifests converts lightweight summaries back to full Manifest structs
+// (with nil Groups/DropOperations) so the listing path is type-compatible.
+func summariesToManifests(summaries []*ManifestSummary) []*Manifest {
+	out := make([]*Manifest, len(summaries))
+	for i, s := range summaries {
+		out[i] = &Manifest{ManifestBase: s.ManifestBase}
+	}
+	return out
+}
+
+// CreateManifestSummary writes a lightweight manifest_summary.json that omits
+// Groups and DropOperations. The full manifest.json is left untouched.
+func CreateManifestSummary(h UriHandler, master *MasterManifest) error {
+	summary := &MasterManifestSummary{
+		Manifests: make([]*ManifestSummary, len(master.Manifests)),
+	}
+	for i, m := range master.Manifests {
+		summary.Manifests[i] = &ManifestSummary{ManifestBase: m.ManifestBase}
+	}
+	w, err := h.CreateFile(tmpManifestSummary)
+	if err != nil {
+		return errors.Wrap(err, "createManifestSummary failed to create tmp file")
+	}
+	if err = json.NewEncoder(w).Encode(summary); err != nil {
+		_ = w.Close() // best-effort cleanup; the encode error takes precedence
+		return err
+	}
+	// Close BEFORE renaming: some handlers only commit the written data on Close,
+	// so renaming first could publish an empty or truncated summary file.
+	if err = w.Close(); err != nil {
+		return errors.Wrap(err, "createManifestSummary failed to close tmp file")
+	}
+	return errors.Wrapf(h.Rename(tmpManifestSummary, backupManifestSummary),
+		"MOVING TEMPORARY SUMMARY MANIFEST FAILED! Move %s to %s manually.",
+		tmpManifestSummary, backupManifestSummary)
+}
+
+// ListBackupManifests scans location l for backup files and returns the list of manifests.
-func ListBackupManifests(l string, creds *x.MinioCredentials) ([]*Manifest, error) { +// +// When fullManifest is false (the default for listing), it tries the lightweight +// manifest_summary.json first; if absent or unreadable it falls back to manifest.json. +// The summary path omits Groups and DropOperations, making it fast even for 500 MB+ manifests. +// +// When fullManifest is true, the full manifest.json is always read — useful when the caller +// needs predicate or DROP-operation data (e.g. --verbose listing, restore planning). +func ListBackupManifests(l string, creds *x.MinioCredentials, fullManifest bool) ([]*Manifest, error) { uri, err := url.Parse(l) if err != nil { return nil, err @@ -305,6 +445,18 @@ func ListBackupManifests(l string, creds *x.MinioCredentials) ([]*Manifest, erro return nil, errors.Wrap(err, "error in listBackupManifests") } + // Fast path: summary manifest (unless caller explicitly wants the full manifest). + if !fullManifest && h.FileExists(backupManifestSummary) { + ms, err := readMasterManifestSummary(h) + if err == nil { + glog.V(2).Infof("ListBackupManifests: using summary manifest (%d entries)", + len(ms.Manifests)) + return summariesToManifests(ms.Manifests), nil + } + glog.Warningf("Summary manifest unreadable, falling back to full manifest: %v", err) + } + + // Full manifest.json path. m, err := GetManifest(h, uri) if err != nil { return nil, err diff --git a/worker/backup_manifest_test.go b/worker/backup_manifest_test.go new file mode 100644 index 00000000000..0f936a07204 --- /dev/null +++ b/worker/backup_manifest_test.go @@ -0,0 +1,406 @@ +/* + * SPDX-FileCopyrightText: © 2017-2025 Istari Digital, Inc. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +package worker + +import ( + "encoding/json" + "net/url" + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestParseBackupTime(t *testing.T) { + tests := []struct { + name string + path string + wantErr bool + wantYear int + wantMonth time.Month + wantDay int + }{ + { + name: "full timestamp", + path: "dgraph.20260101.120000.000", + wantYear: 2026, + wantMonth: time.January, + wantDay: 1, + }, + { + name: "mid-year timestamp", + path: "dgraph.20230415.093045.123", + wantYear: 2023, + wantMonth: time.April, + wantDay: 15, + }, + { + name: "missing dgraph prefix", + path: "backup.20260101.120000.000", + wantErr: true, + }, + { + name: "empty timestamp after prefix", + path: "dgraph.", + wantErr: true, + }, + { + name: "no prefix at all", + path: "somebackup", + wantErr: true, + }, + { + name: "malformed timestamp", + path: "dgraph.notadate", + wantErr: true, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, err := parseBackupTime(tc.path) + if tc.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + require.Equal(t, tc.wantYear, got.Year()) + require.Equal(t, tc.wantMonth, got.Month()) + require.Equal(t, tc.wantDay, got.Day()) + }) + } +} + +func TestFilterManifestsByDate(t *testing.T) { + tp := func(year, month, day int) *time.Time { + ts := time.Date(year, time.Month(month), day, 0, 0, 0, 0, time.UTC) + return &ts + } + + manifests := []*Manifest{ + {ManifestBase: ManifestBase{Path: "dgraph.20260101.000000.000", BackupId: "a", Type: "full"}}, + {ManifestBase: ManifestBase{Path: "dgraph.20260115.000000.000", BackupId: "a", Type: "incremental"}}, + {ManifestBase: ManifestBase{Path: "dgraph.20260201.000000.000", BackupId: "b", Type: "full"}}, + {ManifestBase: ManifestBase{Path: "dgraph.20260215.000000.000", BackupId: "b", Type: "incremental"}}, + // unparseable path — always included (fail-open) + {ManifestBase: 
ManifestBase{Path: "unknown_format", BackupId: "c", Type: "full"}}, + } + + t.Run("no filter returns all", func(t *testing.T) { + got := FilterManifestsByDate(manifests, BackupDateFilter{}) + require.Equal(t, len(manifests), len(got)) + }) + + t.Run("since filter", func(t *testing.T) { + filter := BackupDateFilter{Since: tp(2026, 2, 1)} + got := FilterManifestsByDate(manifests, filter) + // Feb 1, Feb 15, unparseable + require.Equal(t, 3, len(got)) + require.Equal(t, "dgraph.20260201.000000.000", got[0].Path) + require.Equal(t, "dgraph.20260215.000000.000", got[1].Path) + }) + + t.Run("until filter", func(t *testing.T) { + filter := BackupDateFilter{Until: tp(2026, 1, 15)} + got := FilterManifestsByDate(manifests, filter) + // Jan 1, Jan 15, unparseable + require.Equal(t, 3, len(got)) + require.Equal(t, "dgraph.20260101.000000.000", got[0].Path) + require.Equal(t, "dgraph.20260115.000000.000", got[1].Path) + }) + + t.Run("since and until range", func(t *testing.T) { + filter := BackupDateFilter{Since: tp(2026, 1, 10), Until: tp(2026, 1, 31)} + got := FilterManifestsByDate(manifests, filter) + // Jan 15 + unparseable + require.Equal(t, 2, len(got)) + require.Equal(t, "dgraph.20260115.000000.000", got[0].Path) + require.Equal(t, "unknown_format", got[1].Path) + }) + + t.Run("no parseable results only unparseable remains", func(t *testing.T) { + filter := BackupDateFilter{Since: tp(2030, 1, 1)} + got := FilterManifestsByDate(manifests, filter) + require.Equal(t, 1, len(got)) + require.Equal(t, "unknown_format", got[0].Path) + }) + + t.Run("nil manifests", func(t *testing.T) { + got := FilterManifestsByDate(nil, BackupDateFilter{Since: tp(2026, 1, 1)}) + require.Nil(t, got) + }) +} + +func TestComputeBackupListStats(t *testing.T) { + t.Run("empty list", func(t *testing.T) { + stats := ComputeBackupListStats(nil) + require.Equal(t, 0, stats.Total) + require.Equal(t, 0, stats.BackupSeriesCount) + require.Nil(t, stats.OldestBackup) + require.Nil(t, stats.NewestBackup) + 
}) + + t.Run("full stats", func(t *testing.T) { + manifests := []*Manifest{ + {ManifestBase: ManifestBase{Path: "dgraph.20260101.000000.000", BackupId: "a", Type: "full"}}, + {ManifestBase: ManifestBase{Path: "dgraph.20260115.000000.000", BackupId: "a", Type: "incremental"}}, + {ManifestBase: ManifestBase{Path: "dgraph.20260201.000000.000", BackupId: "b", Type: "full"}}, + {ManifestBase: ManifestBase{Path: "dgraph.20260215.000000.000", BackupId: "b", Type: "incremental"}}, + } + stats := ComputeBackupListStats(manifests) + + require.Equal(t, 4, stats.Total) + require.Equal(t, 2, stats.BackupSeriesCount) + + require.NotNil(t, stats.OldestBackup) + require.Equal(t, 2026, stats.OldestBackup.Year()) + require.Equal(t, time.January, stats.OldestBackup.Month()) + require.Equal(t, 1, stats.OldestBackup.Day()) + + require.NotNil(t, stats.NewestBackup) + require.Equal(t, time.February, stats.NewestBackup.Month()) + require.Equal(t, 15, stats.NewestBackup.Day()) + + require.NotNil(t, stats.LastFullBackup) + require.Equal(t, time.February, stats.LastFullBackup.Month()) + require.Equal(t, 1, stats.LastFullBackup.Day()) + + require.NotNil(t, stats.LastIncrBackup) + require.Equal(t, time.February, stats.LastIncrBackup.Month()) + require.Equal(t, 15, stats.LastIncrBackup.Day()) + }) + + t.Run("only full backups", func(t *testing.T) { + manifests := []*Manifest{ + {ManifestBase: ManifestBase{Path: "dgraph.20260101.000000.000", BackupId: "a", Type: "full"}}, + {ManifestBase: ManifestBase{Path: "dgraph.20260201.000000.000", BackupId: "b", Type: "full"}}, + } + stats := ComputeBackupListStats(manifests) + require.Equal(t, 2, stats.Total) + require.Equal(t, 2, stats.BackupSeriesCount) + require.NotNil(t, stats.LastFullBackup) + require.Nil(t, stats.LastIncrBackup) + }) + + t.Run("unparseable paths do not affect timestamps", func(t *testing.T) { + manifests := []*Manifest{ + {ManifestBase: ManifestBase{Path: "legacy_backup", BackupId: "a", Type: "full"}}, + } + stats := 
ComputeBackupListStats(manifests) + require.Equal(t, 1, stats.Total) + require.Equal(t, 1, stats.BackupSeriesCount) + require.Nil(t, stats.OldestBackup) + require.Nil(t, stats.LastFullBackup) + }) +} + +func TestSummariesToManifests(t *testing.T) { + summaries := []*ManifestSummary{ + { + ManifestBase: ManifestBase{ + Type: "full", + BackupId: "abc123", + BackupNum: 1, + Path: "dgraph.20260101.000000.000", + ReadTs: 1000, + SinceTsDeprecated: 0, + Encrypted: true, + Compression: "snappy", + Version: 2105, + }, + }, + } + + got := summariesToManifests(summaries) + require.Equal(t, 1, len(got)) + m := got[0] + require.Equal(t, "full", m.Type) + require.Equal(t, "abc123", m.BackupId) + require.Equal(t, uint64(1), m.BackupNum) + require.Equal(t, "dgraph.20260101.000000.000", m.Path) + require.Equal(t, uint64(1000), m.ReadTs) + require.Equal(t, uint64(0), m.SinceTsDeprecated) + require.True(t, m.Encrypted) + require.Equal(t, "snappy", m.Compression) + // Groups and DropOperations must be absent (summary never stores them) + require.Nil(t, m.Groups) + require.Nil(t, m.DropOperations) +} + +// testFileHandlerForDir returns a fileHandler and parsed URL for a temp directory. +func testFileHandlerForDir(t *testing.T, dir string) (*fileHandler, *url.URL) { + t.Helper() + uri, err := url.Parse(dir) + require.NoError(t, err) + return NewFileHandler(uri), uri +} + +// writeMasterManifestToDir writes a MasterManifest to dir/manifest.json. 
+func writeMasterManifestToDir(t *testing.T, dir string, manifests []*Manifest) { + t.Helper() + master := &MasterManifest{Manifests: manifests} + b, err := json.Marshal(master) + require.NoError(t, err) + require.NoError(t, os.WriteFile(filepath.Join(dir, backupManifest), b, 0644)) +} + +func TestCreateManifestSummary(t *testing.T) { + tmpDir := t.TempDir() + h, _ := testFileHandlerForDir(t, tmpDir) + + master := &MasterManifest{ + Manifests: []*Manifest{ + { + ManifestBase: ManifestBase{ + Type: "full", BackupId: "id1", BackupNum: 1, + Path: "dgraph.20260101.000000.000", ReadTs: 100, + }, + Groups: map[uint32][]string{1: {"name", "age", "email"}}, + }, + { + ManifestBase: ManifestBase{ + Type: "incremental", BackupId: "id1", BackupNum: 2, + Path: "dgraph.20260115.000000.000", ReadTs: 200, + SinceTsDeprecated: 100, + }, + Groups: map[uint32][]string{1: {"name"}}, + }, + }, + } + + require.NoError(t, CreateManifestSummary(h, master)) + + summaryPath := filepath.Join(tmpDir, backupManifestSummary) + require.FileExists(t, summaryPath) + // Temp file must be cleaned up after rename + require.NoFileExists(t, filepath.Join(tmpDir, tmpManifestSummary)) + + raw, err := os.ReadFile(summaryPath) + require.NoError(t, err) + + var got MasterManifestSummary + require.NoError(t, json.Unmarshal(raw, &got)) + require.Equal(t, 2, len(got.Manifests)) + + s0 := got.Manifests[0] + require.Equal(t, "full", s0.Type) + require.Equal(t, "id1", s0.BackupId) + require.Equal(t, uint64(1), s0.BackupNum) + require.Equal(t, uint64(100), s0.ReadTs) + + // The raw JSON must never contain the groups or drop_operations keys. 
+ rawStr := string(raw) + require.NotContains(t, rawStr, `"groups"`) + require.NotContains(t, rawStr, `"drop_operations"`) +} + +func TestListBackupManifests_UsesSummaryWhenPresent(t *testing.T) { + tmpDir := t.TempDir() + h, _ := testFileHandlerForDir(t, tmpDir) + + manifests := []*Manifest{ + { + ManifestBase: ManifestBase{ + Type: "full", BackupId: "id1", BackupNum: 1, + Path: "dgraph.20260101.000000.000", ReadTs: 100, + }, + Groups: map[uint32][]string{1: {"name", "age"}}, + }, + } + // Write both full manifest and summary + writeMasterManifestToDir(t, tmpDir, manifests) + require.NoError(t, CreateManifestSummary(h, &MasterManifest{Manifests: manifests})) + + // Default (fullManifest=false) should use the summary. + got, err := ListBackupManifests(tmpDir, nil, false) + require.NoError(t, err) + require.Equal(t, 1, len(got)) + require.Equal(t, "full", got[0].Type) + require.Equal(t, "id1", got[0].BackupId) + // Summary path omits Groups — verify listing path does not expose them + require.Nil(t, got[0].Groups) +} + +func TestListBackupManifests_FullManifestFlag(t *testing.T) { + tmpDir := t.TempDir() + h, _ := testFileHandlerForDir(t, tmpDir) + + manifests := []*Manifest{ + { + ManifestBase: ManifestBase{ + Type: "full", BackupId: "id1", BackupNum: 1, + Path: "dgraph.20260101.000000.000", ReadTs: 100, Version: 2105, + }, + Groups: map[uint32][]string{1: {"0-name", "0-age"}}, + }, + } + writeMasterManifestToDir(t, tmpDir, manifests) + require.NoError(t, CreateManifestSummary(h, &MasterManifest{Manifests: manifests})) + + // fullManifest=true must bypass the summary and return Groups. + got, err := ListBackupManifests(tmpDir, nil, true) + require.NoError(t, err) + require.Equal(t, 1, len(got)) + require.NotNil(t, got[0].Groups, "full manifest flag must return Groups") +} + +func TestListBackupManifests_FallsBackToFullManifest(t *testing.T) { + tmpDir := t.TempDir() + + // Version 2105 so upgradeManifest is a no-op and predicates are returned as-is. 
+ manifests := []*Manifest{ + { + ManifestBase: ManifestBase{ + Type: "full", BackupId: "id2", BackupNum: 1, + Path: "dgraph.20260101.000000.000", ReadTs: 100, Version: 2105, + }, + Groups: map[uint32][]string{1: {"0-pred1", "0-pred2"}}, + }, + } + // Write only the full manifest — no summary present + writeMasterManifestToDir(t, tmpDir, manifests) + + got, err := ListBackupManifests(tmpDir, nil, false) + require.NoError(t, err) + require.Equal(t, 1, len(got)) + require.Equal(t, "id2", got[0].BackupId) + // Full manifest includes Groups + require.NotNil(t, got[0].Groups) + require.Equal(t, []string{"0-pred1", "0-pred2"}, got[0].Groups[1]) +} + +func TestListBackupManifests_CorruptSummaryFallsBackToFullManifest(t *testing.T) { + tmpDir := t.TempDir() + + manifests := []*Manifest{ + { + ManifestBase: ManifestBase{ + Type: "full", BackupId: "id3", BackupNum: 1, + Path: "dgraph.20260101.000000.000", ReadTs: 100, Version: 2105, + }, + Groups: map[uint32][]string{1: {"0-pred1"}}, + }, + } + writeMasterManifestToDir(t, tmpDir, manifests) + + // Write deliberately corrupt summary + summaryPath := filepath.Join(tmpDir, backupManifestSummary) + require.NoError(t, os.WriteFile(summaryPath, []byte("not valid json {{"), 0644)) + + got, err := ListBackupManifests(tmpDir, nil, false) + require.NoError(t, err) + require.Equal(t, 1, len(got)) + require.Equal(t, "id3", got[0].BackupId) + require.NotNil(t, got[0].Groups) +} + +func TestListBackupManifests_EmptyLocation(t *testing.T) { + tmpDir := t.TempDir() + // No manifest files — getConsolidatedManifest returns empty MasterManifest + _, err := ListBackupManifests(tmpDir, nil, false) + require.NoError(t, err) +}