diff --git a/internal/setup/conflict.go b/internal/setup/conflict.go new file mode 100644 index 00000000..5ce783d2 --- /dev/null +++ b/internal/setup/conflict.go @@ -0,0 +1,261 @@ +package setup + +import ( + "errors" + "fmt" + "maps" + "slices" + "strings" + + "github.com/canonical/chisel/internal/strdist" +) + +type segmentSlice struct { + Slice *Slice + // PathInfo is kept here as an optimization to avoid lookups on + // Slice.Contents for every slice. + PathInfo PathInfo + // WholePath is used to simplify both error reporting and matching against + // paths with "**"; both of which require reconstructing the whole path. + WholePath string +} + +type segment struct { + Text string + // HasGlob is set when the path contains "*" or "?" or "**". + HasGlob bool + // HasDoubleGlob is set when the path contains "**". + HasDoubleGlob bool +} + +type node struct { + Segment segment + Slices []*segmentSlice + Children map[string]*node +} + +// pathConflictTree uses a custom trie to find conflicts that might arise from +// extracting different paths into the same root directory. +// +// It optimizes conflict resolution by calling strdist.GlobPath only when +// strictly necessary and by passing it less data to compare. It relies on the +// fact that real chisel releases most paths often share a very long prefix +// that does not need to be compared each time. Additionally, our grammar is +// very restrictive (only "*", "?" and "**") meaning that unless "**" is used, +// any symbol can only match until a "/" is found. +// +// Because of the above, this algorithm splits paths into segments that are +// delimited by "/". When inserting a path, each segment is compared at most +// once with the path independently of how many paths there are in the release. +// Lastly, when looking for conflicts, if the segments do not contain "**" then +// instead of comparing the whole path we can compare only the segment. +type pathConflictTree struct { + Root *node + PathToSlices map[string][]*Slice +} + +func newConflictTree(pathToSlices map[string][]*Slice) pathConflictTree { + root := &node{ + Segment: segment{"/", false, false}, + Children: map[string]*node{}, + } + return pathConflictTree{Root: root, PathToSlices: pathToSlices} +} + +func (g *pathConflictTree) HasConflict() error { + // Make errors deterministic. + paths := slices.Collect(maps.Keys(g.PathToSlices)) + slices.Sort(paths) + + for _, path := range paths { + slices := g.PathToSlices[path] + var oldSlices []*segmentSlice + for _, oldSlice := range slices { + oldSlices = append(oldSlices, &segmentSlice{oldSlice, oldSlice.Contents[path], path}) + } + segments, err := pathToSegments(path) + if err != nil { + return err + } + err = g.pathHasConflict(segments, oldSlices) + if err != nil { + return err + } + g.insertSegments(segments, oldSlices) + } + return nil +} + +func (g *pathConflictTree) pathHasConflict(oldSegments []segment, oldSlices []*segmentSlice) error { + conflictErrMsg := func(oldSegmentSlice, newSegmentSlice *segmentSlice) error { + oldSlice, oldPath := oldSegmentSlice.Slice, oldSegmentSlice.WholePath + newSlice, newPath := newSegmentSlice.Slice, newSegmentSlice.WholePath + if (oldSlice.Package > newSlice.Package) || (oldSlice.Package == newSlice.Package && oldSlice.Name > newSlice.Name) || + (oldSlice.Package == newSlice.Package && oldSlice.Name == newSlice.Name && oldPath > newPath) { + oldSlice, newSlice = newSlice, oldSlice + oldPath, newPath = newPath, oldPath + } + return fmt.Errorf("slices %s and %s conflict on %s and %s", oldSlice, newSlice, oldPath, newPath) + } + + var currentQueue []*node + var nextQueue []*node + + // Skip "/". + currentQueue = slices.Collect(maps.Values(g.Root.Children)) + oldSegments = oldSegments[1:] + + // If we run out of segments from the graph or the path there cannot be a + // conflict (note paths with "**" are collapsed to one segment). + for len(currentQueue) > 0 && len(oldSegments) > 0 { + oldSegment := oldSegments[0] + for _, newNode := range currentQueue { + newNodeLoop: + for _, oldSegmentSlice := range oldSlices { + oldSlice := oldSegmentSlice.Slice + oldPathInfo := oldSegmentSlice.PathInfo + for _, newSegmentSlice := range newNode.Slices { + newSlice := newSegmentSlice.Slice + newPathInfo := newSegmentSlice.PathInfo + newSegment := newNode.Segment + + // If slices cannot conflict then skip the more expensive + // checks. + if (oldPathInfo.Kind == GlobPath || oldPathInfo.Kind == CopyPath) && (newPathInfo.Kind == GlobPath || newPathInfo.Kind == CopyPath) { + if newSlice.Package == oldSlice.Package { + // If content is **extracted** from the same + // package, it will necessarily be the same. + continue + } + } + + if newSegment.HasDoubleGlob || oldSegment.HasDoubleGlob { + // Case 1: One of the strings has a double glob, we + // need to check the whole remaining path against + // each other. + if strdist.GlobPath(oldSegmentSlice.WholePath, newSegmentSlice.WholePath) { + return conflictErrMsg(oldSegmentSlice, newSegmentSlice) + } + } else if newSegment.HasGlob || oldSegment.HasGlob { + // Case 2: Either segment has a single glob (* or ?). + // We only need to check the segment. + if strdist.GlobPath(oldSegment.Text, newSegment.Text) { + // Only when we get to leaf (i.e. no children, can + // we have a conflict). + if len(newNode.Children) == 0 && len(oldSegments) == 1 { + // If we are at the terminal node of both paths we found a conflict. + return conflictErrMsg(oldSegmentSlice, newSegmentSlice) + } + for _, child := range newNode.Children { + nextQueue = append(nextQueue, child) + } + break newNodeLoop + } else { + // Once GlobPath returns false there cannot be a + // conflict between both paths, we can + // break here. + break newNodeLoop + } + } else { + // Case 3: No globs, we can compare the strings directly. + if oldSegment.Text == newSegment.Text { + if len(newNode.Children) == 0 && len(oldSegments) == 1 { + // If these are both terminal nodes, conflict found. + return conflictErrMsg(oldSegmentSlice, newSegmentSlice) + } + for _, child := range newNode.Children { + nextQueue = append(nextQueue, child) + } + break newNodeLoop + } + } + } + } + } + currentQueue, nextQueue = nextQueue, currentQueue + nextQueue = nextQueue[0:0] + + oldSegments = oldSegments[1:] + } + + return nil +} + +// insertSegments inserts the path's segments blindly in the graph without +// looking at conflicts. +func (g *pathConflictTree) insertSegments(segments []segment, slices []*segmentSlice) { + parent := g.Root + // Skip "/". + segments = segments[1:] + + for _, segment := range segments { + current, ok := parent.Children[segment.Text] + if !ok { + current = &node{ + Segment: segment, + Children: map[string]*node{}, + } + } + current.Slices = append(current.Slices, slices...) + parent.Children[segment.Text] = current + parent = current + } +} + +// pathToSegments returns the list of segments that compose the path. +// Directories, i.e. paths that end with "/", contain the empty segment "" for +// explicit termination in the trie to distinguish them from parent directories +// of other paths. +func pathToSegments(path string) ([]segment, error) { + if path[0] != '/' { + return nil, errors.New("internal error: path does not start with '/'") + } + segments := []segment{segment{"/", false, false}} + path = path[1:] + for { + end, singleGlob, doubleGlob := segmentEnd(path) + segment := segment{ + Text: path[:end+1], + HasGlob: singleGlob, + HasDoubleGlob: doubleGlob, + } + segments = append(segments, segment) + path = path[end+1:] + if path == "" && !strings.HasSuffix(segment.Text, "/") { + // Non-directories: last segment is also termination node. + break + } + if segment.Text == "" { + // Directories: add the termination node. + break + } + } + return segments, nil +} + +// segmentEnd finds the end of a segment according to the following rules: +// - If s contains "/" then segment will finish at the first "/" found unless +// there is a "**" before that, in that case segment = s. +// - Else segment = s. +// +// hasGlob is set to true if "*", "?" or "**" is found in the segment. +// hasDoubleGlob is set to true if "**" is found in the segment. +func segmentEnd(s string) (end int, hasGlob bool, hasDoubleGlob bool) { + end = strings.IndexAny(s, "*?/") + if end == -1 { + end = len(s) - 1 + } else if s[end] == '*' || s[end] == '?' { + hasGlob = true + slash := strings.IndexRune(s[end:], '/') + if slash != -1 { + end = end + slash + } else { + end = len(s) - 1 + } + hasDoubleGlob = strings.Contains(s[:end+1], "**") + if hasDoubleGlob { + end = len(s) - 1 + } + } + return end, hasGlob, hasDoubleGlob +} diff --git a/internal/setup/conflict_test.go b/internal/setup/conflict_test.go new file mode 100644 index 00000000..c1cdc964 --- /dev/null +++ b/internal/setup/conflict_test.go @@ -0,0 +1,185 @@ +package setup_test + +import ( + "slices" + "strings" + + . "gopkg.in/check.v1" + + "github.com/canonical/chisel/internal/setup" +) + +func (s *S) TestPathToSegments(c *C) { + tests := []struct { + path string + segments []setup.PathSegment + err string + }{{ + path: "/foo/bar", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: "foo/"}, + {Text: "bar"}, + }, + }, { + path: "/foo/", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: "foo/"}, + {Text: ""}, + }, + }, { + path: "/", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: ""}, + }, + }, { + path: "/*", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: "*", HasGlob: true}, + }, + }, { + path: "/*/", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: "*/", HasGlob: true}, + {Text: ""}, + }, + }, { + path: "/**", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: "**", HasGlob: true, HasDoubleGlob: true}, + }, + }, { + path: "/**/bar", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: "**/bar", HasGlob: true, HasDoubleGlob: true}, + }, + }, { + path: "/foo*/bar", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: "foo*/", HasGlob: true}, + {Text: "bar"}, + }, + }, { + path: "/foo?/bar", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: "foo?/", HasGlob: true}, + {Text: "bar"}, + }, + }, { + path: "/f*oo/f**/bar", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: "f*oo/", HasGlob: true}, + {Text: "f**/bar", HasGlob: true, HasDoubleGlob: true}, + }, + }, { + path: "/foo**/bar/baz", + segments: []setup.PathSegment{ + {Text: "/"}, + {Text: "foo**/bar/baz", HasGlob: true, HasDoubleGlob: true}, + }, + }, { + path: "foo/bar", + err: `internal error: path does not start with '/'`, + }} + + for _, test := range tests { + c.Logf("Test: %q", test.path) + segments, err := setup.PathToSegments(test.path) + if test.err != "" { + c.Assert(err, ErrorMatches, test.err) + continue + } + c.Assert(err, IsNil) + c.Assert(segments, DeepEquals, test.segments) + } +} + +func (s *S) TestConflictTree(c *C) { + slicePath := &setup.Slice{ + Package: "pkg1", + Name: "path", + Contents: map[string]setup.PathInfo{ + "/a/*/b": {Kind: setup.CopyPath}, + }, + } + sliceGlob := &setup.Slice{ + Package: "pkg2", + Name: "glob", + Contents: map[string]setup.PathInfo{ + "/a/*": {Kind: setup.GlobPath}, + }, + } + + pathInfo := setup.PathSegmentSlice{ + Slice: slicePath, + PathInfo: setup.PathInfo{Kind: setup.CopyPath}, + WholePath: "/a/*/b", + } + globInfo := setup.PathSegmentSlice{ + Slice: sliceGlob, + PathInfo: setup.PathInfo{Kind: setup.GlobPath}, + WholePath: "/a/*", + } + + tree := setup.NewConflictTree(map[string][]*setup.Slice{ + "/a/*/b": {slicePath}, + "/a/*": {sliceGlob}, + }) + err := tree.HasConflict() + c.Assert(err, IsNil) + + expected := &setup.PathNode{ + Segment: setup.PathSegment{Text: "/"}, + Children: map[string]*setup.PathNode{ + "a/": { + Segment: setup.PathSegment{Text: "a/"}, + Slices: []*setup.PathSegmentSlice{&pathInfo, &globInfo}, + Children: map[string]*setup.PathNode{ + "*": { + Segment: setup.PathSegment{Text: "*", HasGlob: true}, + Slices: []*setup.PathSegmentSlice{&globInfo}, + }, + "*/": { + Segment: setup.PathSegment{Text: "*/", HasGlob: true}, + Slices: []*setup.PathSegmentSlice{&pathInfo}, + Children: map[string]*setup.PathNode{ + "b": { + Segment: setup.PathSegment{Text: "b"}, + Slices: []*setup.PathSegmentSlice{&pathInfo}, + }, + }, + }, + }, + }, + }, + } + assertTreeEquals(c, tree.Root, expected) +} + +func assertTreeEquals(c *C, obtained, expected *setup.PathNode) { + c.Assert(obtained.Segment, DeepEquals, expected.Segment) + + slices.SortFunc(obtained.Slices, func(a, b *setup.PathSegmentSlice) int { + return strings.Compare(a.Slice.String(), b.Slice.String()) + }) + slices.SortFunc(expected.Slices, func(a, b *setup.PathSegmentSlice) int { + return strings.Compare(a.Slice.String(), b.Slice.String()) + }) + c.Assert(obtained.Slices, DeepEquals, expected.Slices) + + c.Assert(len(obtained.Children), Equals, len(expected.Children)) + for name, expectedChild := range expected.Children { + obtainedChild, ok := obtained.Children[name] + c.Assert(ok, Equals, true) + assertTreeEquals(c, obtainedChild, expectedChild) + } +} diff --git a/internal/setup/export_test.go b/internal/setup/export_test.go index 35231e50..7fd2d933 100644 --- a/internal/setup/export_test.go +++ b/internal/setup/export_test.go @@ -1,3 +1,11 @@ package setup type YAMLPath = yamlPath + +type PathSegment = segment +type PathSegmentSlice = segmentSlice +type PathNode = node +type PathConflictTree = pathConflictTree + +var PathToSegments func(string) ([]PathSegment, error) = pathToSegments +var NewConflictTree func(map[string][]*Slice) PathConflictTree = newConflictTree diff --git a/internal/setup/setup.go b/internal/setup/setup.go index aaa8d1e5..f9583d7e 100644 --- a/internal/setup/setup.go +++ b/internal/setup/setup.go @@ -13,7 +13,6 @@ import ( "github.com/canonical/chisel/internal/apacheutil" "github.com/canonical/chisel/internal/deb" - "github.com/canonical/chisel/internal/strdist" ) // Release is a collection of package slices targeting a particular @@ -272,36 +271,11 @@ func (r *Release) validate() error { } } - // Check for glob and generate conflicts. - for oldPath, oldSlices := range paths { - for _, old := range oldSlices { - oldInfo := old.Contents[oldPath] - if oldInfo.Kind != GeneratePath && oldInfo.Kind != GlobPath { - break - } - for newPath, newSlices := range paths { - if oldPath == newPath { - // Identical paths have been filtered earlier. - continue - } - for _, new := range newSlices { - newInfo := new.Contents[newPath] - if oldInfo.Kind == GlobPath && (newInfo.Kind == GlobPath || newInfo.Kind == CopyPath) { - if new.Package == old.Package { - continue - } - } - if strdist.GlobPath(newPath, oldPath) { - if (old.Package > new.Package) || (old.Package == new.Package && old.Name > new.Name) || - (old.Package == new.Package && old.Name == new.Name && oldPath > newPath) { - old, new = new, old - oldPath, newPath = newPath, oldPath - } - return fmt.Errorf("slices %s and %s conflict on %s and %s", old, new, oldPath, newPath) - } - } - } - } + // Check for conflicts among different paths. + tree := newConflictTree(paths) + err = tree.HasConflict() + if err != nil { + return err } // Check for cycles. @@ -556,7 +530,7 @@ func (r *Release) prefers() (map[preferKey]string, error) { // preferredPathPackage returns pkg1 if it can be reached from pkg2 following // prefer relationships, and conversely for pkg2. If none are reachable it -// returns the preferNone error. +// returns the errPreferNone error. // // If there is a cycle, an error is returned. func preferredPathPackage(path, pkg1, pkg2 string, prefers map[preferKey]string) (choice string, err error) { diff --git a/internal/setup/setup_test.go b/internal/setup/setup_test.go index d669943f..cd63e244 100644 --- a/internal/setup/setup_test.go +++ b/internal/setup/setup_test.go @@ -518,6 +518,125 @@ var setupTests = []setupTest{{ `, }, relerror: "slices mypkg1_myslice1 and mypkg2_myslice1 conflict on /path1", +}, { + summary: "When multiple prefixes match all are tested", + input: map[string]string{ + "slices/mydir/mypkg1.yaml": ` + package: mypkg1 + slices: + myslice1: + contents: + /path/*/foo: + `, + "slices/mydir/mypkg2.yaml": ` + package: mypkg2 + slices: + myslice1: + contents: + /path/bar/foo1: + /path/bar/f*: + `, + }, + relerror: "slices mypkg1_myslice1 and mypkg2_myslice1 conflict on /path/\\*/foo and /path/bar/f\\*", +}, { + summary: "Later matching paths are tested after several non-matches", + input: map[string]string{ + "slices/mydir/mypkg1.yaml": ` + package: mypkg1 + slices: + myslice1: + contents: + /path/*/foo: + `, + "slices/mydir/mypkg2.yaml": ` + package: mypkg2 + slices: + myslice1: + contents: + /path/bar/f: + /path/bar/fa: + /path/bar/fb: + /path/bar/f*: + `, + }, + relerror: "slices mypkg1_myslice1 and mypkg2_myslice1 conflict on /path/\\*/foo and /path/bar/f\\*", +}, { + summary: "Conflicting globs with wildcard in the middle of the segment", + input: map[string]string{ + "slices/mydir/mypkg1.yaml": ` + package: mypkg1 + slices: + myslice: + contents: + /path/ab*cd/file: + `, + "slices/mydir/mypkg2.yaml": ` + package: mypkg2 + slices: + myslice: + contents: + /path/ab12cd/file: + `, + }, + relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/ab\*cd/file and /path/ab12cd/file`, +}, { + summary: "Conflicting globs with trailing **", + input: map[string]string{ + "slices/mydir/mypkg1.yaml": ` + package: mypkg1 + slices: + myslice: + contents: + /path/ab*cd/**: + `, + "slices/mydir/mypkg2.yaml": ` + package: mypkg2 + slices: + myslice: + contents: + /path/ab12cd/: + `, + }, + relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/ab\*cd/\*\* and /path/ab12cd/`, +}, { + summary: "Conflicting globs when one side uses double-star", + input: map[string]string{ + "slices/mydir/mypkg1.yaml": ` + package: mypkg1 + slices: + myslice: + contents: + /path/**/file: + `, + "slices/mydir/mypkg2.yaml": ` + package: mypkg2 + slices: + myslice: + contents: + /path/*/file: + `, + }, + relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/\*\*/file and /path/\*/file`, +}, { + summary: "** checks all paths with the same prefix", + input: map[string]string{ + "slices/mydir/mypkg1.yaml": ` + package: mypkg1 + slices: + myslice: + contents: + /path/**/file: + `, + "slices/mydir/mypkg2.yaml": ` + package: mypkg2 + slices: + myslice: + contents: + /path/subdir/nope: + /path/subdir/file: + `, + }, + relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/\*\*/file and /path/subdir/file`, }, { summary: "Directories must be suffixed with /", input: map[string]string{ @@ -678,8 +797,22 @@ var setupTests = []setupTest{{ contents: /file/foob*r: /file/f*obar: + `, + }, +}, { + summary: "Glob conflicts with directory that also has children", + input: map[string]string{ + "slices/mydir/mypkg.yaml": ` + package: mypkg + slices: + myslice: + contents: + /a/: {make: true} + /a/b: + /a/*: `, }, + relerror: `slices mypkg_myslice and mypkg_myslice conflict on /a/ and /a/\*`, }, { summary: "Invalid glob options", input: map[string]string{ @@ -1943,6 +2076,25 @@ var setupTests = []setupTest{{ `, }, relerror: `slices mypkg_myslice1 and mypkg_myslice2 conflict on /path/subdir/\*\* and /path/\*\*`, +}, { + summary: "Generate paths conflict with glob paths sharing the prefix", + input: map[string]string{ + "slices/mydir/mypkg1.yaml": ` + package: mypkg1 + slices: + myslice: + contents: + /path/subdir/**: {generate: manifest} + `, + "slices/mydir/mypkg2.yaml": ` + package: mypkg2 + slices: + myslice: + contents: + /path/subdir/f*: + `, + }, + relerror: `slices mypkg1_myslice and mypkg2_myslice conflict on /path/subdir/\*\* and /path/subdir/f\*`, }, { summary: `No other options in "generate" paths`, input: map[string]string{