diff --git a/registry/storage/catalog.go b/registry/storage/catalog.go index 019d159ed..55500694d 100644 --- a/registry/storage/catalog.go +++ b/registry/storage/catalog.go @@ -3,32 +3,23 @@ package storage import ( "context" "errors" - "fmt" "io" "path" - "sort" "strings" "github.com/distribution/distribution/v3/reference" "github.com/distribution/distribution/v3/registry/storage/driver" ) -var ( - // ErrStopReposWalk is a sentinel error to indicate that the repository path walk was stopped. - ErrStopReposWalk = errors.New("stop repos walk") -) - -// Repositories populates the passed passed repos slice with repositories in the -// registry up to the capacity of the slice. Returns the number of repos returned and -// `io.EOF` if no more repositories are available. +// Returns a list, or partial list, of repositories in the registry. // Because it's a quite expensive operation, it should only be used when building up // an initial set of repositories. func (reg *registry) Repositories(ctx context.Context, repos []string, last string) (n int, err error) { var finishedWalk bool var foundRepos []string - if len(repos) == 0 && cap(repos) == 0 { - return 0, errors.New("no repos requested") + if len(repos) == 0 { + return 0, errors.New("no space in slice") } root, err := pathFor(repositoriesRootPathSpec{}) @@ -36,17 +27,20 @@ func (reg *registry) Repositories(ctx context.Context, repos []string, last stri return 0, err } - err = reg.walkRepos(ctx, root, last, func(repoPath string) error { - // this is placed before the append, - // so that we will get an extra repo if - // any. This ensures that we do not return - // io.EOF without it being the last record. - if len(foundRepos) == len(repos) { - finishedWalk = true - return ErrStopReposWalk + err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error { + err := handleRepository(fileInfo, root, last, func(repoPath string) error { + foundRepos = append(foundRepos, repoPath) + return nil + }) + if err != nil { + return err } - foundRepos = append(foundRepos, repoPath) + // if we've filled our array, no need to walk any further + if len(foundRepos) == len(repos) { + finishedWalk = true + return driver.ErrSkipDir + } return nil }) @@ -70,7 +64,11 @@ func (reg *registry) Enumerate(ctx context.Context, ingester func(string) error) return err } - return reg.walkRepos(ctx, root, "", ingester) + err = reg.blobStore.driver.Walk(ctx, root, func(fileInfo driver.FileInfo) error { + return handleRepository(fileInfo, root, "", ingester) + }) + + return err } // Remove removes a repository from storage @@ -83,86 +81,78 @@ func (reg *registry) Remove(ctx context.Context, name reference.Named) error { return reg.driver.Delete(ctx, repoDir) } -// walkRepos walks paths rooted in root calling fn for each found repo path. -// Paths are walked in a lexical order which makes the output deterministic. -// If last is not an empty string it walks all repo paths. Otherwise -// it returns when last repoPath is found. -func (reg *registry) walkRepos(ctx context.Context, root, last string, fn func(repoPath string) error) error { - midFn := fn - - // middleware func to exclude the `last` repo - // only use it, if there is set a last. - if last != "" { - midFn = func(repoPath string) error { - if repoPath != last { - return fn(repoPath) - } - return nil - } - } - - // call our recursive func, with the midFn and the start path - // of where we want to find repositories. - err := reg.walkReposPath(ctx, root, root, last, midFn) - if err == ErrStopReposWalk { - return nil - } - return err +// lessPath returns true if one path a is less than path b. +// +// A component-wise comparison is done, rather than the lexical comparison of +// strings. +func lessPath(a, b string) bool { + // we provide this behavior by making separator always sort first. + return compareReplaceInline(a, b, '/', '\x00') < 0 } -// walkReposPath walks through all folders in `lookPath`, -// looking for repositories. See walkRepos for more detailed description. -func (reg *registry) walkReposPath(ctx context.Context, root, lookPath, last string, fn func(repoPath string) error) error { - // get children in the current path - children, err := reg.blobStore.driver.List(ctx, lookPath) - if err != nil { - return err +// compareReplaceInline modifies runtime.cmpstring to replace old with new +// during a byte-wise comparison. +func compareReplaceInline(s1, s2 string, old, new byte) int { + // TODO(stevvooe): We are missing an optimization when the s1 and s2 have + // the exact same slice header. It will make the code unsafe but can + // provide some extra performance. + + l := len(s1) + if len(s2) < l { + l = len(s2) } - // sort this, so that it will be added in the correct order - sort.Strings(children) - - if last != "" { - splitLast := strings.Split(last, "/") - - // call the next iteration of walkReposPath if any, but - // exclude the current one. - if len(splitLast) > 1 { - if err := reg.walkReposPath(ctx, root, lookPath+"/"+splitLast[0], strings.Join(splitLast[1:], "/"), fn); err != nil { - return err - } + for i := 0; i < l; i++ { + c1, c2 := s1[i], s2[i] + if c1 == old { + c1 = new } - // find current last path in our children - n := sort.SearchStrings(children, lookPath+"/"+splitLast[0]) - if n == len(children) || children[n] != lookPath+"/"+splitLast[0] { - return fmt.Errorf("%q repository not found", last) + if c2 == old { + c2 = new } - // if this is not a final `last` (there are more `/` left) - // then exclude the current index, else include it - if len(splitLast) > 1 { - children = children[n+1:] - } else { - children = children[n:] + if c1 < c2 { + return -1 + } + + if c1 > c2 { + return +1 } } - for _, child := range children { - _, file := path.Split(child) + if len(s1) < len(s2) { + return -1 + } - if file == "_manifests" { - if err := fn(strings.TrimPrefix(lookPath, root+"/")); err != nil { - if err == driver.ErrSkipDir { - break - } - return err - } - } else if !strings.HasPrefix(file, "_") { - if err := reg.walkReposPath(ctx, root, child, "", fn); err != nil { + if len(s1) > len(s2) { + return +1 + } + + return 0 +} + +// handleRepository calls function fn with a repository path if fileInfo +// has a path of a repository under root and that it is lexographically +// after last. Otherwise, it will return ErrSkipDir. This should be used +// with Walk to do handling with repositories in a storage. +func handleRepository(fileInfo driver.FileInfo, root, last string, fn func(repoPath string) error) error { + filePath := fileInfo.Path() + + // lop the base path off + repo := filePath[len(root)+1:] + + _, file := path.Split(repo) + if file == "_manifests" { + repo = strings.TrimSuffix(repo, "/_manifests") + if lessPath(last, repo) { + if err := fn(repo); err != nil { return err } } + return driver.ErrSkipDir + } else if strings.HasPrefix(file, "_") { + return driver.ErrSkipDir } return nil diff --git a/registry/storage/catalog_test.go b/registry/storage/catalog_test.go index 554365d97..14ba4e833 100644 --- a/registry/storage/catalog_test.go +++ b/registry/storage/catalog_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "math/rand" "testing" "github.com/distribution/distribution/v3" @@ -240,3 +241,85 @@ func TestCatalogWalkError(t *testing.T) { t.Errorf("Expected catalog driver list error") } } + +func BenchmarkPathCompareEqual(B *testing.B) { + B.StopTimer() + pp := randomPath(100) + // make a real copy + ppb := append([]byte{}, []byte(pp)...) + a, b := pp, string(ppb) + + B.StartTimer() + for i := 0; i < B.N; i++ { + lessPath(a, b) + } +} + +func BenchmarkPathCompareNotEqual(B *testing.B) { + B.StopTimer() + a, b := randomPath(100), randomPath(100) + B.StartTimer() + + for i := 0; i < B.N; i++ { + lessPath(a, b) + } +} + +func BenchmarkPathCompareNative(B *testing.B) { + B.StopTimer() + a, b := randomPath(100), randomPath(100) + B.StartTimer() + + for i := 0; i < B.N; i++ { + c := a < b + _ = c && false + } +} + +func BenchmarkPathCompareNativeEqual(B *testing.B) { + B.StopTimer() + pp := randomPath(100) + a, b := pp, pp + B.StartTimer() + + for i := 0; i < B.N; i++ { + c := a < b + _ = c && false + } +} + +var ( + filenameChars = []byte("abcdefghijklmnopqrstuvwxyz0123456789") + separatorChars = []byte("._-") +) + +func randomPath(length int64) string { + path := "/" + for int64(len(path)) < length { + chunkLength := rand.Int63n(length-int64(len(path))) + 1 + chunk := randomFilename(chunkLength) + path += chunk + remaining := length - int64(len(path)) + if remaining == 1 { + path += randomFilename(1) + } else if remaining > 1 { + path += "/" + } + } + return path +} + +func randomFilename(length int64) string { + b := make([]byte, length) + wasSeparator := true + for i := range b { + if !wasSeparator && i < len(b)-1 && rand.Intn(4) == 0 { + b[i] = separatorChars[rand.Intn(len(separatorChars))] + wasSeparator = true + } else { + b[i] = filenameChars[rand.Intn(len(filenameChars))] + wasSeparator = false + } + } + return string(b) +}