1
0
mirror of https://github.com/distribution/distribution synced 2024-11-06 19:35:52 +01:00

Merge pull request #755 from stevvooe/digest-type

Move Digest type into discrete package
This commit is contained in:
Olivier Gambier 2014-11-19 17:02:54 -08:00
commit b65d8d046e
14 changed files with 735 additions and 309 deletions

@ -0,0 +1,95 @@
package testutil
import (
"archive/tar"
"bytes"
"crypto/rand"
"fmt"
"io"
"io/ioutil"
mrand "math/rand"
"time"
"github.com/docker/docker/pkg/tarsum"
)
// CreateRandomTarFile creates a random tarfile, returning it as an
// io.ReadSeeker along with its tarsum. An error is returned if there is a
// problem generating valid content.
func CreateRandomTarFile() (rs io.ReadSeeker, tarSum string, err error) {
nFiles := mrand.Intn(10) + 10
target := &bytes.Buffer{}
wr := tar.NewWriter(target)
// Perturb this on each iteration of the loop below.
header := &tar.Header{
Mode: 0644,
ModTime: time.Now(),
Typeflag: tar.TypeReg,
Uname: "randocalrissian",
Gname: "cloudcity",
AccessTime: time.Now(),
ChangeTime: time.Now(),
}
for fileNumber := 0; fileNumber < nFiles; fileNumber++ {
fileSize := mrand.Int63n(1<<20) + 1<<20
header.Name = fmt.Sprint(fileNumber)
header.Size = fileSize
if err := wr.WriteHeader(header); err != nil {
return nil, "", err
}
randomData := make([]byte, fileSize)
// Fill up the buffer with some random data.
n, err := rand.Read(randomData)
if n != len(randomData) {
return nil, "", fmt.Errorf("short read creating random reader: %v bytes != %v bytes", n, len(randomData))
}
if err != nil {
return nil, "", err
}
nn, err := io.Copy(wr, bytes.NewReader(randomData))
if nn != fileSize {
return nil, "", fmt.Errorf("short copy writing random file to tar")
}
if err != nil {
return nil, "", err
}
if err := wr.Flush(); err != nil {
return nil, "", err
}
}
if err := wr.Close(); err != nil {
return nil, "", err
}
reader := bytes.NewReader(target.Bytes())
// A tar builder that supports tarsum inline calculation would be awesome
// here.
ts, err := tarsum.NewTarSum(reader, true, tarsum.Version1)
if err != nil {
return nil, "", err
}
nn, err := io.Copy(ioutil.Discard, ts)
if nn != int64(len(target.Bytes())) {
return nil, "", fmt.Errorf("short copy when getting tarsum of random layer: %v != %v", nn, len(target.Bytes()))
}
if err != nil {
return nil, "", err
}
return bytes.NewReader(target.Bytes()), ts.Sum(nil), nil
}

146
digest/digest.go Normal file

@ -0,0 +1,146 @@
package digest
import (
"bytes"
"crypto/sha256"
"fmt"
"hash"
"io"
"io/ioutil"
"strings"
"github.com/docker/docker-registry/common"
"github.com/docker/docker/pkg/tarsum"
)
// Digest allows simple protection of hex formatted digest strings, prefixed
// by their algorithm. Strings of type Digest have some guarantee of being in
// the correct format and it provides quick access to the components of a
// digest string.
//
// The following is an example of the contents of Digest types:
//
// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc
//
// More important for this code base, this type is compatible with tarsum
// digests. For example, the following would be a valid Digest:
//
// tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b
//
// This allows to abstract the digest behind this type and work only in those
// terms.
type Digest string
// NewDigest returns a Digest from alg and a hash.Hash object.
func NewDigest(alg string, h hash.Hash) Digest {
return Digest(fmt.Sprintf("%s:%x", alg, h.Sum(nil)))
}
var (
// ErrDigestInvalidFormat returned when digest format invalid.
ErrDigestInvalidFormat = fmt.Errorf("invalid checksum digest format")
// ErrDigestUnsupported returned when the digest algorithm is unsupported by registry.
ErrDigestUnsupported = fmt.Errorf("unsupported digest algorithm")
)
// ParseDigest parses s and returns the validated digest object. An error will
// be returned if the format is invalid.
func ParseDigest(s string) (Digest, error) {
// Common case will be tarsum
_, err := common.ParseTarSum(s)
if err == nil {
return Digest(s), nil
}
// Continue on for general parser
i := strings.Index(s, ":")
if i < 0 {
return "", ErrDigestInvalidFormat
}
// case: "sha256:" with no hex.
if i+1 == len(s) {
return "", ErrDigestInvalidFormat
}
switch s[:i] {
case "md5", "sha1", "sha256":
break
default:
return "", ErrDigestUnsupported
}
return Digest(s), nil
}
// FromReader returns the most valid digest for the underlying content.
func FromReader(rd io.Reader) (Digest, error) {
// TODO(stevvooe): This is pretty inefficient to always be calculating a
// sha256 hash to provide fallback, but it provides some nice semantics in
// that we never worry about getting the right digest for a given reader.
// For the most part, we can detect tar vs non-tar with only a few bytes,
// so a scheme that saves those bytes would probably be better here.
h := sha256.New()
tr := io.TeeReader(rd, h)
ts, err := tarsum.NewTarSum(tr, true, tarsum.Version1)
if err != nil {
return "", err
}
// Try to copy from the tarsum, if we fail, copy the remaining bytes into
// hash directly.
if _, err := io.Copy(ioutil.Discard, ts); err != nil {
if err.Error() != "archive/tar: invalid tar header" {
return "", err
}
if _, err := io.Copy(h, rd); err != nil {
return "", err
}
return NewDigest("sha256", h), nil
}
d, err := ParseDigest(ts.Sum(nil))
if err != nil {
return "", err
}
return d, nil
}
// FromBytes digests the input and returns a Digest.
func FromBytes(p []byte) (Digest, error) {
return FromReader(bytes.NewReader(p))
}
// Algorithm returns the algorithm portion of the digest. This will panic if
// the underlying digest is not in a valid format.
func (d Digest) Algorithm() string {
return string(d[:d.sepIndex()])
}
// Hex returns the hex digest portion of the digest. This will panic if the
// underlying digest is not in a valid format.
func (d Digest) Hex() string {
return string(d[d.sepIndex()+1:])
}
func (d Digest) String() string {
return string(d)
}
func (d Digest) sepIndex() int {
i := strings.Index(string(d), ":")
if i < 0 {
panic("invalid digest: " + d)
}
return i
}

80
digest/digest_test.go Normal file

@ -0,0 +1,80 @@
package digest
import "testing"
func TestParseDigest(t *testing.T) {
for _, testcase := range []struct {
input string
err error
algorithm string
hex string
}{
{
input: "tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
algorithm: "tarsum+sha256",
hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
},
{
input: "tarsum.dev+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
algorithm: "tarsum.dev+sha256",
hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
},
{
input: "tarsum.v1+sha256:220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e",
algorithm: "tarsum.v1+sha256",
hex: "220a60ecd4a3c32c282622a625a54db9ba0ff55b5ba9c29c7064a2bc358b6a3e",
},
{
input: "sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
algorithm: "sha256",
hex: "e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b",
},
{
input: "md5:d41d8cd98f00b204e9800998ecf8427e",
algorithm: "md5",
hex: "d41d8cd98f00b204e9800998ecf8427e",
},
{
// empty hex
input: "sha256:",
err: ErrDigestInvalidFormat,
},
{
// just hex
input: "d41d8cd98f00b204e9800998ecf8427e",
err: ErrDigestInvalidFormat,
},
{
input: "foo:d41d8cd98f00b204e9800998ecf8427e",
err: ErrDigestUnsupported,
},
} {
digest, err := ParseDigest(testcase.input)
if err != testcase.err {
t.Fatalf("error differed from expected while parsing %q: %v != %v", testcase.input, err, testcase.err)
}
if testcase.err != nil {
continue
}
if digest.Algorithm() != testcase.algorithm {
t.Fatalf("incorrect algorithm for parsed digest: %q != %q", digest.Algorithm(), testcase.algorithm)
}
if digest.Hex() != testcase.hex {
t.Fatalf("incorrect hex for parsed digest: %q != %q", digest.Hex(), testcase.hex)
}
// Parse string return value and check equality
newParsed, err := ParseDigest(digest.String())
if err != nil {
t.Fatalf("unexpected error parsing input %q: %v", testcase.input, err)
}
if newParsed != digest {
t.Fatalf("expected equal: %q != %q", newParsed, digest)
}
}
}

52
digest/doc.go Normal file

@ -0,0 +1,52 @@
// Package digest provides a generalized type to opaquely represent message
// digests and their operations within the registry. The Digest type is
// designed to serve as a flexible identifier in a content-addressable system.
// More importantly, it provides tools and wrappers to work with tarsums and
// hash.Hash-based digests with little effort.
//
// Basics
//
// The format of a digest is simply a string with two parts, dubbed the
// "algorithm" and the "digest", separated by a colon:
//
// <algorithm>:<digest>
//
// An example of a sha256 digest representation follows:
//
// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc
//
// In this case, the string "sha256" is the algorithm and the hex bytes are
// the "digest". A tarsum example will be more illustrative of the use case
// involved in the registry:
//
// tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b
//
// For this, we consider the algorithm to be "tarsum+sha256". Prudent
// applications will favor the ParseDigest function to verify the format over
// using simple type casts. However, a normal string can be cast as a digest
// with a simple type conversion:
//
// Digest("tarsum+sha256:e58fcf7418d4390dec8e8fb69d88c06ec07039d651fedd3aa72af9972e7d046b")
//
// Because the Digest type is simply a string, once a valid Digest is
// obtained, comparisons are cheap, quick and simple to express with the
// standard equality operator.
//
// Verification
//
// The main benefit of using the Digest type is simple verification against a
// given digest. The Verifier interface, modeled after the stdlib hash.Hash
// interface, provides a common write sink for digest verification. After
// writing is complete, calling the Verifier.Verified method will indicate
// whether or not the stream of bytes matches the target digest.
//
// Missing Features
//
// In addition to the above, we intend to add the following features to this
// package:
//
// 1. A Digester type that supports write sink digest calculation.
//
// 2. Suspend and resume of ongoing digest calculations to support efficient digest verification in the registry.
//
package digest

135
digest/verifiers.go Normal file

@ -0,0 +1,135 @@
package digest
import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"hash"
"io"
"io/ioutil"
"github.com/docker/docker/pkg/tarsum"
)
// Verifier presents a general verification interface to be used with message
// digests and other byte stream verifications. Users instantiate a Verifier
// from one of the various methods, write the data under test to it then check
// the result with the Verified method.
type Verifier interface {
io.Writer
// Verified will return true if the content written to Verifier matches
// the digest.
Verified() bool
// Planned methods:
// Err() error
// Reset()
}
// NewDigestVerifier returns a verifier that compares the written bytes
// against a passed in digest.
func NewDigestVerifier(d Digest) Verifier {
alg := d.Algorithm()
switch alg {
case "md5", "sha1", "sha256":
return hashVerifier{
hash: newHash(alg),
digest: d,
}
default:
// Assume we have a tarsum.
version, err := tarsum.GetVersionFromTarsum(string(d))
if err != nil {
panic(err) // Always assume valid tarsum at this point.
}
pr, pw := io.Pipe()
// TODO(stevvooe): We may actually want to ban the earlier versions of
// tarsum. That decision may not be the place of the verifier.
ts, err := tarsum.NewTarSum(pr, true, version)
if err != nil {
panic(err)
}
// TODO(sday): Ick! A goroutine per digest verification? We'll have to
// get the tarsum library to export an io.Writer variant.
go func() {
io.Copy(ioutil.Discard, ts)
pw.Close()
}()
return &tarsumVerifier{
digest: d,
ts: ts,
pr: pr,
pw: pw,
}
}
}
// NewLengthVerifier returns a verifier that returns true when the number of
// read bytes equals the expected parameter.
func NewLengthVerifier(expected int64) Verifier {
return &lengthVerifier{
expected: expected,
}
}
type lengthVerifier struct {
expected int64 // expected bytes read
len int64 // bytes read
}
func (lv *lengthVerifier) Write(p []byte) (n int, err error) {
n = len(p)
lv.len += int64(n)
return n, err
}
func (lv *lengthVerifier) Verified() bool {
return lv.expected == lv.len
}
func newHash(name string) hash.Hash {
switch name {
case "sha256":
return sha256.New()
case "sha1":
return sha1.New()
case "md5":
return md5.New()
default:
panic("unsupport algorithm: " + name)
}
}
type hashVerifier struct {
digest Digest
hash hash.Hash
}
func (hv hashVerifier) Write(p []byte) (n int, err error) {
return hv.hash.Write(p)
}
func (hv hashVerifier) Verified() bool {
return hv.digest == NewDigest(hv.digest.Algorithm(), hv.hash)
}
type tarsumVerifier struct {
digest Digest
ts tarsum.TarSum
pr *io.PipeReader
pw *io.PipeWriter
}
func (tv *tarsumVerifier) Write(p []byte) (n int, err error) {
return tv.pw.Write(p)
}
func (tv *tarsumVerifier) Verified() bool {
return tv.digest == Digest(tv.ts.Sum(nil))
}

71
digest/verifiers_test.go Normal file

@ -0,0 +1,71 @@
package digest
import (
"bytes"
"crypto/rand"
"io"
"os"
"testing"
"github.com/docker/docker-registry/common/testutil"
)
func TestDigestVerifier(t *testing.T) {
p := make([]byte, 1<<20)
rand.Read(p)
digest, err := FromBytes(p)
if err != nil {
t.Fatalf("unexpected error digesting bytes: %#v", err)
}
verifier := NewDigestVerifier(digest)
io.Copy(verifier, bytes.NewReader(p))
if !verifier.Verified() {
t.Fatalf("bytes not verified")
}
tf, tarSum, err := testutil.CreateRandomTarFile()
if err != nil {
t.Fatalf("error creating tarfile: %v", err)
}
digest, err = FromReader(tf)
if err != nil {
t.Fatalf("error digesting tarsum: %v", err)
}
if digest.String() != tarSum {
t.Fatalf("unexpected digest: %q != %q", digest.String(), tarSum)
}
expectedSize, _ := tf.Seek(0, os.SEEK_END) // Get tar file size
tf.Seek(0, os.SEEK_SET) // seek back
// This is the most relevant example for the registry application. It's
// effectively a read through pipeline, where the final sink is the digest
// verifier.
verifier = NewDigestVerifier(digest)
lengthVerifier := NewLengthVerifier(expectedSize)
rd := io.TeeReader(tf, lengthVerifier)
io.Copy(verifier, rd)
if !lengthVerifier.Verified() {
t.Fatalf("verifier detected incorrect length")
}
if !verifier.Verified() {
t.Fatalf("bytes not verified")
}
}
// TODO(stevvooe): Add benchmarks to measure bytes/second throughput for
// DigestVerifier. We should be tarsum/gzip limited for common cases but we
// want to verify this.
//
// The relevant benchmarks for comparison can be run with the following
// commands:
//
// go test -bench . crypto/sha1
// go test -bench . github.com/docker/docker/pkg/tarsum
//

@ -1,59 +0,0 @@
package storage
import (
"fmt"
"hash"
"strings"
)
// Digest allows simple protection of hex formatted digest strings, prefixed
// by their algorithm. Strings of type Digest have some guarantee of being in
// the correct format and it provides quick access to the components of a
// digest string.
//
// The following is an example of the contents of Digest types:
//
// sha256:7173b809ca12ec5dee4506cd86be934c4596dd234ee82c0662eac04a8c2c71dc
//
type Digest string
// NewDigest returns a Digest from alg and a hash.Hash object.
func NewDigest(alg string, h hash.Hash) Digest {
return Digest(fmt.Sprintf("%s:%x", alg, h.Sum(nil)))
}
var (
// ErrDigestInvalidFormat returned when digest format invalid.
ErrDigestInvalidFormat = fmt.Errorf("invalid checksum digest format")
// ErrDigestUnsupported returned when the digest algorithm is unsupported by registry.
ErrDigestUnsupported = fmt.Errorf("unsupported digest algorithm")
)
// ParseDigest parses s and returns the validated digest object. An error will
// be returned if the format is invalid.
func ParseDigest(s string) (Digest, error) {
parts := strings.SplitN(s, ":", 2)
if len(parts) != 2 {
return "", ErrDigestInvalidFormat
}
switch parts[0] {
case "sha256":
break
default:
return "", ErrDigestUnsupported
}
return Digest(s), nil
}
// Algorithm returns the algorithm portion of the digest.
func (d Digest) Algorithm() string {
return strings.SplitN(string(d), ":", 2)[0]
}
// Hex returns the hex digest portion of the digest.
func (d Digest) Hex() string {
return strings.SplitN(string(d), ":", 2)[1]
}

@ -4,24 +4,25 @@ import (
"fmt"
"io"
"time"
"github.com/docker/docker-registry/digest"
)
// LayerService provides operations on layer files in a backend storage.
type LayerService interface {
// Exists returns true if the layer exists.
Exists(tarSum string) (bool, error)
Exists(name string, digest digest.Digest) (bool, error)
// Fetch the layer identifed by TarSum.
Fetch(tarSum string) (Layer, error)
Fetch(name string, digest digest.Digest) (Layer, error)
// Upload begins a layer upload, returning a handle. If the layer upload
// is already in progress or the layer has already been uploaded, this
// will return an error.
Upload(name, tarSum string) (LayerUpload, error)
// Upload begins a layer upload to repository identified by name,
// returning a handle.
Upload(name string) (LayerUpload, error)
// Resume continues an in progress layer upload, returning the current
// state of the upload.
Resume(name, tarSum, uuid string) (LayerUpload, error)
Resume(uuid string) (LayerUpload, error)
}
// Layer provides a readable and seekable layer object. Typically,
@ -35,8 +36,9 @@ type Layer interface {
// Name returns the repository under which this layer is linked.
Name() string // TODO(stevvooe): struggling with nomenclature: should this be "repo" or "name"?
// TarSum returns the unique tarsum of the layer.
TarSum() string
// Digest returns the unique digest of the blob, which is the tarsum for
// layers.
Digest() digest.Digest
// CreatedAt returns the time this layer was created. Until we implement
// Stat call on storagedriver, this just returns the zero time.
@ -55,18 +57,13 @@ type LayerUpload interface {
// Name of the repository under which the layer will be linked.
Name() string
// TarSum identifier of the proposed layer. Resulting data must match this
// tarsum.
TarSum() string
// Offset returns the position of the last byte written to this layer.
Offset() int64
// Finish marks the upload as completed, returning a valid handle to the
// uploaded layer. The final size and checksum are validated against the
// contents of the uploaded layer. The checksum should be provided in the
// format <algorithm>:<hex digest>.
Finish(size int64, digest string) (Layer, error)
// uploaded layer. The final size and digest are validated against the
// contents of the uploaded layer.
Finish(size int64, digest digest.Digest) (Layer, error)
// Cancel the layer upload process.
Cancel() error
@ -85,11 +82,8 @@ var (
// ErrLayerUploadUnknown returned when upload is not found.
ErrLayerUploadUnknown = fmt.Errorf("layer upload unknown")
// ErrLayerInvalidChecksum returned when checksum/digest check fails.
ErrLayerInvalidChecksum = fmt.Errorf("invalid layer checksum")
// ErrLayerInvalidTarsum returned when tarsum check fails.
ErrLayerInvalidTarsum = fmt.Errorf("invalid layer tarsum")
// ErrLayerInvalidDigest returned when tarsum check fails.
ErrLayerInvalidDigest = fmt.Errorf("invalid layer digest")
// ErrLayerInvalidLength returned when length check fails.
ErrLayerInvalidLength = fmt.Errorf("invalid layer length")

@ -1,20 +1,16 @@
package storage
import (
"archive/tar"
"bytes"
"crypto/rand"
"crypto/sha256"
"fmt"
"io"
"io/ioutil"
mrand "math/rand"
"os"
"testing"
"time"
"github.com/docker/docker/pkg/tarsum"
"github.com/docker/docker-registry/common/testutil"
"github.com/docker/docker-registry/digest"
"github.com/docker/docker-registry/storagedriver"
"github.com/docker/docker-registry/storagedriver/inmemory"
)
@ -22,12 +18,14 @@ import (
// TestSimpleLayerUpload covers the layer upload process, exercising common
// error paths that might be seen during an upload.
func TestSimpleLayerUpload(t *testing.T) {
randomDataReader, tarSum, err := createRandomReader()
randomDataReader, tarSumStr, err := testutil.CreateRandomTarFile()
if err != nil {
t.Fatalf("error creating random reader: %v", err)
}
dgst := digest.Digest(tarSumStr)
uploadStore, err := newTemporaryLocalFSLayerUploadStore()
if err != nil {
t.Fatalf("error allocating upload store: %v", err)
@ -48,7 +46,7 @@ func TestSimpleLayerUpload(t *testing.T) {
h := sha256.New()
rd := io.TeeReader(randomDataReader, h)
layerUpload, err := ls.Upload(imageName, tarSum)
layerUpload, err := ls.Upload(imageName)
if err != nil {
t.Fatalf("unexpected error starting layer upload: %s", err)
@ -60,13 +58,13 @@ func TestSimpleLayerUpload(t *testing.T) {
}
// Do a resume, get unknown upload
layerUpload, err = ls.Resume(imageName, tarSum, layerUpload.UUID())
layerUpload, err = ls.Resume(layerUpload.UUID())
if err != ErrLayerUploadUnknown {
t.Fatalf("unexpected error resuming upload, should be unkown: %v", err)
}
// Restart!
layerUpload, err = ls.Upload(imageName, tarSum)
layerUpload, err = ls.Upload(imageName)
if err != nil {
t.Fatalf("unexpected error starting layer upload: %s", err)
}
@ -92,25 +90,25 @@ func TestSimpleLayerUpload(t *testing.T) {
layerUpload.Close()
// Do a resume, for good fun
layerUpload, err = ls.Resume(imageName, tarSum, layerUpload.UUID())
layerUpload, err = ls.Resume(layerUpload.UUID())
if err != nil {
t.Fatalf("unexpected error resuming upload: %v", err)
}
digest := NewDigest("sha256", h)
layer, err := layerUpload.Finish(randomDataSize, string(digest))
sha256Digest := digest.NewDigest("sha256", h)
layer, err := layerUpload.Finish(randomDataSize, dgst)
if err != nil {
t.Fatalf("unexpected error finishing layer upload: %v", err)
}
// After finishing an upload, it should no longer exist.
if _, err := ls.Resume(imageName, tarSum, layerUpload.UUID()); err != ErrLayerUploadUnknown {
if _, err := ls.Resume(layerUpload.UUID()); err != ErrLayerUploadUnknown {
t.Fatalf("expected layer upload to be unknown, got %v", err)
}
// Test for existence.
exists, err := ls.Exists(layer.TarSum())
exists, err := ls.Exists(layer.Name(), layer.Digest())
if err != nil {
t.Fatalf("unexpected error checking for existence: %v", err)
}
@ -129,8 +127,8 @@ func TestSimpleLayerUpload(t *testing.T) {
t.Fatalf("incorrect read length")
}
if NewDigest("sha256", h) != digest {
t.Fatalf("unexpected digest from uploaded layer: %q != %q", NewDigest("sha256", h), digest)
if digest.NewDigest("sha256", h) != sha256Digest {
t.Fatalf("unexpected digest from uploaded layer: %q != %q", digest.NewDigest("sha256", h), sha256Digest)
}
}
@ -148,13 +146,15 @@ func TestSimpleLayerRead(t *testing.T) {
},
}
randomLayerReader, tarSum, err := createRandomReader()
randomLayerReader, tarSumStr, err := testutil.CreateRandomTarFile()
if err != nil {
t.Fatalf("error creating random data: %v", err)
}
dgst := digest.Digest(tarSumStr)
// Test for existence.
exists, err := ls.Exists(tarSum)
exists, err := ls.Exists(imageName, dgst)
if err != nil {
t.Fatalf("unexpected error checking for existence: %v", err)
}
@ -164,7 +164,7 @@ func TestSimpleLayerRead(t *testing.T) {
}
// Try to get the layer and make sure we get a not found error
layer, err := ls.Fetch(tarSum)
layer, err := ls.Fetch(imageName, dgst)
if err == nil {
t.Fatalf("error expected fetching unknown layer")
}
@ -174,8 +174,7 @@ func TestSimpleLayerRead(t *testing.T) {
} else {
err = nil
}
randomLayerDigest, err := writeTestLayer(driver, ls.pathMapper, imageName, tarSum, randomLayerReader)
randomLayerDigest, err := writeTestLayer(driver, ls.pathMapper, imageName, dgst, randomLayerReader)
if err != nil {
t.Fatalf("unexpected error writing test layer: %v", err)
}
@ -185,7 +184,7 @@ func TestSimpleLayerRead(t *testing.T) {
t.Fatalf("error getting seeker size for random layer: %v", err)
}
layer, err = ls.Fetch(tarSum)
layer, err = ls.Fetch(imageName, dgst)
if err != nil {
t.Fatal(err)
}
@ -202,9 +201,9 @@ func TestSimpleLayerRead(t *testing.T) {
t.Fatalf("stored incorrect number of bytes in layer: %d != %d", nn, randomLayerSize)
}
digest := NewDigest("sha256", h)
if digest != randomLayerDigest {
t.Fatalf("fetched digest does not match: %q != %q", digest, randomLayerDigest)
sha256Digest := digest.NewDigest("sha256", h)
if sha256Digest != randomLayerDigest {
t.Fatalf("fetched digest does not match: %q != %q", sha256Digest, randomLayerDigest)
}
// Now seek back the layer, read the whole thing and check against randomLayerData
@ -270,12 +269,14 @@ func TestLayerReadErrors(t *testing.T) {
// writeRandomLayer creates a random layer under name and tarSum using driver
// and pathMapper. An io.ReadSeeker with the data is returned, along with the
// sha256 hex digest.
func writeRandomLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, name string) (rs io.ReadSeeker, tarSum string, digest Digest, err error) {
reader, tarSum, err := createRandomReader()
func writeRandomLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, name string) (rs io.ReadSeeker, tarSum digest.Digest, sha256digest digest.Digest, err error) {
reader, tarSumStr, err := testutil.CreateRandomTarFile()
if err != nil {
return nil, "", "", err
}
tarSum = digest.Digest(tarSumStr)
// Now, actually create the layer.
randomLayerDigest, err := writeTestLayer(driver, pathMapper, name, tarSum, ioutil.NopCloser(reader))
@ -312,91 +313,10 @@ func seekerSize(seeker io.ReadSeeker) (int64, error) {
return end, nil
}
// createRandomReader returns a random read seeker and its tarsum. The
// returned content will be a valid tar file with a random number of files and
// content.
func createRandomReader() (rs io.ReadSeeker, tarSum string, err error) {
nFiles := mrand.Intn(10) + 10
target := &bytes.Buffer{}
wr := tar.NewWriter(target)
// Perturb this on each iteration of the loop below.
header := &tar.Header{
Mode: 0644,
ModTime: time.Now(),
Typeflag: tar.TypeReg,
Uname: "randocalrissian",
Gname: "cloudcity",
AccessTime: time.Now(),
ChangeTime: time.Now(),
}
for fileNumber := 0; fileNumber < nFiles; fileNumber++ {
fileSize := mrand.Int63n(1<<20) + 1<<20
header.Name = fmt.Sprint(fileNumber)
header.Size = fileSize
if err := wr.WriteHeader(header); err != nil {
return nil, "", err
}
randomData := make([]byte, fileSize)
// Fill up the buffer with some random data.
n, err := rand.Read(randomData)
if n != len(randomData) {
return nil, "", fmt.Errorf("short read creating random reader: %v bytes != %v bytes", n, len(randomData))
}
if err != nil {
return nil, "", err
}
nn, err := io.Copy(wr, bytes.NewReader(randomData))
if nn != fileSize {
return nil, "", fmt.Errorf("short copy writing random file to tar")
}
if err != nil {
return nil, "", err
}
if err := wr.Flush(); err != nil {
return nil, "", err
}
}
if err := wr.Close(); err != nil {
return nil, "", err
}
reader := bytes.NewReader(target.Bytes())
// A tar builder that supports tarsum inline calculation would be awesome
// here.
ts, err := tarsum.NewTarSum(reader, true, tarsum.Version1)
if err != nil {
return nil, "", err
}
nn, err := io.Copy(ioutil.Discard, ts)
if nn != int64(len(target.Bytes())) {
return nil, "", fmt.Errorf("short copy when getting tarsum of random layer: %v != %v", nn, len(target.Bytes()))
}
if err != nil {
return nil, "", err
}
return bytes.NewReader(target.Bytes()), ts.Sum(nil), nil
}
// createTestLayer creates a simple test layer in the provided driver under
// tarsum, returning the string digest. This is implemented peicemeal and
// should probably be replaced by the uploader when it's ready.
func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, name, tarSum string, content io.Reader) (Digest, error) {
// tarsum dgst, returning the sha256 digest location. This is implemented
// peicemeal and should probably be replaced by the uploader when it's ready.
func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper, name string, dgst digest.Digest, content io.Reader) (digest.Digest, error) {
h := sha256.New()
rd := io.TeeReader(content, h)
@ -406,11 +326,11 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper,
return "", nil
}
digest := NewDigest("sha256", h)
blobDigestSHA := digest.NewDigest("sha256", h)
blobPath, err := pathMapper.path(blobPathSpec{
alg: digest.Algorithm(),
digest: digest.Hex(),
alg: blobDigestSHA.Algorithm(),
digest: blobDigestSHA.Hex(),
})
if err := driver.PutContent(blobPath, p); err != nil {
@ -418,7 +338,7 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper,
}
layerIndexLinkPath, err := pathMapper.path(layerIndexLinkPathSpec{
tarSum: tarSum,
digest: dgst,
})
if err != nil {
@ -427,18 +347,14 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper,
layerLinkPath, err := pathMapper.path(layerLinkPathSpec{
name: name,
tarSum: tarSum,
digest: dgst,
})
if err != nil {
return "", err
}
if err != nil {
return "", err
}
if err := driver.PutContent(layerLinkPath, []byte(string(NewDigest("sha256", h)))); err != nil {
if err := driver.PutContent(layerLinkPath, []byte(blobDigestSHA.String())); err != nil {
return "", nil
}
@ -446,5 +362,5 @@ func writeTestLayer(driver storagedriver.StorageDriver, pathMapper *pathMapper,
return "", nil
}
return NewDigest("sha256", h), err
return blobDigestSHA, err
}

@ -6,6 +6,8 @@ import (
"io"
"os"
"time"
"github.com/docker/docker-registry/digest"
)
// layerReadSeeker implements Layer and provides facilities for reading and
@ -16,7 +18,7 @@ type layerReader struct {
brd *bufio.Reader
name string // repo name of this layer
tarSum string
digest digest.Digest
path string
createdAt time.Time
@ -35,8 +37,8 @@ func (lrs *layerReader) Name() string {
return lrs.name
}
func (lrs *layerReader) TarSum() string {
return lrs.tarSum
func (lrs *layerReader) Digest() digest.Digest {
return lrs.digest
}
func (lrs *layerReader) CreatedAt() time.Time {

@ -6,6 +6,7 @@ import (
"time"
"github.com/Sirupsen/logrus"
"github.com/docker/docker-registry/digest"
"github.com/docker/docker-registry/storagedriver"
)
@ -15,10 +16,10 @@ type layerStore struct {
uploadStore layerUploadStore
}
func (ls *layerStore) Exists(tarSum string) (bool, error) {
func (ls *layerStore) Exists(name string, digest digest.Digest) (bool, error) {
// Because this implementation just follows blob links, an existence check
// is pretty cheap by starting and closing a fetch.
_, err := ls.Fetch(tarSum)
_, err := ls.Fetch(name, digest)
if err != nil {
if err == ErrLayerUnknown {
@ -31,8 +32,8 @@ func (ls *layerStore) Exists(tarSum string) (bool, error) {
return true, nil
}
func (ls *layerStore) Fetch(tarSum string) (Layer, error) {
repos, err := ls.resolveContainingRepositories(tarSum)
func (ls *layerStore) Fetch(name string, digest digest.Digest) (Layer, error) {
repos, err := ls.resolveContainingRepositories(digest)
if err != nil {
// TODO(stevvooe): Unknown tarsum error: need to wrap.
@ -44,7 +45,7 @@ func (ls *layerStore) Fetch(tarSum string) (Layer, error) {
// against the list of repos to which we have pull access. The argument
// repos needs to be filtered against that access list.
name, blobPath, err := ls.resolveBlobPath(repos, tarSum)
_, blobPath, err := ls.resolveBlobPath(repos, digest)
if err != nil {
// TODO(stevvooe): Map this error correctly, perhaps in the callee.
@ -72,7 +73,7 @@ func (ls *layerStore) Fetch(tarSum string) (Layer, error) {
layerStore: ls,
path: p,
name: name,
tarSum: tarSum,
digest: digest,
// TODO(stevvooe): Storage backend does not support modification time
// queries yet. Layers "never" change, so just return the zero value.
@ -88,25 +89,13 @@ func (ls *layerStore) Fetch(tarSum string) (Layer, error) {
// Upload begins a layer upload, returning a handle. If the layer upload
// is already in progress or the layer has already been uploaded, this
// will return an error.
func (ls *layerStore) Upload(name, tarSum string) (LayerUpload, error) {
exists, err := ls.Exists(tarSum)
if err != nil {
return nil, err
}
if exists {
// TODO(stevvoe): This looks simple now, but we really should only
// return the layer exists error when the layer exists AND the current
// client has access to the layer. If the client doesn't have access
// to the layer, the upload should proceed.
return nil, ErrLayerExists
}
func (ls *layerStore) Upload(name string) (LayerUpload, error) {
// NOTE(stevvooe): Consider the issues with allowing concurrent upload of
// the same two layers. Should it be disallowed? For now, we allow both
// parties to proceed and the the first one uploads the layer.
lus, err := ls.uploadStore.New(name, tarSum)
lus, err := ls.uploadStore.New(name)
if err != nil {
return nil, err
}
@ -116,7 +105,7 @@ func (ls *layerStore) Upload(name, tarSum string) (LayerUpload, error) {
// Resume continues an in progress layer upload, returning the current
// state of the upload.
func (ls *layerStore) Resume(name, tarSum, uuid string) (LayerUpload, error) {
func (ls *layerStore) Resume(uuid string) (LayerUpload, error) {
lus, err := ls.uploadStore.GetState(uuid)
if err != nil {
@ -135,9 +124,9 @@ func (ls *layerStore) newLayerUpload(lus LayerUploadState) LayerUpload {
}
}
func (ls *layerStore) resolveContainingRepositories(tarSum string) ([]string, error) {
func (ls *layerStore) resolveContainingRepositories(digest digest.Digest) ([]string, error) {
// Lookup the layer link in the index by tarsum id.
layerIndexLinkPath, err := ls.pathMapper.path(layerIndexLinkPathSpec{tarSum: tarSum})
layerIndexLinkPath, err := ls.pathMapper.path(layerIndexLinkPathSpec{digest: digest})
if err != nil {
return nil, err
}
@ -164,10 +153,10 @@ func (ls *layerStore) resolveContainingRepositories(tarSum string) ([]string, er
// resolveBlobId lookups up the tarSum in the various repos to find the blob
// link, returning the repo name and blob path spec or an error on failure.
func (ls *layerStore) resolveBlobPath(repos []string, tarSum string) (name string, bps blobPathSpec, err error) {
func (ls *layerStore) resolveBlobPath(repos []string, digest digest.Digest) (name string, bps blobPathSpec, err error) {
for _, repo := range repos {
pathSpec := layerLinkPathSpec{name: repo, tarSum: tarSum}
pathSpec := layerLinkPathSpec{name: repo, digest: digest}
layerLinkPath, err := ls.pathMapper.path(pathSpec)
if err != nil {
@ -199,5 +188,5 @@ func (ls *layerStore) resolveBlobPath(repos []string, tarSum string) (name strin
// TODO(stevvooe): Map this error to repo not found, but it basically
// means we exited the loop above without finding a blob link.
return "", bps, fmt.Errorf("unable to resolve blog id for repos=%v and tarSum=%q", repos, tarSum)
return "", bps, fmt.Errorf("unable to resolve blog id for repos=%v and digest=%v", repos, digest)
}

@ -1,7 +1,6 @@
package storage
import (
"crypto/sha256"
"encoding/json"
"fmt"
"io/ioutil"
@ -12,6 +11,7 @@ import (
"code.google.com/p/go-uuid/uuid"
"github.com/docker/docker-registry/digest"
"github.com/docker/docker-registry/storagedriver"
"github.com/docker/docker/pkg/tarsum"
@ -23,11 +23,6 @@ type LayerUploadState struct {
// name is the primary repository under which the layer will be linked.
Name string
// tarSum identifies the target layer. Provided by the client. If the
// resulting tarSum does not match this value, an error should be
// returned.
TarSum string
// UUID identifies the upload.
UUID string
@ -64,7 +59,7 @@ type layerFile interface {
// uploads. This interface will definitely change and will most likely end up
// being exported to the app layer. Move the layer.go when it's ready to go.
type layerUploadStore interface {
New(name, tarSum string) (LayerUploadState, error)
New(name string) (LayerUploadState, error)
Open(uuid string) (layerFile, error)
GetState(uuid string) (LayerUploadState, error)
SaveState(lus LayerUploadState) error
@ -78,12 +73,6 @@ func (luc *layerUploadController) Name() string {
return luc.LayerUploadState.Name
}
// TarSum identifier of the proposed layer. Resulting data must match this
// tarsum.
func (luc *layerUploadController) TarSum() string {
return luc.LayerUploadState.TarSum
}
// UUID returns the identifier for this upload.
func (luc *layerUploadController) UUID() string {
return luc.LayerUploadState.UUID
@ -98,7 +87,7 @@ func (luc *layerUploadController) Offset() int64 {
// uploaded layer. The final size and checksum are validated against the
// contents of the uploaded layer. The checksum should be provided in the
// format <algorithm>:<hex digest>.
func (luc *layerUploadController) Finish(size int64, digestStr string) (Layer, error) {
func (luc *layerUploadController) Finish(size int64, digest digest.Digest) (Layer, error) {
// This section is going to be pretty ugly now. We will have to read the
// file twice. First, to get the tarsum and checksum. When those are
@ -115,16 +104,11 @@ func (luc *layerUploadController) Finish(size int64, digestStr string) (Layer, e
return nil, err
}
digest, err := ParseDigest(digestStr)
digest, err = luc.validateLayer(fp, size, digest)
if err != nil {
return nil, err
}
if err := luc.validateLayer(fp, size, digest); err != nil {
// Cleanup?
return nil, err
}
if err := luc.writeLayer(fp, size, digest); err != nil {
// Cleanup?
return nil, err
@ -142,7 +126,7 @@ func (luc *layerUploadController) Finish(size int64, digestStr string) (Layer, e
return nil, err
}
return luc.layerStore.Fetch(luc.TarSum())
return luc.layerStore.Fetch(luc.Name(), digest)
}
// Cancel the layer upload process.
@ -239,69 +223,69 @@ func (luc *layerUploadController) reset() {
}
// validateLayer runs several checks on the layer file to ensure its validity.
// This is currently very expensive and relies on fast io and fast seek.
func (luc *layerUploadController) validateLayer(fp layerFile, size int64, digest Digest) error {
// This is currently very expensive and relies on fast io and fast seek on the
// local host. If successful, the latest digest is returned, which should be
// used over the passed in value.
func (luc *layerUploadController) validateLayer(fp layerFile, size int64, dgst digest.Digest) (digest.Digest, error) {
// First, check the incoming tarsum version of the digest.
version, err := tarsum.GetVersionFromTarsum(dgst.String())
if err != nil {
return "", err
}
// TODO(stevvooe): Should we push this down into the digest type?
switch version {
case tarsum.Version1:
default:
// version 0 and dev, for now.
return "", ErrLayerTarSumVersionUnsupported
}
digestVerifier := digest.NewDigestVerifier(dgst)
lengthVerifier := digest.NewLengthVerifier(size)
// First, seek to the end of the file, checking the size is as expected.
end, err := fp.Seek(0, os.SEEK_END)
if err != nil {
return err
return "", err
}
if end != size {
return ErrLayerInvalidLength
// Fast path length check.
return "", ErrLayerInvalidLength
}
// Now seek back to start and take care of tarsum and checksum.
// Now seek back to start and take care of the digest.
if _, err := fp.Seek(0, os.SEEK_SET); err != nil {
return err
return "", err
}
version, err := tarsum.GetVersionFromTarsum(luc.TarSum())
tr := io.TeeReader(fp, lengthVerifier)
tr = io.TeeReader(tr, digestVerifier)
// TODO(stevvooe): This is one of the places we need a Digester write
// sink. Instead, its read driven. This migth be okay.
// Calculate an updated digest with the latest version.
dgst, err = digest.FromReader(tr)
if err != nil {
return ErrLayerTarSumVersionUnsupported
return "", err
}
// // We only support tarsum version 1 for now.
if version != tarsum.Version1 {
return ErrLayerTarSumVersionUnsupported
if !lengthVerifier.Verified() {
return "", ErrLayerInvalidLength
}
ts, err := tarsum.NewTarSum(fp, true, tarsum.Version1)
if err != nil {
return err
if !digestVerifier.Verified() {
return "", ErrLayerInvalidDigest
}
h := sha256.New()
// Pull the layer file through by writing it to a checksum.
nn, err := io.Copy(h, ts)
if nn != int64(size) {
return fmt.Errorf("bad read while finishing upload(%s) %v: %v != %v, err=%v", luc.UUID(), fp, nn, size, err)
}
if err != nil && err != io.EOF {
return err
}
calculatedDigest := NewDigest("sha256", h)
// Compare the digests!
if digest != calculatedDigest {
return ErrLayerInvalidChecksum
}
// Compare the tarsums!
if ts.Sum(nil) != luc.TarSum() {
return ErrLayerInvalidTarsum
}
return nil
return dgst, nil
}
// writeLayer actually writes the the layer file into its final destination.
// The layer should be validated before commencing the write.
func (luc *layerUploadController) writeLayer(fp layerFile, size int64, digest Digest) error {
func (luc *layerUploadController) writeLayer(fp layerFile, size int64, digest digest.Digest) error {
blobPath, err := luc.layerStore.pathMapper.path(blobPathSpec{
alg: digest.Algorithm(),
digest: digest.Hex(),
@ -342,10 +326,10 @@ func (luc *layerUploadController) writeLayer(fp layerFile, size int64, digest Di
// linkLayer links a valid, written layer blog into the registry, first
// linking the repository namespace, then adding it to the layerindex.
func (luc *layerUploadController) linkLayer(digest Digest) error {
func (luc *layerUploadController) linkLayer(digest digest.Digest) error {
layerLinkPath, err := luc.layerStore.pathMapper.path(layerLinkPathSpec{
name: luc.Name(),
tarSum: luc.TarSum(),
digest: digest,
})
if err != nil {
@ -358,7 +342,7 @@ func (luc *layerUploadController) linkLayer(digest Digest) error {
// Link the layer into the name index.
layerIndexLinkPath, err := luc.layerStore.pathMapper.path(layerIndexLinkPathSpec{
tarSum: luc.TarSum(),
digest: digest,
})
if err != nil {
@ -435,11 +419,10 @@ func newTemporaryLocalFSLayerUploadStore() (layerUploadStore, error) {
}, nil
}
func (llufs *localFSLayerUploadStore) New(name, tarSum string) (LayerUploadState, error) {
func (llufs *localFSLayerUploadStore) New(name string) (LayerUploadState, error) {
lus := LayerUploadState{
Name: name,
TarSum: tarSum,
UUID: uuid.New(),
Name: name,
UUID: uuid.New(),
}
if err := os.Mkdir(llufs.path(lus.UUID, ""), 0755); err != nil {

@ -3,12 +3,19 @@ package storage
import (
"fmt"
"path"
"strings"
"github.com/docker/docker-registry/common"
"github.com/docker/docker-registry/digest"
)
const storagePathVersion = "v2"
// TODO(sday): This needs to be changed: all layers for an image will be
// linked under the repository. Lookup from tarsum to name is not necessary,
// so we can remove the layer index. For this to properly work, image push
// must link the images layers under the repo.
// pathMapper maps paths based on "object names" and their ids. The "object
// names" mapped by pathMapper are internal to the storage system.
//
@ -79,7 +86,12 @@ func (pm *pathMapper) path(spec pathSpec) (string, error) {
switch v := spec.(type) {
case layerLinkPathSpec:
tsi, err := common.ParseTarSum(v.tarSum)
if !strings.HasPrefix(v.digest.Algorithm(), "tarsum") {
// Only tarsum is supported, for now
return "", fmt.Errorf("unsupport content digest: %v", v.digest)
}
tsi, err := common.ParseTarSum(v.digest.String())
if err != nil {
// TODO(sday): This will return an InvalidTarSumError from
@ -93,7 +105,12 @@ func (pm *pathMapper) path(spec pathSpec) (string, error) {
return p, nil
case layerIndexLinkPathSpec:
tsi, err := common.ParseTarSum(v.tarSum)
if !strings.HasPrefix(v.digest.Algorithm(), "tarsum") {
// Only tarsum is supported, for now
return "", fmt.Errorf("unsupport content digest: %v", v.digest)
}
tsi, err := common.ParseTarSum(v.digest.String())
if err != nil {
// TODO(sday): This will return an InvalidTarSumError from
@ -136,7 +153,7 @@ type pathSpec interface {
// sha256 that can be fetched from the blob store.
type layerLinkPathSpec struct {
name string
tarSum string
digest digest.Digest
}
func (layerLinkPathSpec) pathSpec() {}
@ -152,7 +169,7 @@ func (layerLinkPathSpec) pathSpec() {}
// library/ubuntu repository. The storage layer should access the tarsum from
// the first repository to which the client has access.
type layerIndexLinkPathSpec struct {
tarSum string
digest digest.Digest
}
func (layerIndexLinkPathSpec) pathSpec() {}
@ -160,6 +177,7 @@ func (layerIndexLinkPathSpec) pathSpec() {}
// blobPath contains the path for the registry global blob store. For now,
// this contains layer data, exclusively.
type blobPathSpec struct {
// TODO(stevvooe): Port this to make better use of Digest type.
alg string
digest string
}

@ -1,6 +1,10 @@
package storage
import "testing"
import (
"testing"
"github.com/docker/docker-registry/digest"
)
func TestPathMapper(t *testing.T) {
pm := &pathMapper{
@ -15,13 +19,13 @@ func TestPathMapper(t *testing.T) {
{
spec: layerLinkPathSpec{
name: "foo/bar",
tarSum: "tarsum.v1+test:abcdef",
digest: digest.Digest("tarsum.v1+test:abcdef"),
},
expected: "/pathmapper-test/repositories/foo/bar/layers/tarsum/v1/test/abcdef",
},
{
spec: layerIndexLinkPathSpec{
tarSum: "tarsum.v1+test:abcdef",
digest: digest.Digest("tarsum.v1+test:abcdef"),
},
expected: "/pathmapper-test/layerindex/tarsum/v1/test/abcdef",
},