123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295 |
- // Package tarsum provides algorithms to perform checksum calculation on
- // filesystem layers.
- //
- // The transportation of filesystems, regarding Docker, is done with tar(1)
- // archives. There are a variety of tar serialization formats [2], and a key
- // concern here is ensuring a repeatable checksum given a set of inputs from a
- // generic tar archive. Types of transportation include distribution to and from a
- // registry endpoint, saving and loading through commands or Docker daemon APIs,
- // transferring the build context from client to Docker daemon, and committing the
- // filesystem of a container to become an image.
- //
- // As tar archives are used for transit, but not preserved in many situations, the
- // focus of the algorithm is to ensure the integrity of the preserved filesystem,
- // while maintaining a deterministic accountability. This includes neither
- // constraining the ordering or manipulation of the files during the creation or
- // unpacking of the archive, nor including additional metadata state about the
- // file system attributes.
- package tarsum
- import (
- "archive/tar"
- "bytes"
- "compress/gzip"
- "crypto"
- "crypto/sha256"
- "encoding/hex"
- "errors"
- "fmt"
- "hash"
- "io"
- "path"
- "strings"
- )
// Size tiers, in bytes, for the scratch buffer that (*tarSum).Read allocates
// when the caller's buffer is larger than the one it currently holds.
const (
	buf8K  = 8 << 10
	buf16K = 16 << 10
	buf32K = 32 << 10
)
- // NewTarSum creates a new interface for calculating a fixed time checksum of a
- // tar archive.
- //
- // This is used for calculating checksums of layers of an image, in some cases
- // including the byte payload of the image's json metadata as well, and for
- // calculating the checksums for buildcache.
- func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
- return NewTarSumHash(r, dc, v, DefaultTHash)
- }
- // NewTarSumHash creates a new TarSum, providing a THash to use rather than
- // the DefaultTHash.
- func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
- headerSelector, err := getTarHeaderSelector(v)
- if err != nil {
- return nil, err
- }
- ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}
- err = ts.initTarSum()
- return ts, err
- }
- // NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label.
- func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) {
- parts := strings.SplitN(label, "+", 2)
- if len(parts) != 2 {
- return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}")
- }
- versionName, hashName := parts[0], parts[1]
- version, ok := tarSumVersionsByName[versionName]
- if !ok {
- return nil, fmt.Errorf("unknown TarSum version name: %q", versionName)
- }
- hashConfig, ok := standardHashConfigs[hashName]
- if !ok {
- return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName)
- }
- tHash := NewTHash(hashConfig.name, hashConfig.hash.New)
- return NewTarSumHash(r, disableCompression, version, tHash)
- }
- // TarSum is the generic interface for calculating fixed time
- // checksums of a tar archive.
- type TarSum interface {
- io.Reader
- GetSums() FileInfoSums
- Sum([]byte) string
- Version() Version
- Hash() THash
- }
- // tarSum struct is the structure for a Version0 checksum calculation.
- type tarSum struct {
- io.Reader
- tarR *tar.Reader
- tarW *tar.Writer
- writer writeCloseFlusher
- bufTar *bytes.Buffer
- bufWriter *bytes.Buffer
- bufData []byte
- h hash.Hash
- tHash THash
- sums FileInfoSums
- fileCounter int64
- currentFile string
- finished bool
- first bool
- DisableCompression bool // false by default. When false, the output gzip compressed.
- tarSumVersion Version // this field is not exported so it can not be mutated during use
- headerSelector tarHeaderSelector // handles selecting and ordering headers for files in the archive
- }
- func (ts tarSum) Hash() THash {
- return ts.tHash
- }
- func (ts tarSum) Version() Version {
- return ts.tarSumVersion
- }
// THash provides a hash.Hash type generator and its name.
type THash interface {
	// Hash returns a hash.Hash obtained from the generator.
	Hash() hash.Hash

	// Name returns the name associated with the generator.
	Name() string
}

// NewTHash is a convenience method for creating a THash from a name and a
// hash constructor.
func NewTHash(name string, h func() hash.Hash) THash {
	var th simpleTHash
	th.n, th.h = name, h
	return th
}

// tHashConfig ties a hash name to the crypto.Hash identifying it.
type tHashConfig struct {
	name string
	hash crypto.Hash
}

// standardHashConfigs lists the hash algorithms that can be selected by name.
//
// NOTE: DO NOT include MD5 or SHA1, which are considered insecure.
var standardHashConfigs = map[string]tHashConfig{
	"sha256": {name: "sha256", hash: crypto.SHA256},
	"sha512": {name: "sha512", hash: crypto.SHA512},
}

// DefaultTHash is default TarSum hashing algorithm - "sha256".
var DefaultTHash THash = simpleTHash{n: "sha256", h: sha256.New}

// simpleTHash is the THash implementation returned by NewTHash: a name plus
// the constructor it delegates to.
type simpleTHash struct {
	n string
	h func() hash.Hash
}

// Name returns the stored name.
func (s simpleTHash) Name() string {
	return s.n
}

// Hash invokes the stored constructor and returns its result.
func (s simpleTHash) Hash() hash.Hash {
	return s.h()
}
- func (ts *tarSum) encodeHeader(h *tar.Header) error {
- for _, elem := range ts.headerSelector.selectHeaders(h) {
- if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
- return err
- }
- }
- return nil
- }
- func (ts *tarSum) initTarSum() error {
- ts.bufTar = bytes.NewBuffer([]byte{})
- ts.bufWriter = bytes.NewBuffer([]byte{})
- ts.tarR = tar.NewReader(ts.Reader)
- ts.tarW = tar.NewWriter(ts.bufTar)
- if !ts.DisableCompression {
- ts.writer = gzip.NewWriter(ts.bufWriter)
- } else {
- ts.writer = &nopCloseFlusher{Writer: ts.bufWriter}
- }
- if ts.tHash == nil {
- ts.tHash = DefaultTHash
- }
- ts.h = ts.tHash.Hash()
- ts.h.Reset()
- ts.first = true
- ts.sums = FileInfoSums{}
- return nil
- }
- func (ts *tarSum) Read(buf []byte) (int, error) {
- if ts.finished {
- return ts.bufWriter.Read(buf)
- }
- if len(ts.bufData) < len(buf) {
- switch {
- case len(buf) <= buf8K:
- ts.bufData = make([]byte, buf8K)
- case len(buf) <= buf16K:
- ts.bufData = make([]byte, buf16K)
- case len(buf) <= buf32K:
- ts.bufData = make([]byte, buf32K)
- default:
- ts.bufData = make([]byte, len(buf))
- }
- }
- buf2 := ts.bufData[:len(buf)]
- n, err := ts.tarR.Read(buf2)
- if err != nil {
- if err == io.EOF {
- if _, err := ts.h.Write(buf2[:n]); err != nil {
- return 0, err
- }
- if !ts.first {
- ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
- ts.fileCounter++
- ts.h.Reset()
- } else {
- ts.first = false
- }
- currentHeader, err := ts.tarR.Next()
- if err != nil {
- if err == io.EOF {
- if err := ts.tarW.Close(); err != nil {
- return 0, err
- }
- if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
- return 0, err
- }
- if err := ts.writer.Close(); err != nil {
- return 0, err
- }
- ts.finished = true
- return n, nil
- }
- return n, err
- }
- ts.currentFile = path.Clean(currentHeader.Name)
- if err := ts.encodeHeader(currentHeader); err != nil {
- return 0, err
- }
- if err := ts.tarW.WriteHeader(currentHeader); err != nil {
- return 0, err
- }
- if _, err := ts.tarW.Write(buf2[:n]); err != nil {
- return 0, err
- }
- ts.tarW.Flush()
- if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
- return 0, err
- }
- ts.writer.Flush()
- return ts.bufWriter.Read(buf)
- }
- return n, err
- }
- // Filling the hash buffer
- if _, err = ts.h.Write(buf2[:n]); err != nil {
- return 0, err
- }
- // Filling the tar writer
- if _, err = ts.tarW.Write(buf2[:n]); err != nil {
- return 0, err
- }
- ts.tarW.Flush()
- // Filling the output writer
- if _, err = io.Copy(ts.writer, ts.bufTar); err != nil {
- return 0, err
- }
- ts.writer.Flush()
- return ts.bufWriter.Read(buf)
- }
- func (ts *tarSum) Sum(extra []byte) string {
- ts.sums.SortBySums()
- h := ts.tHash.Hash()
- if extra != nil {
- h.Write(extra)
- }
- for _, fis := range ts.sums {
- h.Write([]byte(fis.Sum()))
- }
- checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil))
- return checksum
- }
- func (ts *tarSum) GetSums() FileInfoSums {
- return ts.sums
- }
|