tarsum.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. // Package tarsum provides algorithms to perform checksum calculation on
  2. // filesystem layers.
  3. //
  4. // The transportation of filesystems, regarding Docker, is done with tar(1)
  5. // archives. There are a variety of tar serialization formats [2], and a key
  6. // concern here is ensuring a repeatable checksum given a set of inputs from a
  7. // generic tar archive. Types of transportation include distribution to and from a
  8. // registry endpoint, saving and loading through commands or Docker daemon APIs,
  9. // transferring the build context from client to Docker daemon, and committing the
  10. // filesystem of a container to become an image.
  11. //
  12. // As tar archives are used for transit, but not preserved in many situations, the
  13. // focus of the algorithm is to ensure the integrity of the preserved filesystem,
  14. // while maintaining a deterministic accountability. This includes neither
  15. // constraining the ordering or manipulation of the files during the creation or
  16. // unpacking of the archive, nor including additional metadata state about the
  17. // file system attributes.
  18. package tarsum // import "github.com/docker/docker/pkg/tarsum"
  19. import (
  20. "archive/tar"
  21. "bytes"
  22. "compress/gzip"
  23. "crypto"
  24. "crypto/sha256"
  25. "encoding/hex"
  26. "errors"
  27. "fmt"
  28. "hash"
  29. "io"
  30. "path"
  31. "strings"
  32. )
// Candidate sizes for the scratch buffer that (*tarSum).Read allocates. Read
// picks the smallest of these that can hold the caller's buffer, and falls
// back to the caller's exact length when it exceeds buf32K.
const (
	buf8K  = 8 * 1024
	buf16K = 16 * 1024
	buf32K = 32 * 1024
)
  38. // NewTarSum creates a new interface for calculating a fixed time checksum of a
  39. // tar archive.
  40. //
  41. // This is used for calculating checksums of layers of an image, in some cases
  42. // including the byte payload of the image's json metadata as well, and for
  43. // calculating the checksums for buildcache.
  44. func NewTarSum(r io.Reader, dc bool, v Version) (TarSum, error) {
  45. return NewTarSumHash(r, dc, v, DefaultTHash)
  46. }
  47. // NewTarSumHash creates a new TarSum, providing a THash to use rather than
  48. // the DefaultTHash.
  49. func NewTarSumHash(r io.Reader, dc bool, v Version, tHash THash) (TarSum, error) {
  50. headerSelector, err := getTarHeaderSelector(v)
  51. if err != nil {
  52. return nil, err
  53. }
  54. ts := &tarSum{Reader: r, DisableCompression: dc, tarSumVersion: v, headerSelector: headerSelector, tHash: tHash}
  55. err = ts.initTarSum()
  56. return ts, err
  57. }
  58. // NewTarSumForLabel creates a new TarSum using the provided TarSum version+hash label.
  59. func NewTarSumForLabel(r io.Reader, disableCompression bool, label string) (TarSum, error) {
  60. versionName, hashName, ok := strings.Cut(label, "+")
  61. if !ok {
  62. return nil, errors.New("tarsum label string should be of the form: {tarsum_version}+{hash_name}")
  63. }
  64. version, ok := tarSumVersionsByName[versionName]
  65. if !ok {
  66. return nil, fmt.Errorf("unknown TarSum version name: %q", versionName)
  67. }
  68. hashConfig, ok := standardHashConfigs[hashName]
  69. if !ok {
  70. return nil, fmt.Errorf("unknown TarSum hash name: %q", hashName)
  71. }
  72. tHash := NewTHash(hashConfig.name, hashConfig.hash.New)
  73. return NewTarSumHash(r, disableCompression, version, tHash)
  74. }
// TarSum is the generic interface for calculating fixed time
// checksums of a tar archive.
type TarSum interface {
	// Reader streams the processed archive; the checksum state advances as
	// the stream is read.
	io.Reader
	// GetSums returns the per-entry sums recorded so far.
	GetSums() FileInfoSums
	// Sum returns the checksum string, formatted as
	// "{version}+{hash name}:{hex digest}". When the argument is non-nil its
	// bytes are hashed ahead of the per-entry sums.
	Sum([]byte) string
	// Version reports the TarSum version in use.
	Version() Version
	// Hash reports the THash in use.
	Hash() THash
}
// tarSum struct is the structure for a Version0 checksum calculation.
type tarSum struct {
	io.Reader                      // source archive; wrapped by tarR
	tarR        *tar.Reader        // parses entries out of Reader
	tarW        *tar.Writer        // re-serialises entries into bufTar
	writer      writeCloseFlusher  // gzip writer, or a pass-through, in front of bufWriter
	bufTar      *bytes.Buffer      // staging area between tarW and writer
	bufWriter   *bytes.Buffer      // final output; Read serves the caller from here
	bufData     []byte             // scratch buffer for reads from tarR, grown on demand
	h           hash.Hash          // running hash of the current entry (selected headers + data)
	tHash       THash              // factory and name of the hash algorithm
	sums        FileInfoSums       // per-entry sums collected while reading
	fileCounter int64              // position assigned to the next recorded entry
	currentFile string             // cleaned name of the entry currently being read
	finished    bool               // set once the archive has been fully consumed
	first       bool               // true until the first header has been reached; no sum is recorded before that

	DisableCompression bool              // false by default. When false, the output is gzip compressed.
	tarSumVersion      Version           // this field is not exported so it can not be mutated during use
	headerSelector     tarHeaderSelector // handles selecting and ordering headers for files in the archive
}
  104. func (ts tarSum) Hash() THash {
  105. return ts.tHash
  106. }
  107. func (ts tarSum) Version() Version {
  108. return ts.tarSumVersion
  109. }
// THash provides a hash.Hash type generator and its name.
type THash interface {
	// Hash returns a hash.Hash to compute checksums with.
	Hash() hash.Hash
	// Name returns the algorithm name; it is embedded in the string
	// produced by TarSum.Sum.
	Name() string
}
  115. // NewTHash is a convenience method for creating a THash.
  116. func NewTHash(name string, h func() hash.Hash) THash {
  117. return simpleTHash{n: name, h: h}
  118. }
// tHashConfig pairs a hash name, as used in TarSum labels, with the
// crypto.Hash identifier that implements it.
type tHashConfig struct {
	name string      // name handed to NewTHash
	hash crypto.Hash // identifier whose New method constructs the hash
}
// standardHashConfigs lists the hash algorithms that NewTarSumForLabel
// accepts, keyed by the hash name found in a label.
//
// NOTE: DO NOT include MD5 or SHA1, which are considered insecure.
var standardHashConfigs = map[string]tHashConfig{
	"sha256": {name: "sha256", hash: crypto.SHA256},
	"sha512": {name: "sha512", hash: crypto.SHA512},
}
// DefaultTHash is the default TarSum hashing algorithm - "sha256". It is used
// by NewTarSum and whenever a tarSum is initialised without a THash.
var DefaultTHash = NewTHash("sha256", sha256.New)
  130. type simpleTHash struct {
  131. n string
  132. h func() hash.Hash
  133. }
  134. func (sth simpleTHash) Name() string { return sth.n }
  135. func (sth simpleTHash) Hash() hash.Hash { return sth.h() }
  136. func (ts *tarSum) encodeHeader(h *tar.Header) error {
  137. for _, elem := range ts.headerSelector.selectHeaders(h) {
  138. // Ignore these headers to be compatible with versions
  139. // before go 1.10
  140. if elem[0] == "gname" || elem[0] == "uname" {
  141. elem[1] = ""
  142. }
  143. if _, err := ts.h.Write([]byte(elem[0] + elem[1])); err != nil {
  144. return err
  145. }
  146. }
  147. return nil
  148. }
  149. func (ts *tarSum) initTarSum() error {
  150. ts.bufTar = bytes.NewBuffer([]byte{})
  151. ts.bufWriter = bytes.NewBuffer([]byte{})
  152. ts.tarR = tar.NewReader(ts.Reader)
  153. ts.tarW = tar.NewWriter(ts.bufTar)
  154. if !ts.DisableCompression {
  155. ts.writer = gzip.NewWriter(ts.bufWriter)
  156. } else {
  157. ts.writer = &nopCloseFlusher{Writer: ts.bufWriter}
  158. }
  159. if ts.tHash == nil {
  160. ts.tHash = DefaultTHash
  161. }
  162. ts.h = ts.tHash.Hash()
  163. ts.h.Reset()
  164. ts.first = true
  165. ts.sums = FileInfoSums{}
  166. return nil
  167. }
// Read implements io.Reader. Each call pulls up to len(buf) bytes of entry
// data from the source archive, adds them to the running hash, re-encodes
// them through the tar writer and the output writer, and then serves the
// caller from the output buffer.
//
// When the current entry is exhausted, its sum is recorded (except before the
// first header has been reached) and the next header is read, hashed and
// written out. Once the archive ends, both writers are closed and later calls
// only drain the remaining buffered output.
func (ts *tarSum) Read(buf []byte) (int, error) {
	// The archive has been fully processed; just hand out what is left.
	if ts.finished {
		return ts.bufWriter.Read(buf)
	}
	// Grow the scratch buffer when it cannot hold a read of len(buf) bytes.
	if len(ts.bufData) < len(buf) {
		switch {
		case len(buf) <= buf8K:
			ts.bufData = make([]byte, buf8K)
		case len(buf) <= buf16K:
			ts.bufData = make([]byte, buf16K)
		case len(buf) <= buf32K:
			ts.bufData = make([]byte, buf32K)
		default:
			ts.bufData = make([]byte, len(buf))
		}
	}
	buf2 := ts.bufData[:len(buf)]

	n, err := ts.tarR.Read(buf2)
	if err != nil {
		// io.EOF from the tar reader means the current entry has no more
		// data (archive/tar reports it until Next is called).
		if err == io.EOF {
			// Account for any bytes returned together with the EOF.
			if _, err := ts.h.Write(buf2[:n]); err != nil {
				return 0, err
			}
			if !ts.first {
				// Finish the entry: record its sum and start a fresh hash.
				ts.sums = append(ts.sums, fileInfoSum{name: ts.currentFile, sum: hex.EncodeToString(ts.h.Sum(nil)), pos: ts.fileCounter})
				ts.fileCounter++
				ts.h.Reset()
			} else {
				// No entry has been opened yet, so there is nothing to record.
				ts.first = false
			}

			if _, err := ts.tarW.Write(buf2[:n]); err != nil {
				return 0, err
			}

			currentHeader, err := ts.tarR.Next()
			if err != nil {
				if err == io.EOF {
					// End of archive: close the tar writer, push everything
					// through the output writer and close that as well.
					if err := ts.tarW.Close(); err != nil {
						return 0, err
					}
					if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
						return 0, err
					}
					if err := ts.writer.Close(); err != nil {
						return 0, err
					}
					ts.finished = true
					return ts.bufWriter.Read(buf)
				}
				return 0, err
			}

			// Anchor the name at "/" and clean it before making it relative
			// again, so ".." elements cannot climb above the root.
			//#nosec G305 -- The joined path is not passed to any filesystem APIs.
			ts.currentFile = path.Join(".", path.Join("/", currentHeader.Name))
			// Hash the selected header fields, then emit the header itself.
			if err := ts.encodeHeader(currentHeader); err != nil {
				return 0, err
			}
			if err := ts.tarW.WriteHeader(currentHeader); err != nil {
				return 0, err
			}

			if _, err := io.Copy(ts.writer, ts.bufTar); err != nil {
				return 0, err
			}
			// NOTE(review): any value returned by Flush is discarded here —
			// confirm against the writeCloseFlusher definition.
			ts.writer.Flush()

			return ts.bufWriter.Read(buf)
		}
		return 0, err
	}

	// Filling the hash buffer
	if _, err = ts.h.Write(buf2[:n]); err != nil {
		return 0, err
	}

	// Filling the tar writer
	if _, err = ts.tarW.Write(buf2[:n]); err != nil {
		return 0, err
	}

	// Filling the output writer
	if _, err = io.Copy(ts.writer, ts.bufTar); err != nil {
		return 0, err
	}
	// NOTE(review): any value returned by Flush is discarded here as well.
	ts.writer.Flush()

	return ts.bufWriter.Read(buf)
}
  249. func (ts *tarSum) Sum(extra []byte) string {
  250. ts.sums.SortBySums()
  251. h := ts.tHash.Hash()
  252. if extra != nil {
  253. h.Write(extra)
  254. }
  255. for _, fis := range ts.sums {
  256. h.Write([]byte(fis.Sum()))
  257. }
  258. checksum := ts.Version().String() + "+" + ts.tHash.Name() + ":" + hex.EncodeToString(h.Sum(nil))
  259. return checksum
  260. }
  261. func (ts *tarSum) GetSums() FileInfoSums {
  262. return ts.sums
  263. }