123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443 |
- package archive // import "github.com/docker/docker/pkg/archive"
- import (
- "archive/tar"
- "bytes"
- "context"
- "fmt"
- "io"
- "os"
- "path/filepath"
- "sort"
- "strings"
- "syscall"
- "time"
- "github.com/containerd/containerd/log"
- "github.com/docker/docker/pkg/idtools"
- "github.com/docker/docker/pkg/pools"
- "github.com/docker/docker/pkg/system"
- )
// ChangeType identifies the kind of change recorded for a path.
type ChangeType int

// The change kinds. Their String forms are "C", "A" and "D" respectively.
const (
	// ChangeModify represents the modify operation.
	ChangeModify = 0
	// ChangeAdd represents the add operation.
	ChangeAdd = 1
	// ChangeDelete represents the delete operation.
	ChangeDelete = 2
)

// String returns the one-letter code of the change type: "C" for
// ChangeModify, "A" for ChangeAdd and "D" for ChangeDelete. Any other value
// yields the empty string.
func (c ChangeType) String() string {
	codes := [...]string{
		ChangeModify: "C",
		ChangeAdd:    "A",
		ChangeDelete: "D",
	}
	if c < 0 || int(c) >= len(codes) {
		return ""
	}
	return codes[c]
}
- // Change represents a change, it wraps the change type and path.
- // It describes changes of the files in the path respect to the
- // parent layers. The change could be modify, add, delete.
- // This is used for layer diff.
- type Change struct {
- Path string
- Kind ChangeType
- }
- func (change *Change) String() string {
- return fmt.Sprintf("%s %s", change.Kind, change.Path)
- }
- // for sort.Sort
- type changesByPath []Change
- func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path }
- func (c changesByPath) Len() int { return len(c) }
- func (c changesByPath) Swap(i, j int) { c[j], c[i] = c[i], c[j] }
// sameFsTime reports whether two modification times should be treated as the
// same filesystem time.
//
// Gnu tar doesn't have sub-second mtime precision. The go tar writer (1.10+)
// does when using PAX format, but times are rounded to seconds so that
// archives keep the same hashes for backwards compatibility.
// See https://github.com/moby/moby/pull/35739/commits/fb170206ba12752214630b269a40ac7be6115ed4.
//
// Because of that lost precision, changes applied via tar files cannot be
// compared exactly. Two times are therefore considered the same when they are
// exactly equal, *or* when they fall in the same second and at least one of
// them has exactly 0 nanoseconds.
func sameFsTime(a, b time.Time) bool {
	if a.Equal(b) {
		return true
	}
	if a.Unix() != b.Unix() {
		return false
	}
	// Same second: accept only if one side carries no sub-second part.
	return a.Nanosecond() == 0 || b.Nanosecond() == 0
}
// sameFsTimeSpec is the syscall.Timespec counterpart of sameFsTime: the two
// values match when their seconds are equal and their nanoseconds are either
// equal or zero on at least one side.
func sameFsTimeSpec(a, b syscall.Timespec) bool {
	if a.Sec != b.Sec {
		return false
	}
	if a.Nsec == b.Nsec {
		return true
	}
	return a.Nsec == 0 || b.Nsec == 0
}
- // Changes walks the path rw and determines changes for the files in the path,
- // with respect to the parent layers
- func Changes(layers []string, rw string) ([]Change, error) {
- return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip)
- }
- func aufsMetadataSkip(path string) (skip bool, err error) {
- skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path)
- if err != nil {
- skip = true
- }
- return
- }
- func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) {
- f := filepath.Base(path)
- // If there is a whiteout, then the file was removed
- if strings.HasPrefix(f, WhiteoutPrefix) {
- originalFile := f[len(WhiteoutPrefix):]
- return filepath.Join(filepath.Dir(path), originalFile), nil
- }
- return "", nil
- }
// skipChange decides, for a path rebased onto the rw root, whether the walk
// in changes should leave that entry out of the result.
type skipChange func(string) (bool, error)

// deleteChange inspects an entry found while walking the rw directory. It is
// called with the rw root, the rebased path and the entry's os.FileInfo. A
// non-empty result is the path that the entry marks as deleted.
type deleteChange func(string, string, os.FileInfo) (string, error)
- func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) {
- var (
- changes []Change
- changedDirs = make(map[string]struct{})
- )
- err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
- if err != nil {
- return err
- }
- // Rebase path
- path, err = filepath.Rel(rw, path)
- if err != nil {
- return err
- }
- // As this runs on the daemon side, file paths are OS specific.
- path = filepath.Join(string(os.PathSeparator), path)
- // Skip root
- if path == string(os.PathSeparator) {
- return nil
- }
- if sc != nil {
- if skip, err := sc(path); skip {
- return err
- }
- }
- change := Change{
- Path: path,
- }
- deletedFile, err := dc(rw, path, f)
- if err != nil {
- return err
- }
- // Find out what kind of modification happened
- if deletedFile != "" {
- change.Path = deletedFile
- change.Kind = ChangeDelete
- } else {
- // Otherwise, the file was added
- change.Kind = ChangeAdd
- // ...Unless it already existed in a top layer, in which case, it's a modification
- for _, layer := range layers {
- stat, err := os.Stat(filepath.Join(layer, path))
- if err != nil && !os.IsNotExist(err) {
- return err
- }
- if err == nil {
- // The file existed in the top layer, so that's a modification
- // However, if it's a directory, maybe it wasn't actually modified.
- // If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
- if stat.IsDir() && f.IsDir() {
- if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) {
- // Both directories are the same, don't record the change
- return nil
- }
- }
- change.Kind = ChangeModify
- break
- }
- }
- }
- // If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files.
- // This block is here to ensure the change is recorded even if the
- // modify time, mode and size of the parent directory in the rw and ro layers are all equal.
- // Check https://github.com/docker/docker/pull/13590 for details.
- if f.IsDir() {
- changedDirs[path] = struct{}{}
- }
- if change.Kind == ChangeAdd || change.Kind == ChangeDelete {
- parent := filepath.Dir(path)
- if _, ok := changedDirs[parent]; !ok && parent != "/" {
- changes = append(changes, Change{Path: parent, Kind: ChangeModify})
- changedDirs[parent] = struct{}{}
- }
- }
- // Record change
- changes = append(changes, change)
- return nil
- })
- if err != nil && !os.IsNotExist(err) {
- return nil, err
- }
- return changes, nil
- }
- // FileInfo describes the information of a file.
- type FileInfo struct {
- parent *FileInfo
- name string
- stat *system.StatT
- children map[string]*FileInfo
- capability []byte
- added bool
- }
- // LookUp looks up the file information of a file.
- func (info *FileInfo) LookUp(path string) *FileInfo {
- // As this runs on the daemon side, file paths are OS specific.
- parent := info
- if path == string(os.PathSeparator) {
- return info
- }
- pathElements := strings.Split(path, string(os.PathSeparator))
- for _, elem := range pathElements {
- if elem != "" {
- child := parent.children[elem]
- if child == nil {
- return nil
- }
- parent = child
- }
- }
- return parent
- }
- func (info *FileInfo) path() string {
- if info.parent == nil {
- // As this runs on the daemon side, file paths are OS specific.
- return string(os.PathSeparator)
- }
- return filepath.Join(info.parent.path(), info.name)
- }
- func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) {
- sizeAtEntry := len(*changes)
- if oldInfo == nil {
- // add
- change := Change{
- Path: info.path(),
- Kind: ChangeAdd,
- }
- *changes = append(*changes, change)
- info.added = true
- }
- // We make a copy so we can modify it to detect additions
- // also, we only recurse on the old dir if the new info is a directory
- // otherwise any previous delete/change is considered recursive
- oldChildren := make(map[string]*FileInfo)
- if oldInfo != nil && info.isDir() {
- for k, v := range oldInfo.children {
- oldChildren[k] = v
- }
- }
- for name, newChild := range info.children {
- oldChild := oldChildren[name]
- if oldChild != nil {
- // change?
- oldStat := oldChild.stat
- newStat := newChild.stat
- // Note: We can't compare inode or ctime or blocksize here, because these change
- // when copying a file into a container. However, that is not generally a problem
- // because any content change will change mtime, and any status change should
- // be visible when actually comparing the stat fields. The only time this
- // breaks down is if some code intentionally hides a change by setting
- // back mtime
- if statDifferent(oldStat, newStat) ||
- !bytes.Equal(oldChild.capability, newChild.capability) {
- change := Change{
- Path: newChild.path(),
- Kind: ChangeModify,
- }
- *changes = append(*changes, change)
- newChild.added = true
- }
- // Remove from copy so we can detect deletions
- delete(oldChildren, name)
- }
- newChild.addChanges(oldChild, changes)
- }
- for _, oldChild := range oldChildren {
- // delete
- change := Change{
- Path: oldChild.path(),
- Kind: ChangeDelete,
- }
- *changes = append(*changes, change)
- }
- // If there were changes inside this directory, we need to add it, even if the directory
- // itself wasn't changed. This is needed to properly save and restore filesystem permissions.
- // As this runs on the daemon side, file paths are OS specific.
- if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) {
- change := Change{
- Path: info.path(),
- Kind: ChangeModify,
- }
- // Let's insert the directory entry before the recently added entries located inside this dir
- *changes = append(*changes, change) // just to resize the slice, will be overwritten
- copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:])
- (*changes)[sizeAtEntry] = change
- }
- }
- // Changes add changes to file information.
- func (info *FileInfo) Changes(oldInfo *FileInfo) []Change {
- var changes []Change
- info.addChanges(oldInfo, &changes)
- return changes
- }
- func newRootFileInfo() *FileInfo {
- // As this runs on the daemon side, file paths are OS specific.
- root := &FileInfo{
- name: string(os.PathSeparator),
- children: make(map[string]*FileInfo),
- }
- return root
- }
- // ChangesDirs compares two directories and generates an array of Change objects describing the changes.
- // If oldDir is "", then all files in newDir will be Add-Changes.
- func ChangesDirs(newDir, oldDir string) ([]Change, error) {
- var oldRoot, newRoot *FileInfo
- if oldDir == "" {
- emptyDir, err := os.MkdirTemp("", "empty")
- if err != nil {
- return nil, err
- }
- defer os.Remove(emptyDir)
- oldDir = emptyDir
- }
- oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir)
- if err != nil {
- return nil, err
- }
- return newRoot.Changes(oldRoot), nil
- }
- // ChangesSize calculates the size in bytes of the provided changes, based on newDir.
- func ChangesSize(newDir string, changes []Change) int64 {
- var (
- size int64
- sf = make(map[uint64]struct{})
- )
- for _, change := range changes {
- if change.Kind == ChangeModify || change.Kind == ChangeAdd {
- file := filepath.Join(newDir, change.Path)
- fileInfo, err := os.Lstat(file)
- if err != nil {
- log.G(context.TODO()).Errorf("Can not stat %q: %s", file, err)
- continue
- }
- if fileInfo != nil && !fileInfo.IsDir() {
- if hasHardlinks(fileInfo) {
- inode := getIno(fileInfo)
- if _, ok := sf[inode]; !ok {
- size += fileInfo.Size()
- sf[inode] = struct{}{}
- }
- } else {
- size += fileInfo.Size()
- }
- }
- }
- }
- return size
- }
- // ExportChanges produces an Archive from the provided changes, relative to dir.
- func ExportChanges(dir string, changes []Change, idMap idtools.IdentityMapping) (io.ReadCloser, error) {
- reader, writer := io.Pipe()
- go func() {
- ta := newTarAppender(idMap, writer, nil)
- // this buffer is needed for the duration of this piped stream
- defer pools.BufioWriter32KPool.Put(ta.Buffer)
- sort.Sort(changesByPath(changes))
- // In general we log errors here but ignore them because
- // during e.g. a diff operation the container can continue
- // mutating the filesystem and we can see transient errors
- // from this
- for _, change := range changes {
- if change.Kind == ChangeDelete {
- whiteOutDir := filepath.Dir(change.Path)
- whiteOutBase := filepath.Base(change.Path)
- whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase)
- timestamp := time.Now()
- hdr := &tar.Header{
- Name: whiteOut[1:],
- Size: 0,
- ModTime: timestamp,
- AccessTime: timestamp,
- ChangeTime: timestamp,
- }
- if err := ta.TarWriter.WriteHeader(hdr); err != nil {
- log.G(context.TODO()).Debugf("Can't write whiteout header: %s", err)
- }
- } else {
- path := filepath.Join(dir, change.Path)
- if err := ta.addTarFile(path, change.Path[1:]); err != nil {
- log.G(context.TODO()).Debugf("Can't add file %s to tar: %s", path, err)
- }
- }
- }
- // Make sure to check the error on Close.
- if err := ta.TarWriter.Close(); err != nil {
- log.G(context.TODO()).Debugf("Can't close layer: %s", err)
- }
- if err := writer.Close(); err != nil {
- log.G(context.TODO()).Debugf("failed close Changes writer: %s", err)
- }
- }()
- return reader, nil
- }
|