changes.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. package archive // import "github.com/docker/docker/pkg/archive"
  2. import (
  3. "archive/tar"
  4. "bytes"
  5. "fmt"
  6. "io"
  7. "os"
  8. "path/filepath"
  9. "sort"
  10. "strings"
  11. "syscall"
  12. "time"
  13. "github.com/docker/docker/pkg/idtools"
  14. "github.com/docker/docker/pkg/pools"
  15. "github.com/docker/docker/pkg/system"
  16. "github.com/sirupsen/logrus"
  17. )
  18. // ChangeType represents the change type.
  19. type ChangeType int
  20. const (
  21. // ChangeModify represents the modify operation.
  22. ChangeModify = iota
  23. // ChangeAdd represents the add operation.
  24. ChangeAdd
  25. // ChangeDelete represents the delete operation.
  26. ChangeDelete
  27. )
  28. func (c ChangeType) String() string {
  29. switch c {
  30. case ChangeModify:
  31. return "C"
  32. case ChangeAdd:
  33. return "A"
  34. case ChangeDelete:
  35. return "D"
  36. }
  37. return ""
  38. }
  39. // Change represents a change, it wraps the change type and path.
  40. // It describes changes of the files in the path respect to the
  41. // parent layers. The change could be modify, add, delete.
  42. // This is used for layer diff.
  43. type Change struct {
  44. Path string
  45. Kind ChangeType
  46. }
  47. func (change *Change) String() string {
  48. return fmt.Sprintf("%s %s", change.Kind, change.Path)
  49. }
  50. // for sort.Sort
  51. type changesByPath []Change
  52. func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path }
  53. func (c changesByPath) Len() int { return len(c) }
  54. func (c changesByPath) Swap(i, j int) { c[j], c[i] = c[i], c[j] }
  55. // Gnu tar doesn't have sub-second mtime precision. The go tar
  56. // writer (1.10+) does when using PAX format, but we round times to seconds
  57. // to ensure archives have the same hashes for backwards compatibility.
  58. // See https://github.com/moby/moby/pull/35739/commits/fb170206ba12752214630b269a40ac7be6115ed4.
  59. //
  60. // Non-sub-second is problematic when we apply changes via tar
  61. // files. We handle this by comparing for exact times, *or* same
  62. // second count and either a or b having exactly 0 nanoseconds
  63. func sameFsTime(a, b time.Time) bool {
  64. return a.Equal(b) ||
  65. (a.Unix() == b.Unix() &&
  66. (a.Nanosecond() == 0 || b.Nanosecond() == 0))
  67. }
  68. func sameFsTimeSpec(a, b syscall.Timespec) bool {
  69. return a.Sec == b.Sec &&
  70. (a.Nsec == b.Nsec || a.Nsec == 0 || b.Nsec == 0)
  71. }
  72. // Changes walks the path rw and determines changes for the files in the path,
  73. // with respect to the parent layers
  74. func Changes(layers []string, rw string) ([]Change, error) {
  75. return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip)
  76. }
  77. func aufsMetadataSkip(path string) (skip bool, err error) {
  78. skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path)
  79. if err != nil {
  80. skip = true
  81. }
  82. return
  83. }
  84. func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) {
  85. f := filepath.Base(path)
  86. // If there is a whiteout, then the file was removed
  87. if strings.HasPrefix(f, WhiteoutPrefix) {
  88. originalFile := f[len(WhiteoutPrefix):]
  89. return filepath.Join(filepath.Dir(path), originalFile), nil
  90. }
  91. return "", nil
  92. }
  93. type skipChange func(string) (bool, error)
  94. type deleteChange func(string, string, os.FileInfo) (string, error)
  95. func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) {
  96. var (
  97. changes []Change
  98. changedDirs = make(map[string]struct{})
  99. )
  100. err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
  101. if err != nil {
  102. return err
  103. }
  104. // Rebase path
  105. path, err = filepath.Rel(rw, path)
  106. if err != nil {
  107. return err
  108. }
  109. // As this runs on the daemon side, file paths are OS specific.
  110. path = filepath.Join(string(os.PathSeparator), path)
  111. // Skip root
  112. if path == string(os.PathSeparator) {
  113. return nil
  114. }
  115. if sc != nil {
  116. if skip, err := sc(path); skip {
  117. return err
  118. }
  119. }
  120. change := Change{
  121. Path: path,
  122. }
  123. deletedFile, err := dc(rw, path, f)
  124. if err != nil {
  125. return err
  126. }
  127. // Find out what kind of modification happened
  128. if deletedFile != "" {
  129. change.Path = deletedFile
  130. change.Kind = ChangeDelete
  131. } else {
  132. // Otherwise, the file was added
  133. change.Kind = ChangeAdd
  134. // ...Unless it already existed in a top layer, in which case, it's a modification
  135. for _, layer := range layers {
  136. stat, err := os.Stat(filepath.Join(layer, path))
  137. if err != nil && !os.IsNotExist(err) {
  138. return err
  139. }
  140. if err == nil {
  141. // The file existed in the top layer, so that's a modification
  142. // However, if it's a directory, maybe it wasn't actually modified.
  143. // If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
  144. if stat.IsDir() && f.IsDir() {
  145. if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) {
  146. // Both directories are the same, don't record the change
  147. return nil
  148. }
  149. }
  150. change.Kind = ChangeModify
  151. break
  152. }
  153. }
  154. }
  155. // If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files.
  156. // This block is here to ensure the change is recorded even if the
  157. // modify time, mode and size of the parent directory in the rw and ro layers are all equal.
  158. // Check https://github.com/docker/docker/pull/13590 for details.
  159. if f.IsDir() {
  160. changedDirs[path] = struct{}{}
  161. }
  162. if change.Kind == ChangeAdd || change.Kind == ChangeDelete {
  163. parent := filepath.Dir(path)
  164. if _, ok := changedDirs[parent]; !ok && parent != "/" {
  165. changes = append(changes, Change{Path: parent, Kind: ChangeModify})
  166. changedDirs[parent] = struct{}{}
  167. }
  168. }
  169. // Record change
  170. changes = append(changes, change)
  171. return nil
  172. })
  173. if err != nil && !os.IsNotExist(err) {
  174. return nil, err
  175. }
  176. return changes, nil
  177. }
  178. // FileInfo describes the information of a file.
  179. type FileInfo struct {
  180. parent *FileInfo
  181. name string
  182. stat *system.StatT
  183. children map[string]*FileInfo
  184. capability []byte
  185. added bool
  186. }
  187. // LookUp looks up the file information of a file.
  188. func (info *FileInfo) LookUp(path string) *FileInfo {
  189. // As this runs on the daemon side, file paths are OS specific.
  190. parent := info
  191. if path == string(os.PathSeparator) {
  192. return info
  193. }
  194. pathElements := strings.Split(path, string(os.PathSeparator))
  195. for _, elem := range pathElements {
  196. if elem != "" {
  197. child := parent.children[elem]
  198. if child == nil {
  199. return nil
  200. }
  201. parent = child
  202. }
  203. }
  204. return parent
  205. }
  206. func (info *FileInfo) path() string {
  207. if info.parent == nil {
  208. // As this runs on the daemon side, file paths are OS specific.
  209. return string(os.PathSeparator)
  210. }
  211. return filepath.Join(info.parent.path(), info.name)
  212. }
  213. func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) {
  214. sizeAtEntry := len(*changes)
  215. if oldInfo == nil {
  216. // add
  217. change := Change{
  218. Path: info.path(),
  219. Kind: ChangeAdd,
  220. }
  221. *changes = append(*changes, change)
  222. info.added = true
  223. }
  224. // We make a copy so we can modify it to detect additions
  225. // also, we only recurse on the old dir if the new info is a directory
  226. // otherwise any previous delete/change is considered recursive
  227. oldChildren := make(map[string]*FileInfo)
  228. if oldInfo != nil && info.isDir() {
  229. for k, v := range oldInfo.children {
  230. oldChildren[k] = v
  231. }
  232. }
  233. for name, newChild := range info.children {
  234. oldChild := oldChildren[name]
  235. if oldChild != nil {
  236. // change?
  237. oldStat := oldChild.stat
  238. newStat := newChild.stat
  239. // Note: We can't compare inode or ctime or blocksize here, because these change
  240. // when copying a file into a container. However, that is not generally a problem
  241. // because any content change will change mtime, and any status change should
  242. // be visible when actually comparing the stat fields. The only time this
  243. // breaks down is if some code intentionally hides a change by setting
  244. // back mtime
  245. if statDifferent(oldStat, newStat) ||
  246. !bytes.Equal(oldChild.capability, newChild.capability) {
  247. change := Change{
  248. Path: newChild.path(),
  249. Kind: ChangeModify,
  250. }
  251. *changes = append(*changes, change)
  252. newChild.added = true
  253. }
  254. // Remove from copy so we can detect deletions
  255. delete(oldChildren, name)
  256. }
  257. newChild.addChanges(oldChild, changes)
  258. }
  259. for _, oldChild := range oldChildren {
  260. // delete
  261. change := Change{
  262. Path: oldChild.path(),
  263. Kind: ChangeDelete,
  264. }
  265. *changes = append(*changes, change)
  266. }
  267. // If there were changes inside this directory, we need to add it, even if the directory
  268. // itself wasn't changed. This is needed to properly save and restore filesystem permissions.
  269. // As this runs on the daemon side, file paths are OS specific.
  270. if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) {
  271. change := Change{
  272. Path: info.path(),
  273. Kind: ChangeModify,
  274. }
  275. // Let's insert the directory entry before the recently added entries located inside this dir
  276. *changes = append(*changes, change) // just to resize the slice, will be overwritten
  277. copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:])
  278. (*changes)[sizeAtEntry] = change
  279. }
  280. }
  281. // Changes add changes to file information.
  282. func (info *FileInfo) Changes(oldInfo *FileInfo) []Change {
  283. var changes []Change
  284. info.addChanges(oldInfo, &changes)
  285. return changes
  286. }
  287. func newRootFileInfo() *FileInfo {
  288. // As this runs on the daemon side, file paths are OS specific.
  289. root := &FileInfo{
  290. name: string(os.PathSeparator),
  291. children: make(map[string]*FileInfo),
  292. }
  293. return root
  294. }
  295. // ChangesDirs compares two directories and generates an array of Change objects describing the changes.
  296. // If oldDir is "", then all files in newDir will be Add-Changes.
  297. func ChangesDirs(newDir, oldDir string) ([]Change, error) {
  298. var (
  299. oldRoot, newRoot *FileInfo
  300. )
  301. if oldDir == "" {
  302. emptyDir, err := os.MkdirTemp("", "empty")
  303. if err != nil {
  304. return nil, err
  305. }
  306. defer os.Remove(emptyDir)
  307. oldDir = emptyDir
  308. }
  309. oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir)
  310. if err != nil {
  311. return nil, err
  312. }
  313. return newRoot.Changes(oldRoot), nil
  314. }
  315. // ChangesSize calculates the size in bytes of the provided changes, based on newDir.
  316. func ChangesSize(newDir string, changes []Change) int64 {
  317. var (
  318. size int64
  319. sf = make(map[uint64]struct{})
  320. )
  321. for _, change := range changes {
  322. if change.Kind == ChangeModify || change.Kind == ChangeAdd {
  323. file := filepath.Join(newDir, change.Path)
  324. fileInfo, err := os.Lstat(file)
  325. if err != nil {
  326. logrus.Errorf("Can not stat %q: %s", file, err)
  327. continue
  328. }
  329. if fileInfo != nil && !fileInfo.IsDir() {
  330. if hasHardlinks(fileInfo) {
  331. inode := getIno(fileInfo)
  332. if _, ok := sf[inode]; !ok {
  333. size += fileInfo.Size()
  334. sf[inode] = struct{}{}
  335. }
  336. } else {
  337. size += fileInfo.Size()
  338. }
  339. }
  340. }
  341. }
  342. return size
  343. }
  344. // ExportChanges produces an Archive from the provided changes, relative to dir.
  345. func ExportChanges(dir string, changes []Change, idMap idtools.IdentityMapping) (io.ReadCloser, error) {
  346. reader, writer := io.Pipe()
  347. go func() {
  348. ta := newTarAppender(idMap, writer, nil)
  349. // this buffer is needed for the duration of this piped stream
  350. defer pools.BufioWriter32KPool.Put(ta.Buffer)
  351. sort.Sort(changesByPath(changes))
  352. // In general we log errors here but ignore them because
  353. // during e.g. a diff operation the container can continue
  354. // mutating the filesystem and we can see transient errors
  355. // from this
  356. for _, change := range changes {
  357. if change.Kind == ChangeDelete {
  358. whiteOutDir := filepath.Dir(change.Path)
  359. whiteOutBase := filepath.Base(change.Path)
  360. whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase)
  361. timestamp := time.Now()
  362. hdr := &tar.Header{
  363. Name: whiteOut[1:],
  364. Size: 0,
  365. ModTime: timestamp,
  366. AccessTime: timestamp,
  367. ChangeTime: timestamp,
  368. }
  369. if err := ta.TarWriter.WriteHeader(hdr); err != nil {
  370. logrus.Debugf("Can't write whiteout header: %s", err)
  371. }
  372. } else {
  373. path := filepath.Join(dir, change.Path)
  374. if err := ta.addTarFile(path, change.Path[1:]); err != nil {
  375. logrus.Debugf("Can't add file %s to tar: %s", path, err)
  376. }
  377. }
  378. }
  379. // Make sure to check the error on Close.
  380. if err := ta.TarWriter.Close(); err != nil {
  381. logrus.Debugf("Can't close layer: %s", err)
  382. }
  383. if err := writer.Close(); err != nil {
  384. logrus.Debugf("failed close Changes writer: %s", err)
  385. }
  386. }()
  387. return reader, nil
  388. }