diff.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. package fs
  2. import (
  3. "context"
  4. "os"
  5. "path/filepath"
  6. "strings"
  7. "golang.org/x/sync/errgroup"
  8. "github.com/sirupsen/logrus"
  9. )
  10. // ChangeKind is the type of modification that
  11. // a change is making.
  12. type ChangeKind int
  13. const (
  14. // ChangeKindUnmodified represents an unmodified
  15. // file
  16. ChangeKindUnmodified = iota
  17. // ChangeKindAdd represents an addition of
  18. // a file
  19. ChangeKindAdd
  20. // ChangeKindModify represents a change to
  21. // an existing file
  22. ChangeKindModify
  23. // ChangeKindDelete represents a delete of
  24. // a file
  25. ChangeKindDelete
  26. )
  27. func (k ChangeKind) String() string {
  28. switch k {
  29. case ChangeKindUnmodified:
  30. return "unmodified"
  31. case ChangeKindAdd:
  32. return "add"
  33. case ChangeKindModify:
  34. return "modify"
  35. case ChangeKindDelete:
  36. return "delete"
  37. default:
  38. return ""
  39. }
  40. }
  41. // Change represents single change between a diff and its parent.
  42. type Change struct {
  43. Kind ChangeKind
  44. Path string
  45. }
  46. // ChangeFunc is the type of function called for each change
  47. // computed during a directory changes calculation.
  48. type ChangeFunc func(ChangeKind, string, os.FileInfo, error) error
  49. // Changes computes changes between two directories calling the
  50. // given change function for each computed change. The first
  51. // directory is intended to the base directory and second
  52. // directory the changed directory.
  53. //
  54. // The change callback is called by the order of path names and
  55. // should be appliable in that order.
  56. // Due to this apply ordering, the following is true
  57. // - Removed directory trees only create a single change for the root
  58. // directory removed. Remaining changes are implied.
  59. // - A directory which is modified to become a file will not have
  60. // delete entries for sub-path items, their removal is implied
  61. // by the removal of the parent directory.
  62. //
  63. // Opaque directories will not be treated specially and each file
  64. // removed from the base directory will show up as a removal.
  65. //
  66. // File content comparisons will be done on files which have timestamps
  67. // which may have been truncated. If either of the files being compared
  68. // has a zero value nanosecond value, each byte will be compared for
  69. // differences. If 2 files have the same seconds value but different
  70. // nanosecond values where one of those values is zero, the files will
  71. // be considered unchanged if the content is the same. This behavior
  72. // is to account for timestamp truncation during archiving.
  73. func Changes(ctx context.Context, a, b string, changeFn ChangeFunc) error {
  74. if a == "" {
  75. logrus.Debugf("Using single walk diff for %s", b)
  76. return addDirChanges(ctx, changeFn, b)
  77. } else if diffOptions := detectDirDiff(b, a); diffOptions != nil {
  78. logrus.Debugf("Using single walk diff for %s from %s", diffOptions.diffDir, a)
  79. return diffDirChanges(ctx, changeFn, a, diffOptions)
  80. }
  81. logrus.Debugf("Using double walk diff for %s from %s", b, a)
  82. return doubleWalkDiff(ctx, changeFn, a, b)
  83. }
  84. func addDirChanges(ctx context.Context, changeFn ChangeFunc, root string) error {
  85. return filepath.Walk(root, func(path string, f os.FileInfo, err error) error {
  86. if err != nil {
  87. return err
  88. }
  89. // Rebase path
  90. path, err = filepath.Rel(root, path)
  91. if err != nil {
  92. return err
  93. }
  94. path = filepath.Join(string(os.PathSeparator), path)
  95. // Skip root
  96. if path == string(os.PathSeparator) {
  97. return nil
  98. }
  99. return changeFn(ChangeKindAdd, path, f, nil)
  100. })
  101. }
  102. // diffDirOptions is used when the diff can be directly calculated from
  103. // a diff directory to its base, without walking both trees.
  104. type diffDirOptions struct {
  105. diffDir string
  106. skipChange func(string) (bool, error)
  107. deleteChange func(string, string, os.FileInfo) (string, error)
  108. }
  109. // diffDirChanges walks the diff directory and compares changes against the base.
  110. func diffDirChanges(ctx context.Context, changeFn ChangeFunc, base string, o *diffDirOptions) error {
  111. changedDirs := make(map[string]struct{})
  112. return filepath.Walk(o.diffDir, func(path string, f os.FileInfo, err error) error {
  113. if err != nil {
  114. return err
  115. }
  116. // Rebase path
  117. path, err = filepath.Rel(o.diffDir, path)
  118. if err != nil {
  119. return err
  120. }
  121. path = filepath.Join(string(os.PathSeparator), path)
  122. // Skip root
  123. if path == string(os.PathSeparator) {
  124. return nil
  125. }
  126. // TODO: handle opaqueness, start new double walker at this
  127. // location to get deletes, and skip tree in single walker
  128. if o.skipChange != nil {
  129. if skip, err := o.skipChange(path); skip {
  130. return err
  131. }
  132. }
  133. var kind ChangeKind
  134. deletedFile, err := o.deleteChange(o.diffDir, path, f)
  135. if err != nil {
  136. return err
  137. }
  138. // Find out what kind of modification happened
  139. if deletedFile != "" {
  140. path = deletedFile
  141. kind = ChangeKindDelete
  142. f = nil
  143. } else {
  144. // Otherwise, the file was added
  145. kind = ChangeKindAdd
  146. // ...Unless it already existed in a base, in which case, it's a modification
  147. stat, err := os.Stat(filepath.Join(base, path))
  148. if err != nil && !os.IsNotExist(err) {
  149. return err
  150. }
  151. if err == nil {
  152. // The file existed in the base, so that's a modification
  153. // However, if it's a directory, maybe it wasn't actually modified.
  154. // If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
  155. if stat.IsDir() && f.IsDir() {
  156. if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) {
  157. // Both directories are the same, don't record the change
  158. return nil
  159. }
  160. }
  161. kind = ChangeKindModify
  162. }
  163. }
  164. // If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files.
  165. // This block is here to ensure the change is recorded even if the
  166. // modify time, mode and size of the parent directory in the rw and ro layers are all equal.
  167. // Check https://github.com/docker/docker/pull/13590 for details.
  168. if f.IsDir() {
  169. changedDirs[path] = struct{}{}
  170. }
  171. if kind == ChangeKindAdd || kind == ChangeKindDelete {
  172. parent := filepath.Dir(path)
  173. if _, ok := changedDirs[parent]; !ok && parent != "/" {
  174. pi, err := os.Stat(filepath.Join(o.diffDir, parent))
  175. if err := changeFn(ChangeKindModify, parent, pi, err); err != nil {
  176. return err
  177. }
  178. changedDirs[parent] = struct{}{}
  179. }
  180. }
  181. return changeFn(kind, path, f, nil)
  182. })
  183. }
  184. // doubleWalkDiff walks both directories to create a diff
  185. func doubleWalkDiff(ctx context.Context, changeFn ChangeFunc, a, b string) (err error) {
  186. g, ctx := errgroup.WithContext(ctx)
  187. var (
  188. c1 = make(chan *currentPath)
  189. c2 = make(chan *currentPath)
  190. f1, f2 *currentPath
  191. rmdir string
  192. lastEmittedDir = string(filepath.Separator)
  193. parents []os.FileInfo
  194. )
  195. g.Go(func() error {
  196. defer close(c1)
  197. return pathWalk(ctx, a, c1)
  198. })
  199. g.Go(func() error {
  200. defer close(c2)
  201. return pathWalk(ctx, b, c2)
  202. })
  203. g.Go(func() error {
  204. for c1 != nil || c2 != nil {
  205. if f1 == nil && c1 != nil {
  206. f1, err = nextPath(ctx, c1)
  207. if err != nil {
  208. return err
  209. }
  210. if f1 == nil {
  211. c1 = nil
  212. }
  213. }
  214. if f2 == nil && c2 != nil {
  215. f2, err = nextPath(ctx, c2)
  216. if err != nil {
  217. return err
  218. }
  219. if f2 == nil {
  220. c2 = nil
  221. }
  222. }
  223. if f1 == nil && f2 == nil {
  224. continue
  225. }
  226. var (
  227. f os.FileInfo
  228. emit = true
  229. )
  230. k, p := pathChange(f1, f2)
  231. switch k {
  232. case ChangeKindAdd:
  233. if rmdir != "" {
  234. rmdir = ""
  235. }
  236. f = f2.f
  237. f2 = nil
  238. case ChangeKindDelete:
  239. // Check if this file is already removed by being
  240. // under of a removed directory
  241. if rmdir != "" && strings.HasPrefix(f1.path, rmdir) {
  242. f1 = nil
  243. continue
  244. } else if rmdir == "" && f1.f.IsDir() {
  245. rmdir = f1.path + string(os.PathSeparator)
  246. } else if rmdir != "" {
  247. rmdir = ""
  248. }
  249. f1 = nil
  250. case ChangeKindModify:
  251. same, err := sameFile(f1, f2)
  252. if err != nil {
  253. return err
  254. }
  255. if f1.f.IsDir() && !f2.f.IsDir() {
  256. rmdir = f1.path + string(os.PathSeparator)
  257. } else if rmdir != "" {
  258. rmdir = ""
  259. }
  260. f = f2.f
  261. f1 = nil
  262. f2 = nil
  263. if same {
  264. if !isLinked(f) {
  265. emit = false
  266. }
  267. k = ChangeKindUnmodified
  268. }
  269. }
  270. if emit {
  271. emittedDir, emitParents := commonParents(lastEmittedDir, p, parents)
  272. for _, pf := range emitParents {
  273. p := filepath.Join(emittedDir, pf.Name())
  274. if err := changeFn(ChangeKindUnmodified, p, pf, nil); err != nil {
  275. return err
  276. }
  277. emittedDir = p
  278. }
  279. if err := changeFn(k, p, f, nil); err != nil {
  280. return err
  281. }
  282. if f != nil && f.IsDir() {
  283. lastEmittedDir = p
  284. } else {
  285. lastEmittedDir = emittedDir
  286. }
  287. parents = parents[:0]
  288. } else if f.IsDir() {
  289. lastEmittedDir, parents = commonParents(lastEmittedDir, p, parents)
  290. parents = append(parents, f)
  291. }
  292. }
  293. return nil
  294. })
  295. return g.Wait()
  296. }
  297. func commonParents(base, updated string, dirs []os.FileInfo) (string, []os.FileInfo) {
  298. if basePrefix := makePrefix(base); strings.HasPrefix(updated, basePrefix) {
  299. var (
  300. parents []os.FileInfo
  301. last = base
  302. )
  303. for _, d := range dirs {
  304. next := filepath.Join(last, d.Name())
  305. if strings.HasPrefix(updated, makePrefix(last)) {
  306. parents = append(parents, d)
  307. last = next
  308. } else {
  309. break
  310. }
  311. }
  312. return base, parents
  313. }
  314. baseS := strings.Split(base, string(filepath.Separator))
  315. updatedS := strings.Split(updated, string(filepath.Separator))
  316. commonS := []string{string(filepath.Separator)}
  317. min := len(baseS)
  318. if len(updatedS) < min {
  319. min = len(updatedS)
  320. }
  321. for i := 0; i < min; i++ {
  322. if baseS[i] == updatedS[i] {
  323. commonS = append(commonS, baseS[i])
  324. } else {
  325. break
  326. }
  327. }
  328. return filepath.Join(commonS...), []os.FileInfo{}
  329. }
  330. func makePrefix(d string) string {
  331. if d == "" || d[len(d)-1] != filepath.Separator {
  332. return d + string(filepath.Separator)
  333. }
  334. return d
  335. }