overlay.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. // +build linux
  2. package overlay2
  3. import (
  4. "bufio"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "io/ioutil"
  9. "os"
  10. "os/exec"
  11. "path"
  12. "path/filepath"
  13. "strconv"
  14. "strings"
  15. "syscall"
  16. "github.com/Sirupsen/logrus"
  17. "github.com/docker/docker/daemon/graphdriver"
  18. "github.com/docker/docker/daemon/graphdriver/quota"
  19. "github.com/docker/docker/pkg/archive"
  20. "github.com/docker/docker/pkg/chrootarchive"
  21. "github.com/docker/docker/pkg/directory"
  22. "github.com/docker/docker/pkg/idtools"
  23. "github.com/docker/docker/pkg/mount"
  24. "github.com/docker/docker/pkg/parsers"
  25. "github.com/docker/docker/pkg/parsers/kernel"
  26. "github.com/docker/go-units"
  27. "github.com/opencontainers/runc/libcontainer/label"
  28. )
  var (
  	// untar is the function used to unpack a layer archive into a directory.
  	// It is bound to chrootarchive.UntarUncompressed and called by ApplyDiff.
  	// NOTE(review): presumably kept in a variable so it can be swapped out
  	// (e.g. in tests) — confirm.
  	untar = chrootarchive.UntarUncompressed
  )
  33. // This backend uses the overlay union filesystem for containers
  34. // with diff directories for each layer.
  35. // This version of the overlay driver requires at least kernel
  36. // 4.0.0 in order to support mounting multiple diff directories.
  37. // Each container/image has at least a "diff" directory and "link" file.
  38. // There is also a "lower" file when there are diff layers
  39. // below, as well as "merged" and "work" directories. The "diff" directory
  40. // has the upper layer of the overlay and is used to capture any
  41. // changes to the layer. The "lower" file contains all the lower layer
  42. // mounts separated by ":" and ordered from uppermost to lowermost
  43. // layers. The overlay itself is mounted in the "merged" directory,
  44. // and the "work" dir is needed for overlay to work.
  45. // The "link" file for each layer contains a unique string for the layer.
  46. // Under the "l" directory at the root there will be a symbolic link
  47. // with that unique string pointing the "diff" directory for the layer.
  48. // The symbolic links are used to reference lower layers in the "lower"
  49. // file and on mount. The links are used to shorten the total length
  50. // of a layer reference without requiring changes to the layer identifier
  51. // or root directory. Mounts are always done relative to root and
  52. // referencing the symbolic links in order to ensure the number of
  53. // lower directories can fit in a single page for making the mount
  54. // syscall. A hard upper limit of 128 lower layers is enforced to ensure
  55. // that mounts do not fail due to length.
  const (
  	// driverName is the name this driver is registered under and reports
  	// from String.
  	driverName = "overlay2"
  	// linkDir is the directory, relative to the driver home, that holds the
  	// short symbolic links pointing at each layer's "diff" directory.
  	linkDir = "l"
  	// lowerFile is the name of the per-layer file listing the lower layers,
  	// separated by ":".
  	lowerFile = "lower"
  	// maxDepth is the hard upper limit on the number of lower layers.
  	maxDepth = 128

  	// idLength represents the number of random characters
  	// which can be used to create the unique link identifier
  	// for every layer. If this value is too long then the
  	// page size limit for the mount command may be exceeded.
  	// The idLength should be selected such that following equation
  	// is true (512 is a buffer for label metadata).
  	// ((idLength + len(linkDir) + 1) * maxDepth) <= (pageSize - 512)
  	idLength = 26
  )
  // overlayOptions holds the settings parsed from driver options and from
  // per-container storage options.
  type overlayOptions struct {
  	// overrideKernelCheck is set from "overlay2.override_kernel_check"; when
  	// true, Init only warns instead of failing on a pre-4.0.0 kernel.
  	overrideKernelCheck bool
  	// quota carries the disk quota; its Size is filled in from the "size"
  	// storage option by parseStorageOpt.
  	quota quota.Quota
  }
  // Driver contains information about the home directory and the list of active mounts that are created using this driver.
  type Driver struct {
  	// home is the root directory under which every layer directory and the
  	// link directory live.
  	home string
  	// uidMaps and gidMaps are the ID mappings used to resolve the remapped
  	// root UID/GID and passed to the archive helpers.
  	uidMaps []idtools.IDMap
  	gidMaps []idtools.IDMap
  	// ctr counts references to each "merged" mount point (see Get and Put).
  	ctr *graphdriver.RefCounter
  	// quotaCtl is assigned in Init only when the backing filesystem is xfs;
  	// Create uses it to apply a size quota.
  	quotaCtl *quota.Control
  	// options holds parsed options.
  	// NOTE(review): Init does not store the parsed driver options here; in
  	// the visible code only a scratch Driver in Create populates this field.
  	options overlayOptions
  	// naiveDiff is the fallback diff implementation used when the requested
  	// parent is not the direct parent of the layer.
  	naiveDiff graphdriver.DiffDriver
  }
  var (
  	// backingFs is the name of the filesystem the driver home sits on. Init
  	// overwrites it when the filesystem magic is found in graphdriver.FsNames.
  	backingFs = "<unknown>"
  	// projectQuotaSupported is set to true by Init when quota control could
  	// be set up on an xfs backing filesystem; Create rejects storage options
  	// while it is false.
  	projectQuotaSupported = false
  )
  // init registers this driver with the graphdriver package under driverName,
  // using Init as its initializer.
  func init() {
  	graphdriver.Register(driverName, Init)
  }
  91. // Init returns the a native diff driver for overlay filesystem.
  92. // If overlay filesystem is not supported on the host, graphdriver.ErrNotSupported is returned as error.
  93. // If an overlay filesystem is not supported over an existing filesystem then error graphdriver.ErrIncompatibleFS is returned.
  94. func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
  95. opts, err := parseOptions(options)
  96. if err != nil {
  97. return nil, err
  98. }
  99. if err := supportsOverlay(); err != nil {
  100. return nil, graphdriver.ErrNotSupported
  101. }
  102. // require kernel 4.0.0 to ensure multiple lower dirs are supported
  103. v, err := kernel.GetKernelVersion()
  104. if err != nil {
  105. return nil, err
  106. }
  107. if kernel.CompareKernelVersion(*v, kernel.VersionInfo{Kernel: 4, Major: 0, Minor: 0}) < 0 {
  108. if !opts.overrideKernelCheck {
  109. return nil, graphdriver.ErrNotSupported
  110. }
  111. logrus.Warn("Using pre-4.0.0 kernel for overlay2, mount failures may require kernel update")
  112. }
  113. fsMagic, err := graphdriver.GetFSMagic(home)
  114. if err != nil {
  115. return nil, err
  116. }
  117. if fsName, ok := graphdriver.FsNames[fsMagic]; ok {
  118. backingFs = fsName
  119. }
  120. // check if they are running over btrfs, aufs, zfs, overlay, or ecryptfs
  121. switch fsMagic {
  122. case graphdriver.FsMagicBtrfs, graphdriver.FsMagicAufs, graphdriver.FsMagicZfs, graphdriver.FsMagicOverlay, graphdriver.FsMagicEcryptfs:
  123. logrus.Errorf("'overlay2' is not supported over %s", backingFs)
  124. return nil, graphdriver.ErrIncompatibleFS
  125. }
  126. rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
  127. if err != nil {
  128. return nil, err
  129. }
  130. // Create the driver home dir
  131. if err := idtools.MkdirAllAs(path.Join(home, linkDir), 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
  132. return nil, err
  133. }
  134. if err := mount.MakePrivate(home); err != nil {
  135. return nil, err
  136. }
  137. d := &Driver{
  138. home: home,
  139. uidMaps: uidMaps,
  140. gidMaps: gidMaps,
  141. ctr: graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicOverlay)),
  142. }
  143. d.naiveDiff = graphdriver.NewNaiveDiffDriver(d, uidMaps, gidMaps)
  144. if backingFs == "xfs" {
  145. // Try to enable project quota support over xfs.
  146. if d.quotaCtl, err = quota.NewControl(home); err == nil {
  147. projectQuotaSupported = true
  148. }
  149. }
  150. logrus.Debugf("backingFs=%s, projectQuotaSupported=%v", backingFs, projectQuotaSupported)
  151. return d, nil
  152. }
  153. func parseOptions(options []string) (*overlayOptions, error) {
  154. o := &overlayOptions{}
  155. for _, option := range options {
  156. key, val, err := parsers.ParseKeyValueOpt(option)
  157. if err != nil {
  158. return nil, err
  159. }
  160. key = strings.ToLower(key)
  161. switch key {
  162. case "overlay2.override_kernel_check":
  163. o.overrideKernelCheck, err = strconv.ParseBool(val)
  164. if err != nil {
  165. return nil, err
  166. }
  167. default:
  168. return nil, fmt.Errorf("overlay2: Unknown option %s\n", key)
  169. }
  170. }
  171. return o, nil
  172. }
  173. func supportsOverlay() error {
  174. // We can try to modprobe overlay first before looking at
  175. // proc/filesystems for when overlay is supported
  176. exec.Command("modprobe", "overlay").Run()
  177. f, err := os.Open("/proc/filesystems")
  178. if err != nil {
  179. return err
  180. }
  181. defer f.Close()
  182. s := bufio.NewScanner(f)
  183. for s.Scan() {
  184. if s.Text() == "nodev\toverlay" {
  185. return nil
  186. }
  187. }
  188. logrus.Error("'overlay' not found as a supported filesystem on this host. Please ensure kernel is new enough and has overlay support loaded.")
  189. return graphdriver.ErrNotSupported
  190. }
  // String returns the name of this driver (driverName).
  func (d *Driver) String() string {
  	return driverName
  }
  194. // Status returns current driver information in a two dimensional string array.
  195. // Output contains "Backing Filesystem" used in this implementation.
  196. func (d *Driver) Status() [][2]string {
  197. return [][2]string{
  198. {"Backing Filesystem", backingFs},
  199. }
  200. }
  201. // GetMetadata returns meta data about the overlay driver such as
  202. // LowerDir, UpperDir, WorkDir and MergeDir used to store data.
  203. func (d *Driver) GetMetadata(id string) (map[string]string, error) {
  204. dir := d.dir(id)
  205. if _, err := os.Stat(dir); err != nil {
  206. return nil, err
  207. }
  208. metadata := map[string]string{
  209. "WorkDir": path.Join(dir, "work"),
  210. "MergedDir": path.Join(dir, "merged"),
  211. "UpperDir": path.Join(dir, "diff"),
  212. }
  213. lowerDirs, err := d.getLowerDirs(id)
  214. if err != nil {
  215. return nil, err
  216. }
  217. if len(lowerDirs) > 0 {
  218. metadata["LowerDir"] = strings.Join(lowerDirs, ":")
  219. }
  220. return metadata, nil
  221. }
  // Cleanup releases any state created by the driver that should not outlive
  // the daemon. At present that means unmounting the driver home, which Init
  // turned into a private mount. The unmount error, if any, is returned.
  func (d *Driver) Cleanup() error {
  	return mount.Unmount(d.home)
  }
  // CreateReadWrite creates a layer that is writable for use as a container
  // file system. It simply delegates to Create with the same arguments.
  func (d *Driver) CreateReadWrite(id, parent, mountLabel string, storageOpt map[string]string) error {
  	return d.Create(id, parent, mountLabel, storageOpt)
  }
  233. // Create is used to create the upper, lower, and merge directories required for overlay fs for a given id.
  234. // The parent filesystem is used to configure these directories for the overlay.
  235. func (d *Driver) Create(id, parent, mountLabel string, storageOpt map[string]string) (retErr error) {
  236. if len(storageOpt) != 0 && !projectQuotaSupported {
  237. return fmt.Errorf("--storage-opt is supported only for overlay over xfs with 'pquota' mount option")
  238. }
  239. dir := d.dir(id)
  240. rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
  241. if err != nil {
  242. return err
  243. }
  244. if err := idtools.MkdirAllAs(path.Dir(dir), 0700, rootUID, rootGID); err != nil {
  245. return err
  246. }
  247. if err := idtools.MkdirAs(dir, 0700, rootUID, rootGID); err != nil {
  248. return err
  249. }
  250. defer func() {
  251. // Clean up on failure
  252. if retErr != nil {
  253. os.RemoveAll(dir)
  254. }
  255. }()
  256. if len(storageOpt) > 0 {
  257. driver := &Driver{}
  258. if err := d.parseStorageOpt(storageOpt, driver); err != nil {
  259. return err
  260. }
  261. if driver.options.quota.Size > 0 {
  262. // Set container disk quota limit
  263. if err := d.quotaCtl.SetQuota(dir, driver.options.quota); err != nil {
  264. return err
  265. }
  266. }
  267. }
  268. if err := idtools.MkdirAs(path.Join(dir, "diff"), 0755, rootUID, rootGID); err != nil {
  269. return err
  270. }
  271. lid := generateID(idLength)
  272. if err := os.Symlink(path.Join("..", id, "diff"), path.Join(d.home, linkDir, lid)); err != nil {
  273. return err
  274. }
  275. // Write link id to link file
  276. if err := ioutil.WriteFile(path.Join(dir, "link"), []byte(lid), 0644); err != nil {
  277. return err
  278. }
  279. // if no parent directory, done
  280. if parent == "" {
  281. return nil
  282. }
  283. if err := idtools.MkdirAs(path.Join(dir, "work"), 0700, rootUID, rootGID); err != nil {
  284. return err
  285. }
  286. if err := idtools.MkdirAs(path.Join(dir, "merged"), 0700, rootUID, rootGID); err != nil {
  287. return err
  288. }
  289. lower, err := d.getLower(parent)
  290. if err != nil {
  291. return err
  292. }
  293. if lower != "" {
  294. if err := ioutil.WriteFile(path.Join(dir, lowerFile), []byte(lower), 0666); err != nil {
  295. return err
  296. }
  297. }
  298. return nil
  299. }
  300. // Parse overlay storage options
  301. func (d *Driver) parseStorageOpt(storageOpt map[string]string, driver *Driver) error {
  302. // Read size to set the disk project quota per container
  303. for key, val := range storageOpt {
  304. key := strings.ToLower(key)
  305. switch key {
  306. case "size":
  307. size, err := units.RAMInBytes(val)
  308. if err != nil {
  309. return err
  310. }
  311. driver.options.quota.Size = uint64(size)
  312. default:
  313. return fmt.Errorf("Unknown option %s", key)
  314. }
  315. }
  316. return nil
  317. }
  318. func (d *Driver) getLower(parent string) (string, error) {
  319. parentDir := d.dir(parent)
  320. // Ensure parent exists
  321. if _, err := os.Lstat(parentDir); err != nil {
  322. return "", err
  323. }
  324. // Read Parent link fileA
  325. parentLink, err := ioutil.ReadFile(path.Join(parentDir, "link"))
  326. if err != nil {
  327. return "", err
  328. }
  329. lowers := []string{path.Join(linkDir, string(parentLink))}
  330. parentLower, err := ioutil.ReadFile(path.Join(parentDir, lowerFile))
  331. if err == nil {
  332. parentLowers := strings.Split(string(parentLower), ":")
  333. lowers = append(lowers, parentLowers...)
  334. }
  335. if len(lowers) > maxDepth {
  336. return "", errors.New("max depth exceeded")
  337. }
  338. return strings.Join(lowers, ":"), nil
  339. }
  // dir returns the path of the directory for layer id, i.e. id joined onto
  // the driver home.
  func (d *Driver) dir(id string) string {
  	return path.Join(d.home, id)
  }
  343. func (d *Driver) getLowerDirs(id string) ([]string, error) {
  344. var lowersArray []string
  345. lowers, err := ioutil.ReadFile(path.Join(d.dir(id), lowerFile))
  346. if err == nil {
  347. for _, s := range strings.Split(string(lowers), ":") {
  348. lp, err := os.Readlink(path.Join(d.home, s))
  349. if err != nil {
  350. return nil, err
  351. }
  352. lowersArray = append(lowersArray, path.Clean(path.Join(d.home, "link", lp)))
  353. }
  354. } else if !os.IsNotExist(err) {
  355. return nil, err
  356. }
  357. return lowersArray, nil
  358. }
  359. // Remove cleans the directories that are created for this id.
  360. func (d *Driver) Remove(id string) error {
  361. dir := d.dir(id)
  362. lid, err := ioutil.ReadFile(path.Join(dir, "link"))
  363. if err == nil {
  364. if err := os.RemoveAll(path.Join(d.home, linkDir, string(lid))); err != nil {
  365. logrus.Debugf("Failed to remove link: %v", err)
  366. }
  367. }
  368. if err := os.RemoveAll(dir); err != nil && !os.IsNotExist(err) {
  369. return err
  370. }
  371. return nil
  372. }
  373. // Get creates and mounts the required file system for the given id and returns the mount path.
  374. func (d *Driver) Get(id string, mountLabel string) (s string, err error) {
  375. dir := d.dir(id)
  376. if _, err := os.Stat(dir); err != nil {
  377. return "", err
  378. }
  379. diffDir := path.Join(dir, "diff")
  380. lowers, err := ioutil.ReadFile(path.Join(dir, lowerFile))
  381. if err != nil {
  382. // If no lower, just return diff directory
  383. if os.IsNotExist(err) {
  384. return diffDir, nil
  385. }
  386. return "", err
  387. }
  388. mergedDir := path.Join(dir, "merged")
  389. if count := d.ctr.Increment(mergedDir); count > 1 {
  390. return mergedDir, nil
  391. }
  392. defer func() {
  393. if err != nil {
  394. if c := d.ctr.Decrement(mergedDir); c <= 0 {
  395. syscall.Unmount(mergedDir, 0)
  396. }
  397. }
  398. }()
  399. workDir := path.Join(dir, "work")
  400. splitLowers := strings.Split(string(lowers), ":")
  401. absLowers := make([]string, len(splitLowers))
  402. for i, s := range splitLowers {
  403. absLowers[i] = path.Join(d.home, s)
  404. }
  405. opts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", strings.Join(absLowers, ":"), path.Join(dir, "diff"), path.Join(dir, "work"))
  406. mountData := label.FormatMountLabel(opts, mountLabel)
  407. mount := syscall.Mount
  408. mountTarget := mergedDir
  409. pageSize := syscall.Getpagesize()
  410. // Go can return a larger page size than supported by the system
  411. // as of go 1.7. This will be fixed in 1.8 and this block can be
  412. // removed when building with 1.8.
  413. // See https://github.com/golang/go/commit/1b9499b06989d2831e5b156161d6c07642926ee1
  414. // See https://github.com/docker/docker/issues/27384
  415. if pageSize > 4096 {
  416. pageSize = 4096
  417. }
  418. // Use relative paths and mountFrom when the mount data has exceeded
  419. // the page size. The mount syscall fails if the mount data cannot
  420. // fit within a page and relative links make the mount data much
  421. // smaller at the expense of requiring a fork exec to chroot.
  422. if len(mountData) > pageSize {
  423. opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", string(lowers), path.Join(id, "diff"), path.Join(id, "work"))
  424. mountData = label.FormatMountLabel(opts, mountLabel)
  425. if len(mountData) > pageSize {
  426. return "", fmt.Errorf("cannot mount layer, mount label too large %d", len(mountData))
  427. }
  428. mount = func(source string, target string, mType string, flags uintptr, label string) error {
  429. return mountFrom(d.home, source, target, mType, flags, label)
  430. }
  431. mountTarget = path.Join(id, "merged")
  432. }
  433. if err := mount("overlay", mountTarget, "overlay", 0, mountData); err != nil {
  434. return "", fmt.Errorf("error creating overlay mount to %s: %v", mergedDir, err)
  435. }
  436. // chown "workdir/work" to the remapped root UID/GID. Overlay fs inside a
  437. // user namespace requires this to move a directory from lower to upper.
  438. rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
  439. if err != nil {
  440. return "", err
  441. }
  442. if err := os.Chown(path.Join(workDir, "work"), rootUID, rootGID); err != nil {
  443. return "", err
  444. }
  445. return mergedDir, nil
  446. }
  447. // Put unmounts the mount path created for the give id.
  448. func (d *Driver) Put(id string) error {
  449. mountpoint := path.Join(d.dir(id), "merged")
  450. if count := d.ctr.Decrement(mountpoint); count > 0 {
  451. return nil
  452. }
  453. if err := syscall.Unmount(mountpoint, 0); err != nil {
  454. logrus.Debugf("Failed to unmount %s overlay: %s - %v", id, mountpoint, err)
  455. }
  456. return nil
  457. }
  458. // Exists checks to see if the id is already mounted.
  459. func (d *Driver) Exists(id string) bool {
  460. _, err := os.Stat(d.dir(id))
  461. return err == nil
  462. }
  463. // isParent returns if the passed in parent is the direct parent of the passed in layer
  464. func (d *Driver) isParent(id, parent string) bool {
  465. lowers, err := d.getLowerDirs(id)
  466. if err != nil {
  467. return false
  468. }
  469. if parent == "" && len(lowers) > 0 {
  470. return false
  471. }
  472. parentDir := d.dir(parent)
  473. var ld string
  474. if len(lowers) > 0 {
  475. ld = filepath.Dir(lowers[0])
  476. }
  477. if ld == "" && parent == "" {
  478. return true
  479. }
  480. return ld == parentDir
  481. }
  482. // ApplyDiff applies the new layer into a root
  483. func (d *Driver) ApplyDiff(id string, parent string, diff io.Reader) (size int64, err error) {
  484. if !d.isParent(id, parent) {
  485. return d.naiveDiff.ApplyDiff(id, parent, diff)
  486. }
  487. applyDir := d.getDiffPath(id)
  488. logrus.Debugf("Applying tar in %s", applyDir)
  489. // Overlay doesn't need the parent id to apply the diff
  490. if err := untar(diff, applyDir, &archive.TarOptions{
  491. UIDMaps: d.uidMaps,
  492. GIDMaps: d.gidMaps,
  493. WhiteoutFormat: archive.OverlayWhiteoutFormat,
  494. }); err != nil {
  495. return 0, err
  496. }
  497. return d.DiffSize(id, parent)
  498. }
  499. func (d *Driver) getDiffPath(id string) string {
  500. dir := d.dir(id)
  501. return path.Join(dir, "diff")
  502. }
  503. // DiffSize calculates the changes between the specified id
  504. // and its parent and returns the size in bytes of the changes
  505. // relative to its base filesystem directory.
  506. func (d *Driver) DiffSize(id, parent string) (size int64, err error) {
  507. if !d.isParent(id, parent) {
  508. return d.naiveDiff.DiffSize(id, parent)
  509. }
  510. return directory.Size(d.getDiffPath(id))
  511. }
  512. // Diff produces an archive of the changes between the specified
  513. // layer and its parent layer which may be "".
  514. func (d *Driver) Diff(id, parent string) (io.ReadCloser, error) {
  515. if !d.isParent(id, parent) {
  516. return d.naiveDiff.Diff(id, parent)
  517. }
  518. diffPath := d.getDiffPath(id)
  519. logrus.Debugf("Tar with options on %s", diffPath)
  520. return archive.TarWithOptions(diffPath, &archive.TarOptions{
  521. Compression: archive.Uncompressed,
  522. UIDMaps: d.uidMaps,
  523. GIDMaps: d.gidMaps,
  524. WhiteoutFormat: archive.OverlayWhiteoutFormat,
  525. })
  526. }
  527. // Changes produces a list of changes between the specified layer
  528. // and its parent layer. If parent is "", then all changes will be ADD changes.
  529. func (d *Driver) Changes(id, parent string) ([]archive.Change, error) {
  530. if !d.isParent(id, parent) {
  531. return d.naiveDiff.Changes(id, parent)
  532. }
  533. // Overlay doesn't have snapshots, so we need to get changes from all parent
  534. // layers.
  535. diffPath := d.getDiffPath(id)
  536. layers, err := d.getLowerDirs(id)
  537. if err != nil {
  538. return nil, err
  539. }
  540. return archive.OverlayChanges(layers, diffPath)
  541. }