aufs.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669
  1. // +build linux
/*

aufs driver directory structure

  .
  ├── layers // Metadata of layers
  │   ├── 1
  │   ├── 2
  │   └── 3
  ├── diff   // Content of the layer
  │   ├── 1  // Contains layers that need to be mounted for the id
  │   ├── 2
  │   └── 3
  └── mnt    // Mount points for the rw layers to be mounted
      ├── 1
      ├── 2
      └── 3

*/
  18. package aufs
  19. import (
  20. "bufio"
  21. "fmt"
  22. "io"
  23. "io/ioutil"
  24. "os"
  25. "os/exec"
  26. "path"
  27. "path/filepath"
  28. "strings"
  29. "sync"
  30. "syscall"
  31. "time"
  32. "github.com/Sirupsen/logrus"
  33. "github.com/vbatts/tar-split/tar/storage"
  34. "github.com/docker/docker/daemon/graphdriver"
  35. "github.com/docker/docker/pkg/archive"
  36. "github.com/docker/docker/pkg/chrootarchive"
  37. "github.com/docker/docker/pkg/directory"
  38. "github.com/docker/docker/pkg/idtools"
  39. mountpk "github.com/docker/docker/pkg/mount"
  40. "github.com/opencontainers/runc/libcontainer/label"
  41. rsystem "github.com/opencontainers/runc/libcontainer/system"
  42. )
  43. var (
  44. // ErrAufsNotSupported is returned if aufs is not supported by the host.
  45. ErrAufsNotSupported = fmt.Errorf("AUFS was not found in /proc/filesystems")
  46. // ErrAufsNested means aufs cannot be used bc we are in a user namespace
  47. ErrAufsNested = fmt.Errorf("AUFS cannot be used in non-init user namespace")
  48. backingFs = "<unknown>"
  49. enableDirpermLock sync.Once
  50. enableDirperm bool
  51. )
  52. func init() {
  53. graphdriver.Register("aufs", Init)
  54. }
  55. // Driver contains information about the filesystem mounted.
  56. type Driver struct {
  57. sync.Mutex
  58. root string
  59. uidMaps []idtools.IDMap
  60. gidMaps []idtools.IDMap
  61. ctr *graphdriver.RefCounter
  62. pathCacheLock sync.Mutex
  63. pathCache map[string]string
  64. naiveDiff graphdriver.DiffDriver
  65. }
  66. // Init returns a new AUFS driver.
  67. // An error is returned if AUFS is not supported.
  68. func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
  69. // Try to load the aufs kernel module
  70. if err := supportsAufs(); err != nil {
  71. return nil, graphdriver.ErrNotSupported
  72. }
  73. fsMagic, err := graphdriver.GetFSMagic(root)
  74. if err != nil {
  75. return nil, err
  76. }
  77. if fsName, ok := graphdriver.FsNames[fsMagic]; ok {
  78. backingFs = fsName
  79. }
  80. switch fsMagic {
  81. case graphdriver.FsMagicAufs, graphdriver.FsMagicBtrfs, graphdriver.FsMagicEcryptfs:
  82. logrus.Errorf("AUFS is not supported over %s", backingFs)
  83. return nil, graphdriver.ErrIncompatibleFS
  84. }
  85. paths := []string{
  86. "mnt",
  87. "diff",
  88. "layers",
  89. }
  90. a := &Driver{
  91. root: root,
  92. uidMaps: uidMaps,
  93. gidMaps: gidMaps,
  94. pathCache: make(map[string]string),
  95. ctr: graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicAufs)),
  96. }
  97. rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
  98. if err != nil {
  99. return nil, err
  100. }
  101. // Create the root aufs driver dir and return
  102. // if it already exists
  103. // If not populate the dir structure
  104. if err := idtools.MkdirAllAs(root, 0700, rootUID, rootGID); err != nil {
  105. if os.IsExist(err) {
  106. return a, nil
  107. }
  108. return nil, err
  109. }
  110. if err := mountpk.MakePrivate(root); err != nil {
  111. return nil, err
  112. }
  113. // Populate the dir structure
  114. for _, p := range paths {
  115. if err := idtools.MkdirAllAs(path.Join(root, p), 0700, rootUID, rootGID); err != nil {
  116. return nil, err
  117. }
  118. }
  119. a.naiveDiff = graphdriver.NewNaiveDiffDriver(a, uidMaps, gidMaps)
  120. return a, nil
  121. }
  122. // Return a nil error if the kernel supports aufs
  123. // We cannot modprobe because inside dind modprobe fails
  124. // to run
  125. func supportsAufs() error {
  126. // We can try to modprobe aufs first before looking at
  127. // proc/filesystems for when aufs is supported
  128. exec.Command("modprobe", "aufs").Run()
  129. if rsystem.RunningInUserNS() {
  130. return ErrAufsNested
  131. }
  132. f, err := os.Open("/proc/filesystems")
  133. if err != nil {
  134. return err
  135. }
  136. defer f.Close()
  137. s := bufio.NewScanner(f)
  138. for s.Scan() {
  139. if strings.Contains(s.Text(), "aufs") {
  140. return nil
  141. }
  142. }
  143. return ErrAufsNotSupported
  144. }
  145. func (a *Driver) rootPath() string {
  146. return a.root
  147. }
  148. func (*Driver) String() string {
  149. return "aufs"
  150. }
  151. // Status returns current information about the filesystem such as root directory, number of directories mounted, etc.
  152. func (a *Driver) Status() [][2]string {
  153. ids, _ := loadIds(path.Join(a.rootPath(), "layers"))
  154. return [][2]string{
  155. {"Root Dir", a.rootPath()},
  156. {"Backing Filesystem", backingFs},
  157. {"Dirs", fmt.Sprintf("%d", len(ids))},
  158. {"Dirperm1 Supported", fmt.Sprintf("%v", useDirperm())},
  159. }
  160. }
  161. // GetMetadata not implemented
  162. func (a *Driver) GetMetadata(id string) (map[string]string, error) {
  163. return nil, nil
  164. }
  165. // Exists returns true if the given id is registered with
  166. // this driver
  167. func (a *Driver) Exists(id string) bool {
  168. if _, err := os.Lstat(path.Join(a.rootPath(), "layers", id)); err != nil {
  169. return false
  170. }
  171. return true
  172. }
  173. // CreateReadWrite creates a layer that is writable for use as a container
  174. // file system.
  175. func (a *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
  176. return a.Create(id, parent, opts)
  177. }
  178. // Create three folders for each id
  179. // mnt, layers, and diff
  180. func (a *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
  181. if opts != nil && len(opts.StorageOpt) != 0 {
  182. return fmt.Errorf("--storage-opt is not supported for aufs")
  183. }
  184. if err := a.createDirsFor(id); err != nil {
  185. return err
  186. }
  187. // Write the layers metadata
  188. f, err := os.Create(path.Join(a.rootPath(), "layers", id))
  189. if err != nil {
  190. return err
  191. }
  192. defer f.Close()
  193. if parent != "" {
  194. ids, err := getParentIDs(a.rootPath(), parent)
  195. if err != nil {
  196. return err
  197. }
  198. if _, err := fmt.Fprintln(f, parent); err != nil {
  199. return err
  200. }
  201. for _, i := range ids {
  202. if _, err := fmt.Fprintln(f, i); err != nil {
  203. return err
  204. }
  205. }
  206. }
  207. return nil
  208. }
  209. // createDirsFor creates two directories for the given id.
  210. // mnt and diff
  211. func (a *Driver) createDirsFor(id string) error {
  212. paths := []string{
  213. "mnt",
  214. "diff",
  215. }
  216. rootUID, rootGID, err := idtools.GetRootUIDGID(a.uidMaps, a.gidMaps)
  217. if err != nil {
  218. return err
  219. }
  220. // Directory permission is 0755.
  221. // The path of directories are <aufs_root_path>/mnt/<image_id>
  222. // and <aufs_root_path>/diff/<image_id>
  223. for _, p := range paths {
  224. if err := idtools.MkdirAllAs(path.Join(a.rootPath(), p, id), 0755, rootUID, rootGID); err != nil {
  225. return err
  226. }
  227. }
  228. return nil
  229. }
  230. // Helper function to debug EBUSY errors on remove.
  231. func debugEBusy(mountPath string) (out []string, err error) {
  232. // lsof is not part of GNU coreutils. This is a best effort
  233. // attempt to detect offending processes.
  234. c := exec.Command("lsof")
  235. r, err := c.StdoutPipe()
  236. if err != nil {
  237. return nil, fmt.Errorf("Assigning pipes failed with %v", err)
  238. }
  239. if err := c.Start(); err != nil {
  240. return nil, fmt.Errorf("Starting %s failed with %v", c.Path, err)
  241. }
  242. defer func() {
  243. waiterr := c.Wait()
  244. if waiterr != nil && err == nil {
  245. err = fmt.Errorf("Waiting for %s failed with %v", c.Path, waiterr)
  246. }
  247. }()
  248. sc := bufio.NewScanner(r)
  249. for sc.Scan() {
  250. entry := sc.Text()
  251. if strings.Contains(entry, mountPath) {
  252. out = append(out, entry, "\n")
  253. }
  254. }
  255. return out, nil
  256. }
  257. // Remove will unmount and remove the given id.
  258. func (a *Driver) Remove(id string) error {
  259. a.pathCacheLock.Lock()
  260. mountpoint, exists := a.pathCache[id]
  261. a.pathCacheLock.Unlock()
  262. if !exists {
  263. mountpoint = a.getMountpoint(id)
  264. }
  265. var retries int
  266. for {
  267. mounted, err := a.mounted(mountpoint)
  268. if err != nil {
  269. return err
  270. }
  271. if !mounted {
  272. break
  273. }
  274. if err := a.unmount(mountpoint); err != nil {
  275. if err != syscall.EBUSY {
  276. return fmt.Errorf("aufs: unmount error: %s: %v", mountpoint, err)
  277. }
  278. if retries >= 5 {
  279. out, debugErr := debugEBusy(mountpoint)
  280. if debugErr == nil {
  281. logrus.Warnf("debugEBusy returned %v", out)
  282. }
  283. return fmt.Errorf("aufs: unmount error after retries: %s: %v", mountpoint, err)
  284. }
  285. // If unmount returns EBUSY, it could be a transient error. Sleep and retry.
  286. retries++
  287. logrus.Warnf("unmount failed due to EBUSY: retry count: %d", retries)
  288. time.Sleep(100 * time.Millisecond)
  289. continue
  290. }
  291. break
  292. }
  293. // Atomically remove each directory in turn by first moving it out of the
  294. // way (so that docker doesn't find it anymore) before doing removal of
  295. // the whole tree.
  296. tmpMntPath := path.Join(a.mntPath(), fmt.Sprintf("%s-removing", id))
  297. if err := os.Rename(mountpoint, tmpMntPath); err != nil && !os.IsNotExist(err) {
  298. if err == syscall.EBUSY {
  299. logrus.Warn("os.Rename err due to EBUSY")
  300. out, debugErr := debugEBusy(mountpoint)
  301. if debugErr == nil {
  302. logrus.Warnf("debugEBusy returned %v", out)
  303. }
  304. }
  305. return err
  306. }
  307. defer os.RemoveAll(tmpMntPath)
  308. tmpDiffpath := path.Join(a.diffPath(), fmt.Sprintf("%s-removing", id))
  309. if err := os.Rename(a.getDiffPath(id), tmpDiffpath); err != nil && !os.IsNotExist(err) {
  310. return err
  311. }
  312. defer os.RemoveAll(tmpDiffpath)
  313. // Remove the layers file for the id
  314. if err := os.Remove(path.Join(a.rootPath(), "layers", id)); err != nil && !os.IsNotExist(err) {
  315. return err
  316. }
  317. a.pathCacheLock.Lock()
  318. delete(a.pathCache, id)
  319. a.pathCacheLock.Unlock()
  320. return nil
  321. }
  322. // Get returns the rootfs path for the id.
  323. // This will mount the dir at its given path
  324. func (a *Driver) Get(id, mountLabel string) (string, error) {
  325. parents, err := a.getParentLayerPaths(id)
  326. if err != nil && !os.IsNotExist(err) {
  327. return "", err
  328. }
  329. a.pathCacheLock.Lock()
  330. m, exists := a.pathCache[id]
  331. a.pathCacheLock.Unlock()
  332. if !exists {
  333. m = a.getDiffPath(id)
  334. if len(parents) > 0 {
  335. m = a.getMountpoint(id)
  336. }
  337. }
  338. if count := a.ctr.Increment(m); count > 1 {
  339. return m, nil
  340. }
  341. // If a dir does not have a parent ( no layers )do not try to mount
  342. // just return the diff path to the data
  343. if len(parents) > 0 {
  344. if err := a.mount(id, m, mountLabel, parents); err != nil {
  345. return "", err
  346. }
  347. }
  348. a.pathCacheLock.Lock()
  349. a.pathCache[id] = m
  350. a.pathCacheLock.Unlock()
  351. return m, nil
  352. }
  353. // Put unmounts and updates list of active mounts.
  354. func (a *Driver) Put(id string) error {
  355. a.pathCacheLock.Lock()
  356. m, exists := a.pathCache[id]
  357. if !exists {
  358. m = a.getMountpoint(id)
  359. a.pathCache[id] = m
  360. }
  361. a.pathCacheLock.Unlock()
  362. if count := a.ctr.Decrement(m); count > 0 {
  363. return nil
  364. }
  365. err := a.unmount(m)
  366. if err != nil {
  367. logrus.Debugf("Failed to unmount %s aufs: %v", id, err)
  368. }
  369. return err
  370. }
  371. // isParent returns if the passed in parent is the direct parent of the passed in layer
  372. func (a *Driver) isParent(id, parent string) bool {
  373. parents, _ := getParentIDs(a.rootPath(), id)
  374. if parent == "" && len(parents) > 0 {
  375. return false
  376. }
  377. return !(len(parents) > 0 && parent != parents[0])
  378. }
  379. // Diff produces an archive of the changes between the specified
  380. // layer and its parent layer which may be "".
  381. func (a *Driver) Diff(id, parent string) (io.ReadCloser, error) {
  382. if !a.isParent(id, parent) {
  383. return a.naiveDiff.Diff(id, parent)
  384. }
  385. // AUFS doesn't need the parent layer to produce a diff.
  386. return archive.TarWithOptions(path.Join(a.rootPath(), "diff", id), &archive.TarOptions{
  387. Compression: archive.Uncompressed,
  388. ExcludePatterns: []string{archive.WhiteoutMetaPrefix + "*", "!" + archive.WhiteoutOpaqueDir},
  389. UIDMaps: a.uidMaps,
  390. GIDMaps: a.gidMaps,
  391. })
  392. }
  393. type fileGetNilCloser struct {
  394. storage.FileGetter
  395. }
  396. func (f fileGetNilCloser) Close() error {
  397. return nil
  398. }
  399. // DiffGetter returns a FileGetCloser that can read files from the directory that
  400. // contains files for the layer differences. Used for direct access for tar-split.
  401. func (a *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) {
  402. p := path.Join(a.rootPath(), "diff", id)
  403. return fileGetNilCloser{storage.NewPathFileGetter(p)}, nil
  404. }
  405. func (a *Driver) applyDiff(id string, diff io.Reader) error {
  406. return chrootarchive.UntarUncompressed(diff, path.Join(a.rootPath(), "diff", id), &archive.TarOptions{
  407. UIDMaps: a.uidMaps,
  408. GIDMaps: a.gidMaps,
  409. })
  410. }
  411. // DiffSize calculates the changes between the specified id
  412. // and its parent and returns the size in bytes of the changes
  413. // relative to its base filesystem directory.
  414. func (a *Driver) DiffSize(id, parent string) (size int64, err error) {
  415. if !a.isParent(id, parent) {
  416. return a.naiveDiff.DiffSize(id, parent)
  417. }
  418. // AUFS doesn't need the parent layer to calculate the diff size.
  419. return directory.Size(path.Join(a.rootPath(), "diff", id))
  420. }
  421. // ApplyDiff extracts the changeset from the given diff into the
  422. // layer with the specified id and parent, returning the size of the
  423. // new layer in bytes.
  424. func (a *Driver) ApplyDiff(id, parent string, diff io.Reader) (size int64, err error) {
  425. if !a.isParent(id, parent) {
  426. return a.naiveDiff.ApplyDiff(id, parent, diff)
  427. }
  428. // AUFS doesn't need the parent id to apply the diff if it is the direct parent.
  429. if err = a.applyDiff(id, diff); err != nil {
  430. return
  431. }
  432. return a.DiffSize(id, parent)
  433. }
  434. // Changes produces a list of changes between the specified layer
  435. // and its parent layer. If parent is "", then all changes will be ADD changes.
  436. func (a *Driver) Changes(id, parent string) ([]archive.Change, error) {
  437. if !a.isParent(id, parent) {
  438. return a.naiveDiff.Changes(id, parent)
  439. }
  440. // AUFS doesn't have snapshots, so we need to get changes from all parent
  441. // layers.
  442. layers, err := a.getParentLayerPaths(id)
  443. if err != nil {
  444. return nil, err
  445. }
  446. return archive.Changes(layers, path.Join(a.rootPath(), "diff", id))
  447. }
  448. func (a *Driver) getParentLayerPaths(id string) ([]string, error) {
  449. parentIds, err := getParentIDs(a.rootPath(), id)
  450. if err != nil {
  451. return nil, err
  452. }
  453. layers := make([]string, len(parentIds))
  454. // Get the diff paths for all the parent ids
  455. for i, p := range parentIds {
  456. layers[i] = path.Join(a.rootPath(), "diff", p)
  457. }
  458. return layers, nil
  459. }
  460. func (a *Driver) mount(id string, target string, mountLabel string, layers []string) error {
  461. a.Lock()
  462. defer a.Unlock()
  463. // If the id is mounted or we get an error return
  464. if mounted, err := a.mounted(target); err != nil || mounted {
  465. return err
  466. }
  467. rw := a.getDiffPath(id)
  468. if err := a.aufsMount(layers, rw, target, mountLabel); err != nil {
  469. return fmt.Errorf("error creating aufs mount to %s: %v", target, err)
  470. }
  471. return nil
  472. }
  473. func (a *Driver) unmount(mountPath string) error {
  474. a.Lock()
  475. defer a.Unlock()
  476. if mounted, err := a.mounted(mountPath); err != nil || !mounted {
  477. return err
  478. }
  479. if err := Unmount(mountPath); err != nil {
  480. return err
  481. }
  482. return nil
  483. }
  484. func (a *Driver) mounted(mountpoint string) (bool, error) {
  485. return graphdriver.Mounted(graphdriver.FsMagicAufs, mountpoint)
  486. }
  487. // Cleanup aufs and unmount all mountpoints
  488. func (a *Driver) Cleanup() error {
  489. var dirs []string
  490. if err := filepath.Walk(a.mntPath(), func(path string, info os.FileInfo, err error) error {
  491. if err != nil {
  492. return err
  493. }
  494. if !info.IsDir() {
  495. return nil
  496. }
  497. dirs = append(dirs, path)
  498. return nil
  499. }); err != nil {
  500. return err
  501. }
  502. for _, m := range dirs {
  503. if err := a.unmount(m); err != nil {
  504. logrus.Debugf("aufs error unmounting %s: %s", m, err)
  505. }
  506. }
  507. return mountpk.Unmount(a.root)
  508. }
  509. func (a *Driver) aufsMount(ro []string, rw, target, mountLabel string) (err error) {
  510. defer func() {
  511. if err != nil {
  512. Unmount(target)
  513. }
  514. }()
  515. // Mount options are clipped to page size(4096 bytes). If there are more
  516. // layers then these are remounted individually using append.
  517. offset := 54
  518. if useDirperm() {
  519. offset += len("dirperm1")
  520. }
  521. b := make([]byte, syscall.Getpagesize()-len(mountLabel)-offset) // room for xino & mountLabel
  522. bp := copy(b, fmt.Sprintf("br:%s=rw", rw))
  523. index := 0
  524. for ; index < len(ro); index++ {
  525. layer := fmt.Sprintf(":%s=ro+wh", ro[index])
  526. if bp+len(layer) > len(b) {
  527. break
  528. }
  529. bp += copy(b[bp:], layer)
  530. }
  531. opts := "dio,xino=/dev/shm/aufs.xino"
  532. if useDirperm() {
  533. opts += ",dirperm1"
  534. }
  535. data := label.FormatMountLabel(fmt.Sprintf("%s,%s", string(b[:bp]), opts), mountLabel)
  536. if err = mount("none", target, "aufs", 0, data); err != nil {
  537. return
  538. }
  539. for ; index < len(ro); index++ {
  540. layer := fmt.Sprintf(":%s=ro+wh", ro[index])
  541. data := label.FormatMountLabel(fmt.Sprintf("append%s", layer), mountLabel)
  542. if err = mount("none", target, "aufs", syscall.MS_REMOUNT, data); err != nil {
  543. return
  544. }
  545. }
  546. return
  547. }
  548. // useDirperm checks dirperm1 mount option can be used with the current
  549. // version of aufs.
  550. func useDirperm() bool {
  551. enableDirpermLock.Do(func() {
  552. base, err := ioutil.TempDir("", "docker-aufs-base")
  553. if err != nil {
  554. logrus.Errorf("error checking dirperm1: %v", err)
  555. return
  556. }
  557. defer os.RemoveAll(base)
  558. union, err := ioutil.TempDir("", "docker-aufs-union")
  559. if err != nil {
  560. logrus.Errorf("error checking dirperm1: %v", err)
  561. return
  562. }
  563. defer os.RemoveAll(union)
  564. opts := fmt.Sprintf("br:%s,dirperm1,xino=/dev/shm/aufs.xino", base)
  565. if err := mount("none", union, "aufs", 0, opts); err != nil {
  566. return
  567. }
  568. enableDirperm = true
  569. if err := Unmount(union); err != nil {
  570. logrus.Errorf("error checking dirperm1: failed to unmount %v", err)
  571. }
  572. })
  573. return enableDirperm
  574. }