containerfs_linux.go 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. package daemon // import "github.com/docker/docker/daemon"
  2. import (
  3. "context"
  4. "os"
  5. "path/filepath"
  6. "runtime"
  7. "strings"
  8. "github.com/hashicorp/go-multierror"
  9. "github.com/moby/sys/mount"
  10. "github.com/moby/sys/symlink"
  11. "golang.org/x/sys/unix"
  12. "github.com/docker/docker/api/types"
  13. "github.com/docker/docker/container"
  14. "github.com/docker/docker/internal/mounttree"
  15. "github.com/docker/docker/internal/unshare"
  16. "github.com/docker/docker/pkg/fileutils"
  17. )
  18. type future struct {
  19. fn func() error
  20. res chan<- error
  21. }
  22. // containerFSView allows functions to be run in the context of a container's
  23. // filesystem. Inside these functions, the root directory is the container root
  24. // for all native OS filesystem APIs, including, but not limited to, the [os]
  25. // and [golang.org/x/sys/unix] packages. The view of the container's filesystem
  26. // is live and read-write. Each view has its own private set of tmpfs mounts.
  27. // Any files written under a tmpfs mount are not visible to processes inside the
  28. // container nor any other view of the container's filesystem, and vice versa.
  29. //
  30. // Each view has its own current working directory which is initialized to the
  31. // root of the container filesystem and can be changed with [os.Chdir]. Changes
  32. // to the current directory persist across successive [*containerFSView.RunInFS]
  33. // and [*containerFSView.GoInFS] calls.
  34. //
  35. // Multiple views of the same container filesystem can coexist at the same time.
  36. // Only one function can be running in a particular filesystem view at any given
  37. // time. Calls to [*containerFSView.RunInFS] or [*containerFSView.GoInFS] will
  38. // block while another function is running. If more than one call is blocked
  39. // concurrently, the order they are unblocked is undefined.
  40. type containerFSView struct {
  41. d *Daemon
  42. ctr *container.Container
  43. todo chan future
  44. done chan error
  45. }
  46. // openContainerFS opens a new view of the container's filesystem.
  47. func (daemon *Daemon) openContainerFS(container *container.Container) (_ *containerFSView, err error) {
  48. if err := daemon.Mount(container); err != nil {
  49. return nil, err
  50. }
  51. defer func() {
  52. if err != nil {
  53. _ = daemon.Unmount(container)
  54. }
  55. }()
  56. mounts, err := daemon.setupMounts(container)
  57. if err != nil {
  58. return nil, err
  59. }
  60. defer func() {
  61. if err != nil {
  62. _ = container.UnmountVolumes(daemon.LogVolumeEvent)
  63. }
  64. }()
  65. // Setup in initial mount namespace complete. We're ready to unshare the
  66. // mount namespace and bind the volume mounts into that private view of
  67. // the container FS.
  68. todo := make(chan future)
  69. done := make(chan error)
  70. err = unshare.Go(unix.CLONE_NEWNS,
  71. func() error {
  72. if err := mount.MakeRSlave("/"); err != nil {
  73. return err
  74. }
  75. for _, m := range mounts {
  76. dest, err := container.GetResourcePath(m.Destination)
  77. if err != nil {
  78. return err
  79. }
  80. var stat os.FileInfo
  81. stat, err = os.Stat(m.Source)
  82. if err != nil {
  83. return err
  84. }
  85. if err := fileutils.CreateIfNotExists(dest, stat.IsDir()); err != nil {
  86. return err
  87. }
  88. bindMode := "rbind"
  89. if m.NonRecursive {
  90. bindMode = "bind"
  91. }
  92. writeMode := "ro"
  93. if m.Writable {
  94. writeMode = "rw"
  95. }
  96. // openContainerFS() is called for temporary mounts
  97. // outside the container. Soon these will be unmounted
  98. // with lazy unmount option and given we have mounted
  99. // them rbind, all the submounts will propagate if these
  100. // are shared. If daemon is running in host namespace
  101. // and has / as shared then these unmounts will
  102. // propagate and unmount original mount as well. So make
  103. // all these mounts rprivate. Do not use propagation
  104. // property of volume as that should apply only when
  105. // mounting happens inside the container.
  106. opts := strings.Join([]string{bindMode, writeMode, "rprivate"}, ",")
  107. if err := mount.Mount(m.Source, dest, "", opts); err != nil {
  108. return err
  109. }
  110. }
  111. return mounttree.SwitchRoot(container.BaseFS)
  112. },
  113. func() {
  114. defer close(done)
  115. for it := range todo {
  116. err := it.fn()
  117. if it.res != nil {
  118. it.res <- err
  119. }
  120. }
  121. // The thread will terminate when this goroutine returns, taking the
  122. // mount namespace and all the volume bind-mounts with it.
  123. },
  124. )
  125. if err != nil {
  126. return nil, err
  127. }
  128. vw := &containerFSView{
  129. d: daemon,
  130. ctr: container,
  131. todo: todo,
  132. done: done,
  133. }
  134. runtime.SetFinalizer(vw, (*containerFSView).Close)
  135. return vw, nil
  136. }
  137. // RunInFS synchronously runs fn in the context of the container filesytem and
  138. // passes through its return value.
  139. //
  140. // The container filesystem is only visible to functions called in the same
  141. // goroutine as fn. Goroutines started from fn will see the host's filesystem.
  142. func (vw *containerFSView) RunInFS(ctx context.Context, fn func() error) error {
  143. res := make(chan error)
  144. select {
  145. case vw.todo <- future{fn: fn, res: res}:
  146. case <-ctx.Done():
  147. return ctx.Err()
  148. }
  149. return <-res
  150. }
  151. // GoInFS starts fn in the container FS. It blocks until fn is started but does
  152. // not wait until fn returns. An error is returned if ctx is canceled before fn
  153. // has been started.
  154. //
  155. // The container filesystem is only visible to functions called in the same
  156. // goroutine as fn. Goroutines started from fn will see the host's filesystem.
  157. func (vw *containerFSView) GoInFS(ctx context.Context, fn func()) error {
  158. select {
  159. case vw.todo <- future{fn: func() error { fn(); return nil }}:
  160. return nil
  161. case <-ctx.Done():
  162. return ctx.Err()
  163. }
  164. }
  165. // Close waits until any in-flight operations complete and frees all
  166. // resources associated with vw.
  167. func (vw *containerFSView) Close() error {
  168. runtime.SetFinalizer(vw, nil)
  169. close(vw.todo)
  170. err := multierror.Append(nil, <-vw.done)
  171. err = multierror.Append(err, vw.ctr.UnmountVolumes(vw.d.LogVolumeEvent))
  172. err = multierror.Append(err, vw.d.Unmount(vw.ctr))
  173. return err.ErrorOrNil()
  174. }
  175. // Stat returns the metadata for path, relative to the current working directory
  176. // of vw inside the container filesystem view.
  177. func (vw *containerFSView) Stat(ctx context.Context, path string) (*types.ContainerPathStat, error) {
  178. var stat *types.ContainerPathStat
  179. err := vw.RunInFS(ctx, func() error {
  180. lstat, err := os.Lstat(path)
  181. if err != nil {
  182. return err
  183. }
  184. var target string
  185. if lstat.Mode()&os.ModeSymlink != 0 {
  186. // Fully evaluate symlinks along path to the ultimate
  187. // target, or as much as possible with broken links.
  188. target, err = symlink.FollowSymlinkInScope(path, "/")
  189. if err != nil {
  190. return err
  191. }
  192. }
  193. stat = &types.ContainerPathStat{
  194. Name: filepath.Base(path),
  195. Size: lstat.Size(),
  196. Mode: lstat.Mode(),
  197. Mtime: lstat.ModTime(),
  198. LinkTarget: target,
  199. }
  200. return nil
  201. })
  202. return stat, err
  203. }