123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261 |
- package daemon // import "github.com/docker/docker/daemon"
- import (
- "context"
- "errors"
- "fmt"
- "os"
- "path/filepath"
- "runtime"
- "strings"
- "github.com/containerd/log"
- "github.com/hashicorp/go-multierror"
- "github.com/moby/sys/mount"
- "github.com/moby/sys/symlink"
- "golang.org/x/sys/unix"
- "github.com/docker/docker/api/types"
- "github.com/docker/docker/container"
- "github.com/docker/docker/internal/mounttree"
- "github.com/docker/docker/internal/unshare"
- "github.com/docker/docker/pkg/fileutils"
- )
- type future struct {
- fn func() error
- res chan<- error
- }
- // containerFSView allows functions to be run in the context of a container's
- // filesystem. Inside these functions, the root directory is the container root
- // for all native OS filesystem APIs, including, but not limited to, the [os]
- // and [golang.org/x/sys/unix] packages. The view of the container's filesystem
- // is live and read-write. Each view has its own private set of tmpfs mounts.
- // Any files written under a tmpfs mount are not visible to processes inside the
- // container nor any other view of the container's filesystem, and vice versa.
- //
- // Each view has its own current working directory which is initialized to the
- // root of the container filesystem and can be changed with [os.Chdir]. Changes
- // to the current directory persist across successive [*containerFSView.RunInFS]
- // and [*containerFSView.GoInFS] calls.
- //
- // Multiple views of the same container filesystem can coexist at the same time.
- // Only one function can be running in a particular filesystem view at any given
- // time. Calls to [*containerFSView.RunInFS] or [*containerFSView.GoInFS] will
- // block while another function is running. If more than one call is blocked
- // concurrently, the order they are unblocked is undefined.
- type containerFSView struct {
- d *Daemon
- ctr *container.Container
- todo chan future
- done chan error
- }
- // openContainerFS opens a new view of the container's filesystem.
- func (daemon *Daemon) openContainerFS(container *container.Container) (_ *containerFSView, err error) {
- if err := daemon.Mount(container); err != nil {
- return nil, err
- }
- defer func() {
- if err != nil {
- _ = daemon.Unmount(container)
- }
- }()
- mounts, err := daemon.setupMounts(container)
- if err != nil {
- return nil, err
- }
- defer func() {
- if err != nil {
- _ = container.UnmountVolumes(daemon.LogVolumeEvent)
- }
- }()
- // Setup in initial mount namespace complete. We're ready to unshare the
- // mount namespace and bind the volume mounts into that private view of
- // the container FS.
- todo := make(chan future)
- done := make(chan error)
- err = unshare.Go(unix.CLONE_NEWNS,
- func() error {
- if err := mount.MakeRSlave("/"); err != nil {
- return err
- }
- for _, m := range mounts {
- dest, err := container.GetResourcePath(m.Destination)
- if err != nil {
- return err
- }
- var stat os.FileInfo
- stat, err = os.Stat(m.Source)
- if err != nil {
- return err
- }
- if err := fileutils.CreateIfNotExists(dest, stat.IsDir()); err != nil {
- return err
- }
- bindMode := "rbind"
- if m.NonRecursive {
- bindMode = "bind"
- }
- writeMode := "ro"
- if m.Writable {
- writeMode = "rw"
- if m.ReadOnlyNonRecursive {
- return errors.New("options conflict: Writable && ReadOnlyNonRecursive")
- }
- if m.ReadOnlyForceRecursive {
- return errors.New("options conflict: Writable && ReadOnlyForceRecursive")
- }
- }
- if m.ReadOnlyNonRecursive && m.ReadOnlyForceRecursive {
- return errors.New("options conflict: ReadOnlyNonRecursive && ReadOnlyForceRecursive")
- }
- // openContainerFS() is called for temporary mounts
- // outside the container. Soon these will be unmounted
- // with lazy unmount option and given we have mounted
- // them rbind, all the submounts will propagate if these
- // are shared. If daemon is running in host namespace
- // and has / as shared then these unmounts will
- // propagate and unmount original mount as well. So make
- // all these mounts rprivate. Do not use propagation
- // property of volume as that should apply only when
- // mounting happens inside the container.
- opts := strings.Join([]string{bindMode, writeMode, "rprivate"}, ",")
- if err := mount.Mount(m.Source, dest, "", opts); err != nil {
- return err
- }
- if !m.Writable && !m.ReadOnlyNonRecursive {
- if err := makeMountRRO(dest); err != nil {
- if m.ReadOnlyForceRecursive {
- return err
- } else {
- log.G(context.TODO()).WithError(err).Debugf("Failed to make %q recursively read-only", dest)
- }
- }
- }
- }
- return mounttree.SwitchRoot(container.BaseFS)
- },
- func() {
- defer close(done)
- for it := range todo {
- err := it.fn()
- if it.res != nil {
- it.res <- err
- }
- }
- // The thread will terminate when this goroutine returns, taking the
- // mount namespace and all the volume bind-mounts with it.
- },
- )
- if err != nil {
- return nil, err
- }
- vw := &containerFSView{
- d: daemon,
- ctr: container,
- todo: todo,
- done: done,
- }
- runtime.SetFinalizer(vw, (*containerFSView).Close)
- return vw, nil
- }
- // RunInFS synchronously runs fn in the context of the container filesytem and
- // passes through its return value.
- //
- // The container filesystem is only visible to functions called in the same
- // goroutine as fn. Goroutines started from fn will see the host's filesystem.
- func (vw *containerFSView) RunInFS(ctx context.Context, fn func() error) error {
- res := make(chan error)
- select {
- case vw.todo <- future{fn: fn, res: res}:
- case <-ctx.Done():
- return ctx.Err()
- }
- return <-res
- }
- // GoInFS starts fn in the container FS. It blocks until fn is started but does
- // not wait until fn returns. An error is returned if ctx is canceled before fn
- // has been started.
- //
- // The container filesystem is only visible to functions called in the same
- // goroutine as fn. Goroutines started from fn will see the host's filesystem.
- func (vw *containerFSView) GoInFS(ctx context.Context, fn func()) error {
- select {
- case vw.todo <- future{fn: func() error { fn(); return nil }}:
- return nil
- case <-ctx.Done():
- return ctx.Err()
- }
- }
- // Close waits until any in-flight operations complete and frees all
- // resources associated with vw.
- func (vw *containerFSView) Close() error {
- runtime.SetFinalizer(vw, nil)
- close(vw.todo)
- err := multierror.Append(nil, <-vw.done)
- err = multierror.Append(err, vw.ctr.UnmountVolumes(vw.d.LogVolumeEvent))
- err = multierror.Append(err, vw.d.Unmount(vw.ctr))
- return err.ErrorOrNil()
- }
- // Stat returns the metadata for path, relative to the current working directory
- // of vw inside the container filesystem view.
- func (vw *containerFSView) Stat(ctx context.Context, path string) (*types.ContainerPathStat, error) {
- var stat *types.ContainerPathStat
- err := vw.RunInFS(ctx, func() error {
- lstat, err := os.Lstat(path)
- if err != nil {
- return err
- }
- var target string
- if lstat.Mode()&os.ModeSymlink != 0 {
- // Fully evaluate symlinks along path to the ultimate
- // target, or as much as possible with broken links.
- target, err = symlink.FollowSymlinkInScope(path, "/")
- if err != nil {
- return err
- }
- }
- stat = &types.ContainerPathStat{
- Name: filepath.Base(path),
- Size: lstat.Size(),
- Mode: lstat.Mode(),
- Mtime: lstat.ModTime(),
- LinkTarget: target,
- }
- return nil
- })
- return stat, err
- }
- // makeMountRRO makes the mount recursively read-only.
- func makeMountRRO(dest string) error {
- attr := &unix.MountAttr{
- Attr_set: unix.MOUNT_ATTR_RDONLY,
- }
- var err error
- for {
- err = unix.MountSetattr(-1, dest, unix.AT_RECURSIVE, attr)
- if !errors.Is(err, unix.EINTR) {
- break
- }
- }
- if err != nil {
- err = fmt.Errorf("failed to apply MOUNT_ATTR_RDONLY with AT_RECURSIVE to %q: %w", dest, err)
- }
- return err
- }
|