From bfb810445c3c111478f5e0e6268ef334c38f38cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Gronowski?= Date: Fri, 9 Jun 2023 20:43:45 +0200 Subject: [PATCH] volumes: Implement subpath mount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `VolumeOptions` now has a `Subpath` field which allows to specify a path relative to the volume that should be mounted as a destination. Symlinks are supported, but they cannot escape the base volume directory. Signed-off-by: Paweł Gronowski --- .../router/container/container_routes.go | 8 + api/swagger.yaml | 4 + api/types/mount/mount.go | 1 + daemon/containerfs_linux.go | 3 +- daemon/start.go | 3 +- daemon/volumes_unix.go | 36 +++- daemon/volumes_windows.go | 25 ++- docs/api/version-history.md | 3 + integration/internal/container/container.go | 25 +++ integration/volume/mount_test.go | 185 ++++++++++++++++++ internal/safepath/common.go | 66 +++++++ internal/safepath/common_test.go | 31 +++ internal/safepath/errors.go | 42 ++++ internal/safepath/join_test.go | 144 ++++++++++++++ internal/safepath/safepath.go | 63 ++++++ volume/mounts/linux_parser.go | 12 +- volume/mounts/mounts.go | 80 +++++++- volume/mounts/parser.go | 8 + volume/mounts/validate_test.go | 3 + volume/mounts/windows_parser.go | 15 +- 20 files changed, 727 insertions(+), 30 deletions(-) create mode 100644 integration/volume/mount_test.go create mode 100644 internal/safepath/common.go create mode 100644 internal/safepath/common_test.go create mode 100644 internal/safepath/errors.go create mode 100644 internal/safepath/join_test.go create mode 100644 internal/safepath/safepath.go diff --git a/api/server/router/container/container_routes.go b/api/server/router/container/container_routes.go index 08a9f3c0ad..dbb2b9beda 100644 --- a/api/server/router/container/container_routes.go +++ b/api/server/router/container/container_routes.go @@ -628,6 +628,14 @@ func (s *containerRouter) postContainersCreate(ctx context.Context, w http.Respo } } + if versions.LessThan(version, "1.45") { + for _, m := range hostConfig.Mounts { + if m.VolumeOptions != nil && m.VolumeOptions.Subpath != "" { + return errdefs.InvalidParameter(errors.New("VolumeOptions.Subpath needs API v1.45 or newer")) + } + } + } + var warnings []string if warn, err := handleMACAddressBC(config, hostConfig, networkingConfig, version); err != nil { return err diff --git a/api/swagger.yaml b/api/swagger.yaml index 90556d75ed..4cdffb6a38 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -423,6 +423,10 @@ definitions: type: "object" additionalProperties: type: "string" + Subpath: + description: "Source path inside the volume. Must be relative without any back traversals." + type: "string" + example: "dir-inside-volume/subdirectory" TmpfsOptions: description: "Optional configuration for the `tmpfs` type." type: "object" diff --git a/api/types/mount/mount.go b/api/types/mount/mount.go index 57edf2ef18..6fe04da257 100644 --- a/api/types/mount/mount.go +++ b/api/types/mount/mount.go @@ -96,6 +96,7 @@ type BindOptions struct { type VolumeOptions struct { NoCopy bool `json:",omitempty"` Labels map[string]string `json:",omitempty"` + Subpath string `json:",omitempty"` DriverConfig *Driver `json:",omitempty"` } diff --git a/daemon/containerfs_linux.go b/daemon/containerfs_linux.go index 384c86b3f0..eb8674ddf7 100644 --- a/daemon/containerfs_linux.go +++ b/daemon/containerfs_linux.go @@ -63,11 +63,12 @@ func (daemon *Daemon) openContainerFS(container *container.Container) (_ *contai } }() - mounts, err := daemon.setupMounts(container) + mounts, cleanup, err := daemon.setupMounts(container) if err != nil { return nil, err } defer func() { + cleanup() if err != nil { _ = container.UnmountVolumes(daemon.LogVolumeEvent) } diff --git a/daemon/start.go b/daemon/start.go index 43f6914d31..ec8b11f843 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -164,11 +164,12 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore return err } - m, err := daemon.setupMounts(container) + m, cleanup, err := daemon.setupMounts(container) if err != nil { return err } mnts = append(mnts, m...) + defer cleanup() spec, err := daemon.createSpec(ctx, daemonCfg, container, mnts) if err != nil { diff --git a/daemon/volumes_unix.go b/daemon/volumes_unix.go index 959124535a..59a091e6c7 100644 --- a/daemon/volumes_unix.go +++ b/daemon/volumes_unix.go @@ -3,15 +3,18 @@ package daemon // import "github.com/docker/docker/daemon" import ( + "context" "fmt" "os" "sort" "strconv" "strings" + "github.com/containerd/log" "github.com/docker/docker/api/types/events" mounttypes "github.com/docker/docker/api/types/mount" "github.com/docker/docker/container" + "github.com/docker/docker/internal/cleanups" volumemounts "github.com/docker/docker/volume/mounts" "github.com/pkg/errors" ) @@ -19,23 +22,34 @@ import ( // setupMounts iterates through each of the mount points for a container and // calls Setup() on each. It also looks to see if is a network mount such as // /etc/resolv.conf, and if it is not, appends it to the array of mounts. -func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) { +// +// The cleanup function should be called as soon as the container has been +// started. +func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, func() error, error) { var mounts []container.Mount // TODO: tmpfs mounts should be part of Mountpoints tmpfsMounts := make(map[string]bool) tmpfsMountInfo, err := c.TmpfsMounts() if err != nil { - return nil, err + return nil, nil, err } for _, m := range tmpfsMountInfo { tmpfsMounts[m.Destination] = true } + + cleanups := cleanups.Composite{} + defer func() { + if err := cleanups.Call(); err != nil { + log.G(context.TODO()).WithError(err).Warn("failed to cleanup temporary mounts created by MountPoint.Setup") + } + }() + for _, m := range c.MountPoints { if tmpfsMounts[m.Destination] { continue } if err := daemon.lazyInitializeVolume(c.ID, m); err != nil { - return nil, err + return nil, nil, err } // If the daemon is being shutdown, we should not let a container start if it is trying to // mount the socket the daemon is listening on. During daemon shutdown, the socket @@ -48,10 +62,12 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er return nil } - path, err := m.Setup(c.MountLabel, daemon.idMapping.RootPair(), checkfunc) + path, clean, err := m.Setup(c.MountLabel, daemon.idMapping.RootPair(), checkfunc) if err != nil { - return nil, err + return nil, nil, err } + cleanups.Add(clean) + if !c.TrySetNetworkMount(m.Destination, path) { mnt := container.Mount{ Source: path, @@ -61,13 +77,13 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er } if m.Spec.Type == mounttypes.TypeBind && m.Spec.BindOptions != nil { if !m.Spec.ReadOnly && m.Spec.BindOptions.ReadOnlyNonRecursive { - return nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyNonRecursive") + return nil, nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyNonRecursive") } if !m.Spec.ReadOnly && m.Spec.BindOptions.ReadOnlyForceRecursive { - return nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyForceRecursive") + return nil, nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyForceRecursive") } if m.Spec.BindOptions.ReadOnlyNonRecursive && m.Spec.BindOptions.ReadOnlyForceRecursive { - return nil, errors.New("mount options conflict: ReadOnlyNonRecursive && BindOptions.ReadOnlyForceRecursive") + return nil, nil, errors.New("mount options conflict: ReadOnlyNonRecursive && BindOptions.ReadOnlyForceRecursive") } mnt.NonRecursive = m.Spec.BindOptions.NonRecursive mnt.ReadOnlyNonRecursive = m.Spec.BindOptions.ReadOnlyNonRecursive @@ -98,11 +114,11 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er // up to the user to make sure the file has proper ownership for userns if strings.Index(mnt.Source, daemon.repository) == 0 { if err := os.Chown(mnt.Source, rootIDs.UID, rootIDs.GID); err != nil { - return nil, err + return nil, nil, err } } } - return append(mounts, netMounts...), nil + return append(mounts, netMounts...), cleanups.Release(), nil } // sortMounts sorts an array of mounts in lexicographic order. This ensure that diff --git a/daemon/volumes_windows.go b/daemon/volumes_windows.go index 574cc48f1c..012ef1cdec 100644 --- a/daemon/volumes_windows.go +++ b/daemon/volumes_windows.go @@ -1,10 +1,13 @@ package daemon // import "github.com/docker/docker/daemon" import ( + "context" "sort" + "github.com/containerd/log" "github.com/docker/docker/api/types/mount" "github.com/docker/docker/container" + "github.com/docker/docker/internal/cleanups" "github.com/docker/docker/pkg/idtools" volumemounts "github.com/docker/docker/volume/mounts" ) @@ -13,21 +16,31 @@ import ( // of the configured mounts on the container to the OCI mount structure // which will ultimately be passed into the oci runtime during container creation. // It also ensures each of the mounts are lexicographically sorted. - +// +// The cleanup function should be called as soon as the container has been +// started. +// // BUGBUG TODO Windows containerd. This would be much better if it returned // an array of runtime spec mounts, not container mounts. Then no need to // do multiple transitions. +func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, func() error, error) { + cleanups := cleanups.Composite{} + defer func() { + if err := cleanups.Call(); err != nil { + log.G(context.TODO()).WithError(err).Warn("failed to cleanup temporary mounts created by MountPoint.Setup") + } + }() -func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) { var mnts []container.Mount for _, mount := range c.MountPoints { // type is volumemounts.MountPoint if err := daemon.lazyInitializeVolume(c.ID, mount); err != nil { - return nil, err + return nil, nil, err } - s, err := mount.Setup(c.MountLabel, idtools.Identity{}, nil) + s, c, err := mount.Setup(c.MountLabel, idtools.Identity{}, nil) if err != nil { - return nil, err + return nil, nil, err } + cleanups.Add(c) mnts = append(mnts, container.Mount{ Source: s, @@ -37,7 +50,7 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er } sort.Sort(mounts(mnts)) - return mnts, nil + return mnts, cleanups.Release(), nil } // setBindModeIfNull is platform specific processing which is a no-op on diff --git a/docs/api/version-history.md b/docs/api/version-history.md index 16fafdb7b4..deb0fe2d15 100644 --- a/docs/api/version-history.md +++ b/docs/api/version-history.md @@ -17,6 +17,9 @@ keywords: "API, Docker, rcli, REST, documentation" [Docker Engine API v1.45](https://docs.docker.com/engine/api/v1.45/) documentation +* `POST /containers/create` now supports `VolumeOptions.Subpath` which allows a + subpath of a named volume to be mounted. + ## v1.44 API changes [Docker Engine API v1.44](https://docs.docker.com/engine/api/v1.44/) documentation diff --git a/integration/internal/container/container.go b/integration/internal/container/container.go index 0974ce6bf1..dac52999ae 100644 --- a/integration/internal/container/container.go +++ b/integration/internal/container/container.go @@ -170,3 +170,28 @@ func Inspect(ctx context.Context, t *testing.T, apiClient client.APIClient, cont return c } + +type ContainerOutput struct { + Stdout, Stderr string +} + +// Output waits for the container to end running and returns its output. +func Output(ctx context.Context, client client.APIClient, id string) (ContainerOutput, error) { + logs, err := client.ContainerLogs(ctx, id, container.LogsOptions{Follow: true, ShowStdout: true, ShowStderr: true}) + if err != nil { + return ContainerOutput{}, err + } + + defer logs.Close() + + var stdoutBuf, stderrBuf bytes.Buffer + _, err = stdcopy.StdCopy(&stdoutBuf, &stderrBuf, logs) + if err != nil { + return ContainerOutput{}, err + } + + return ContainerOutput{ + Stdout: stdoutBuf.String(), + Stderr: stderrBuf.String(), + }, nil +} diff --git a/integration/volume/mount_test.go b/integration/volume/mount_test.go new file mode 100644 index 0000000000..21e1186523 --- /dev/null +++ b/integration/volume/mount_test.go @@ -0,0 +1,185 @@ +package volume + +import ( + "context" + "path/filepath" + "strings" + "testing" + + containertypes "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/mount" + "github.com/docker/docker/api/types/network" + "github.com/docker/docker/api/types/versions" + "github.com/docker/docker/api/types/volume" + "github.com/docker/docker/client" + "github.com/docker/docker/integration/internal/container" + "github.com/docker/docker/internal/safepath" + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" + "gotest.tools/v3/skip" +) + +func TestRunMountVolumeSubdir(t *testing.T) { + skip.If(t, versions.LessThan(testEnv.DaemonAPIVersion(), "1.45"), "skip test from new feature") + + ctx := setupTest(t) + apiClient := testEnv.APIClient() + + testVolumeName := setupTestVolume(t, apiClient) + + for _, tc := range []struct { + name string + opts mount.VolumeOptions + cmd []string + volumeTarget string + createErr string + startErr string + expected string + skipPlatform string + }{ + {name: "subdir", opts: mount.VolumeOptions{Subpath: "subdir"}, cmd: []string{"ls", "/volume"}, expected: "hello.txt"}, + {name: "subdir link", opts: mount.VolumeOptions{Subpath: "hack/good"}, cmd: []string{"ls", "/volume"}, expected: "hello.txt"}, + {name: "subdir with copy data", opts: mount.VolumeOptions{Subpath: "bin"}, volumeTarget: "/bin", cmd: []string{"ls", "/bin/busybox"}, expected: "/bin/busybox", skipPlatform: "windows:copy not supported on Windows"}, + {name: "file", opts: mount.VolumeOptions{Subpath: "bar.txt"}, cmd: []string{"cat", "/volume"}, expected: "foo", skipPlatform: "windows:file bind mounts not supported on Windows"}, + {name: "relative with backtracks", opts: mount.VolumeOptions{Subpath: "../../../../../../etc/passwd"}, cmd: []string{"cat", "/volume"}, createErr: "subpath must be a relative path within the volume"}, + {name: "not existing", opts: mount.VolumeOptions{Subpath: "not-existing-path"}, cmd: []string{"cat", "/volume"}, startErr: (&safepath.ErrNotAccessible{}).Error()}, + + {name: "mount link", opts: mount.VolumeOptions{Subpath: filepath.Join("hack", "root")}, cmd: []string{"ls", "/volume"}, startErr: (&safepath.ErrEscapesBase{}).Error()}, + {name: "mount link link", opts: mount.VolumeOptions{Subpath: filepath.Join("hack", "bad")}, cmd: []string{"ls", "/volume"}, startErr: (&safepath.ErrEscapesBase{}).Error()}, + } { + t.Run(tc.name, func(t *testing.T) { + if tc.skipPlatform != "" { + platform, reason, _ := strings.Cut(tc.skipPlatform, ":") + if testEnv.DaemonInfo.OSType == platform { + t.Skip(reason) + } + } + + cfg := containertypes.Config{ + Image: "busybox", + Cmd: tc.cmd, + } + hostCfg := containertypes.HostConfig{ + Mounts: []mount.Mount{ + { + Type: mount.TypeVolume, + Source: testVolumeName, + Target: "/volume", + VolumeOptions: &tc.opts, + }, + }, + } + if testEnv.DaemonInfo.OSType == "windows" { + hostCfg.Mounts[0].Target = `C:\volume` + } + if tc.volumeTarget != "" { + hostCfg.Mounts[0].Target = tc.volumeTarget + } + + ctrName := strings.ReplaceAll(t.Name(), "/", "_") + create, creatErr := apiClient.ContainerCreate(ctx, &cfg, &hostCfg, &network.NetworkingConfig{}, nil, ctrName) + id := create.ID + if id != "" { + defer apiClient.ContainerRemove(ctx, id, containertypes.RemoveOptions{Force: true}) + } + + if tc.createErr != "" { + assert.ErrorContains(t, creatErr, tc.createErr) + return + } + assert.NilError(t, creatErr, "container creation failed") + + startErr := apiClient.ContainerStart(ctx, id, containertypes.StartOptions{}) + if tc.startErr != "" { + assert.ErrorContains(t, startErr, tc.startErr) + return + } + assert.NilError(t, startErr) + + output, err := container.Output(ctx, apiClient, id) + assert.Check(t, err) + t.Logf("stdout:\n%s", output.Stdout) + t.Logf("stderr:\n%s", output.Stderr) + + inspect, err := apiClient.ContainerInspect(ctx, id) + if assert.Check(t, err) { + assert.Check(t, is.Equal(inspect.State.ExitCode, 0)) + } + + assert.Check(t, is.Equal(strings.TrimSpace(output.Stderr), "")) + assert.Check(t, is.Equal(strings.TrimSpace(output.Stdout), tc.expected)) + }) + } +} + +// setupTestVolume sets up a volume with: +// . +// |-- bar.txt (file with "foo") +// |-- bin (directory) +// |-- subdir (directory) +// | |-- hello.txt (file with "world") +// |-- hack (directory) +// | |-- root (symlink to /) +// | |-- good (symlink to ../subdir) +// | |-- bad (symlink to root) +func setupTestVolume(t *testing.T, client client.APIClient) string { + t.Helper() + ctx := context.Background() + + volumeName := t.Name() + "-volume" + + err := client.VolumeRemove(ctx, volumeName, true) + assert.NilError(t, err, "failed to clean volume") + + _, err = client.VolumeCreate(ctx, volume.CreateOptions{ + Name: volumeName, + }) + assert.NilError(t, err, "failed to setup volume") + + mount := mount.Mount{ + Type: mount.TypeVolume, + Source: volumeName, + Target: "/volume", + } + + rootFs := "/" + if testEnv.DaemonInfo.OSType == "windows" { + mount.Target = `C:\volume` + rootFs = `C:` + } + + initCmd := "echo foo > /volume/bar.txt && " + + "mkdir /volume/bin && " + + "mkdir /volume/subdir && " + + "echo world > /volume/subdir/hello.txt && " + + "mkdir /volume/hack && " + + "ln -s " + rootFs + " /volume/hack/root && " + + "ln -s ../subdir /volume/hack/good && " + + "ln -s root /volume/hack/bad &&" + + "mkdir /volume/hack/iwanttobehackedwithtoctou" + + opts := []func(*container.TestContainerConfig){ + container.WithMount(mount), + container.WithCmd("sh", "-c", initCmd+"; ls -lah /volume /volume/hack/"), + } + if testEnv.DaemonInfo.OSType == "windows" { + // Can't create symlinks under HyperV isolation + opts = append(opts, container.WithIsolation(containertypes.IsolationProcess)) + } + + cid := container.Run(ctx, t, client, opts...) + defer client.ContainerRemove(ctx, cid, containertypes.RemoveOptions{Force: true}) + output, err := container.Output(ctx, client, cid) + + t.Logf("Setup stderr:\n%s", output.Stderr) + t.Logf("Setup stdout:\n%s", output.Stdout) + + assert.NilError(t, err) + assert.Assert(t, is.Equal(output.Stderr, "")) + + inspect, err := client.ContainerInspect(ctx, cid) + assert.NilError(t, err) + assert.Assert(t, is.Equal(inspect.State.ExitCode, 0)) + + return volumeName +} diff --git a/internal/safepath/common.go b/internal/safepath/common.go new file mode 100644 index 0000000000..5beb2e6e43 --- /dev/null +++ b/internal/safepath/common.go @@ -0,0 +1,66 @@ +package safepath + +import ( + "os" + "path/filepath" + + "github.com/pkg/errors" +) + +// evaluatePath evaluates symlinks in the concatenation of path and subpath. If +// err is nil, resolvedBasePath will contain result of resolving all symlinks +// in the given path, and resolvedSubpath will contain a relative path rooted +// at the resolvedBasePath pointing to the concatenation after resolving all +// symlinks. +func evaluatePath(path, subpath string) (resolvedBasePath string, resolvedSubpath string, err error) { + baseResolved, err := filepath.EvalSymlinks(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return "", "", &ErrNotAccessible{Path: path, Cause: err} + } + return "", "", errors.Wrapf(err, "error while resolving symlinks in base directory %q", path) + } + + combinedPath := filepath.Join(baseResolved, subpath) + combinedResolved, err := filepath.EvalSymlinks(combinedPath) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return "", "", &ErrNotAccessible{Path: combinedPath, Cause: err} + } + return "", "", errors.Wrapf(err, "error while resolving symlinks in combined path %q", combinedPath) + } + + subpart, err := filepath.Rel(baseResolved, combinedResolved) + if err != nil { + return "", "", &ErrEscapesBase{Base: baseResolved, Subpath: subpath} + } + + if !filepath.IsLocal(subpart) { + return "", "", &ErrEscapesBase{Base: baseResolved, Subpath: subpath} + } + + return baseResolved, subpart, nil +} + +// isLocalTo reports whether path, using lexical analysis only, has all of these properties: +// - is within the subtree rooted at basepath +// - is not empty +// - on Windows, is not a reserved name such as "NUL" +// +// If isLocalTo(path, basepath) returns true, then +// +// filepath.Rel(basepath, path) +// +// will always produce an unrooted path with no `..` elements. +// +// isLocalTo is a purely lexical operation. In particular, it does not account for the effect of any symbolic links that may exist in the filesystem. +// +// Both path and basepath are expected to be absolute paths. +func isLocalTo(path, basepath string) bool { + rel, err := filepath.Rel(basepath, path) + if err != nil { + return false + } + + return filepath.IsLocal(rel) +} diff --git a/internal/safepath/common_test.go b/internal/safepath/common_test.go new file mode 100644 index 0000000000..284481ae7a --- /dev/null +++ b/internal/safepath/common_test.go @@ -0,0 +1,31 @@ +package safepath + +import ( + "testing" + + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" +) + +func TestIsLocalTo(t *testing.T) { + for _, tc := range []struct { + name string + subpath string + result bool + }{ + {name: "same", subpath: "/volume", result: true}, + {name: "1 level subpath", subpath: "/volume/sub", result: true}, + {name: "2 level subpath", subpath: "/volume/sub/path", result: true}, + {name: "absolute", subpath: "/etc/passwd", result: false}, + {name: "backtrack", subpath: "/volume/../", result: false}, + {name: "backtrack inside", subpath: "/volume/sub/../", result: true}, + {name: "relative path", subpath: "./rel", result: false}, + {name: "file with dots", subpath: "/volume/file..with.dots", result: true}, + {name: "file starting with dots", subpath: "/volume/..file", result: true}, + } { + t.Run(tc.name, func(t *testing.T) { + result := isLocalTo(tc.subpath, "/volume") + assert.Check(t, is.Equal(result, tc.result)) + }) + } +} diff --git a/internal/safepath/errors.go b/internal/safepath/errors.go new file mode 100644 index 0000000000..8fcfe262ee --- /dev/null +++ b/internal/safepath/errors.go @@ -0,0 +1,42 @@ +package safepath + +// ErrNotAccessible is returned by Join when the resulting path doesn't exist, +// is not accessible, or any of the path components was replaced with a symlink +// during the path traversal. +type ErrNotAccessible struct { + Path string + Cause error +} + +func (*ErrNotAccessible) NotFound() {} + +func (e *ErrNotAccessible) Unwrap() error { + return e.Cause +} + +func (e *ErrNotAccessible) Error() string { + msg := "cannot access path " + e.Path + if e.Cause != nil { + msg += ": " + e.Cause.Error() + } + return msg +} + +// ErrEscapesBase is returned by Join when the resulting concatenation would +// point outside of the specified base directory. +type ErrEscapesBase struct { + Base, Subpath string +} + +func (*ErrEscapesBase) InvalidParameter() {} + +func (e *ErrEscapesBase) Error() string { + msg := "path concatenation escapes the base directory" + if e.Base != "" { + msg += ", base: " + e.Base + } + if e.Subpath != "" { + msg += ", subpath: " + e.Subpath + } + return msg +} diff --git a/internal/safepath/join_test.go b/internal/safepath/join_test.go new file mode 100644 index 0000000000..33adbebc26 --- /dev/null +++ b/internal/safepath/join_test.go @@ -0,0 +1,144 @@ +package safepath + +import ( + "os" + "path/filepath" + "runtime" + "strings" + "testing" + + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" +) + +func TestJoinEscapingSymlink(t *testing.T) { + type testCase struct { + name string + target string + } + var cases []testCase + + if runtime.GOOS == "windows" { + cases = []testCase{ + {name: "root", target: `C:\`}, + {name: "absolute file", target: `C:\Windows\System32\cmd.exe`}, + } + } else { + cases = []testCase{ + {name: "root", target: "/"}, + {name: "absolute file", target: "/etc/passwd"}, + } + } + cases = append(cases, testCase{name: "relative", target: "../../"}) + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + tempDir := t.TempDir() + dir, err := filepath.EvalSymlinks(tempDir) + assert.NilError(t, err, "filepath.EvalSymlinks failed for temporary directory %s", tempDir) + + err = os.Symlink(tc.target, filepath.Join(dir, "link")) + assert.NilError(t, err, "failed to create symlink to %s", tc.target) + + safe, err := Join(dir, "link") + if err == nil { + safe.Close() + } + assert.ErrorType(t, err, &ErrEscapesBase{}) + }) + } +} + +func TestJoinGoodSymlink(t *testing.T) { + tempDir := t.TempDir() + dir, err := filepath.EvalSymlinks(tempDir) + assert.NilError(t, err, "filepath.EvalSymlinks failed for temporary directory %s", tempDir) + + assert.Assert(t, os.WriteFile(filepath.Join(dir, "foo"), []byte("bar"), 0o744), "failed to create file 'foo'") + assert.Assert(t, os.Mkdir(filepath.Join(dir, "subdir"), 0o744), "failed to create directory 'subdir'") + assert.Assert(t, os.WriteFile(filepath.Join(dir, "subdir/hello.txt"), []byte("world"), 0o744), "failed to create file 'subdir/hello.txt'") + + assert.Assert(t, os.Symlink(filepath.Join(dir, "subdir"), filepath.Join(dir, "subdir_link_absolute")), "failed to create absolute symlink to directory 'subdir'") + assert.Assert(t, os.Symlink("subdir", filepath.Join(dir, "subdir_link_relative")), "failed to create relative symlink to directory 'subdir'") + + assert.Assert(t, os.Symlink(filepath.Join(dir, "foo"), filepath.Join(dir, "foo_link_absolute")), "failed to create absolute symlink to file 'foo'") + assert.Assert(t, os.Symlink("foo", filepath.Join(dir, "foo_link_relative")), "failed to create relative symlink to file 'foo'") + + for _, target := range []string{ + "foo", "subdir", + "subdir_link_absolute", "foo_link_absolute", + "subdir_link_relative", "foo_link_relative", + } { + t.Run(target, func(t *testing.T) { + safe, err := Join(dir, target) + assert.NilError(t, err) + + defer safe.Close() + if strings.HasPrefix(target, "subdir") { + data, err := os.ReadFile(filepath.Join(safe.Path(), "hello.txt")) + assert.NilError(t, err) + assert.Assert(t, is.Equal(string(data), "world")) + } + }) + } +} + +func TestJoinWithSymlinkReplace(t *testing.T) { + tempDir := t.TempDir() + dir, err := filepath.EvalSymlinks(tempDir) + assert.NilError(t, err, "filepath.EvalSymlinks failed for temporary directory %s", tempDir) + + link := filepath.Join(dir, "link") + target := filepath.Join(dir, "foo") + + err = os.WriteFile(target, []byte("bar"), 0o744) + assert.NilError(t, err, "failed to create test file") + + err = os.Symlink(target, link) + assert.Check(t, err, "failed to create symlink to foo") + + safe, err := Join(dir, "link") + assert.NilError(t, err) + + defer safe.Close() + + // Delete the link target. + err = os.Remove(target) + if runtime.GOOS == "windows" { + // On Windows it shouldn't be possible. + assert.Assert(t, is.ErrorType(err, &os.PathError{}), "link shouldn't be deletable before cleanup") + } else { + // On Linux we can delete it just fine. + assert.NilError(t, err, "failed to remove symlink") + + // Replace target with a symlink to /etc/paswd + err = os.Symlink("/etc/passwd", target) + assert.NilError(t, err, "failed to create symlink") + } + + // The returned safe path should still point to the old file. + data, err := os.ReadFile(safe.Path()) + assert.NilError(t, err, "failed to read file") + + assert.Check(t, is.Equal(string(data), "bar")) + +} + +func TestJoinCloseInvalidates(t *testing.T) { + tempDir := t.TempDir() + dir, err := filepath.EvalSymlinks(tempDir) + assert.NilError(t, err) + + foo := filepath.Join(dir, "foo") + err = os.WriteFile(foo, []byte("bar"), 0o744) + assert.NilError(t, err, "failed to create test file") + + safe, err := Join(dir, "foo") + assert.NilError(t, err) + + assert.Check(t, safe.IsValid()) + + assert.NilError(t, safe.Close()) + + assert.Check(t, !safe.IsValid()) +} diff --git a/internal/safepath/safepath.go b/internal/safepath/safepath.go new file mode 100644 index 0000000000..2b59e49ff1 --- /dev/null +++ b/internal/safepath/safepath.go @@ -0,0 +1,63 @@ +package safepath + +import ( + "context" + "fmt" + "sync" + + "github.com/containerd/log" +) + +type SafePath struct { + path string + cleanup func() error + mutex sync.Mutex + + // Immutable fields + sourceBase, sourceSubpath string +} + +// Close releases the resources used by the path. +func (s *SafePath) Close() error { + s.mutex.Lock() + defer s.mutex.Unlock() + + if s.path == "" { + base, sub := s.SourcePath() + log.G(context.TODO()).WithFields(log.Fields{ + "path": s.Path(), + "sourceBase": base, + "sourceSubpath": sub, + }).Warn("an attempt to close an already closed SafePath") + return nil + } + + s.path = "" + if s.cleanup != nil { + return s.cleanup() + } + return nil +} + +// IsValid return true when path can still be used and wasn't cleaned up by Close. +func (s *SafePath) IsValid() bool { + s.mutex.Lock() + defer s.mutex.Unlock() + return s.path != "" +} + +// Path returns a safe, temporary path that can be used to access the original path. +func (s *SafePath) Path() string { + s.mutex.Lock() + defer s.mutex.Unlock() + if s.path == "" { + panic(fmt.Sprintf("use-after-close attempted for safepath with source [%s, %s]", s.sourceBase, s.sourceSubpath)) + } + return s.path +} + +// SourcePath returns the source path the safepath points to. +func (s *SafePath) SourcePath() (string, string) { + // No mutex lock because these are immutable. + return s.sourceBase, s.sourceSubpath +} diff --git a/volume/mounts/linux_parser.go b/volume/mounts/linux_parser.go index 1b64c23935..eef1cc2ec8 100644 --- a/volume/mounts/linux_parser.go +++ b/volume/mounts/linux_parser.go @@ -94,8 +94,18 @@ func (p *linuxParser) validateMountConfigImpl(mnt *mount.Mount, validateBindSour if mnt.BindOptions != nil { return &errMountConfig{mnt, errExtraField("BindOptions")} } + anonymousVolume := len(mnt.Source) == 0 - if len(mnt.Source) == 0 && mnt.ReadOnly { + if mnt.VolumeOptions != nil && mnt.VolumeOptions.Subpath != "" { + if anonymousVolume { + return &errMountConfig{mnt, errAnonymousVolumeWithSubpath} + } + + if !filepath.IsLocal(mnt.VolumeOptions.Subpath) { + return &errMountConfig{mnt, errInvalidSubpath} + } + } + if mnt.ReadOnly && anonymousVolume { return &errMountConfig{mnt, fmt.Errorf("must not set ReadOnly mode when using anonymous volumes")} } case mount.TypeTmpfs: diff --git a/volume/mounts/mounts.go b/volume/mounts/mounts.go index 75c4f471ca..35eb35c4f6 100644 --- a/volume/mounts/mounts.go +++ b/volume/mounts/mounts.go @@ -9,6 +9,7 @@ import ( "github.com/containerd/log" mounttypes "github.com/docker/docker/api/types/mount" + "github.com/docker/docker/internal/safepath" "github.com/docker/docker/pkg/idtools" "github.com/docker/docker/pkg/stringid" "github.com/docker/docker/volume" @@ -74,14 +75,34 @@ type MountPoint struct { // Specifically needed for containers which are running and calls to `docker cp` // because both these actions require mounting the volumes. active int + + // SafePaths created by Setup that should be cleaned up before unmounting + // the volume. + safePaths []*safepath.SafePath } -// Cleanup frees resources used by the mountpoint +// Cleanup frees resources used by the mountpoint and cleans up all the paths +// returned by Setup that hasn't been cleaned up by the caller. func (m *MountPoint) Cleanup() error { if m.Volume == nil || m.ID == "" { return nil } + for _, p := range m.safePaths { + if !p.IsValid() { + continue + } + + err := p.Close() + base, sub := p.SourcePath() + log.G(context.TODO()).WithFields(log.Fields{ + "error": err, + "path": p.Path(), + "sourceBase": base, + "sourceSubpath": sub, + }).Warn("cleaning up SafePath that hasn't been cleaned up by the caller") + } + if err := m.Volume.Unmount(m.ID); err != nil { return errors.Wrapf(err, "error unmounting volume %s", m.Volume.Name()) } @@ -97,9 +118,17 @@ func (m *MountPoint) Cleanup() error { // configured, or creating the source directory if supplied. // The, optional, checkFun parameter allows doing additional checking // before creating the source directory on the host. -func (m *MountPoint) Setup(mountLabel string, rootIDs idtools.Identity, checkFun func(m *MountPoint) error) (path string, retErr error) { +// +// The returned path can be a temporary path, caller is responsible to +// call the returned cleanup function as soon as the path is not needed. +// Cleanup doesn't unmount the underlying volumes (if any), it only +// frees up the resources that were needed to guarantee that the path +// still points to the same target (to avoid TOCTOU attack). +// +// Cleanup function doesn't need to be called when error is returned. +func (m *MountPoint) Setup(mountLabel string, rootIDs idtools.Identity, checkFun func(m *MountPoint) error) (path string, cleanup func() error, retErr error) { if m.SkipMountpointCreation { - return m.Source, nil + return m.Source, noCleanup, nil } defer func() { @@ -107,16 +136,24 @@ func (m *MountPoint) Setup(mountLabel string, rootIDs idtools.Identity, checkFun return } - sourcePath, err := filepath.EvalSymlinks(m.Source) + sourcePath, err := filepath.EvalSymlinks(path) if err != nil { path = "" retErr = errors.Wrapf(err, "error evaluating symlinks from mount source %q", m.Source) + if cleanupErr := cleanup(); cleanupErr != nil { + log.G(context.TODO()).WithError(cleanupErr).Warn("failed to cleanup after error") + } + cleanup = noCleanup return } err = label.Relabel(sourcePath, mountLabel, label.IsShared(m.Mode)) if err != nil && !errors.Is(err, syscall.ENOTSUP) { path = "" retErr = errors.Wrapf(err, "error setting label on mount source '%s'", sourcePath) + if cleanupErr := cleanup(); cleanupErr != nil { + log.G(context.TODO()).WithError(cleanupErr).Warn("failed to cleanup after error") + } + cleanup = noCleanup } }() @@ -127,16 +164,34 @@ func (m *MountPoint) Setup(mountLabel string, rootIDs idtools.Identity, checkFun } volumePath, err := m.Volume.Mount(id) if err != nil { - return "", errors.Wrapf(err, "error while mounting volume '%s'", m.Source) + return "", noCleanup, errors.Wrapf(err, "error while mounting volume '%s'", m.Source) } m.ID = id + clean := noCleanup + if m.Spec.VolumeOptions != nil && m.Spec.VolumeOptions.Subpath != "" { + subpath := m.Spec.VolumeOptions.Subpath + + safePath, err := safepath.Join(volumePath, subpath) + if err != nil { + if err := m.Volume.Unmount(id); err != nil { + log.G(context.TODO()).WithError(err).Error("failed to unmount after safepath.Join failed") + } + return "", noCleanup, err + } + m.safePaths = append(m.safePaths, safePath) + log.G(context.TODO()).Debugf("mounting (%s|%s) via %s", volumePath, subpath, safePath.Path()) + + clean = safePath.Close + volumePath = safePath.Path() + } + m.active++ - return volumePath, nil + return volumePath, clean, nil } if len(m.Source) == 0 { - return "", fmt.Errorf("Unable to setup mount point, neither source nor volume defined") + return "", noCleanup, fmt.Errorf("Unable to setup mount point, neither source nor volume defined") } if m.Type == mounttypes.TypeBind { @@ -145,7 +200,7 @@ func (m *MountPoint) Setup(mountLabel string, rootIDs idtools.Identity, checkFun // the process of shutting down. if checkFun != nil { if err := checkFun(m); err != nil { - return "", err + return "", noCleanup, err } } @@ -154,12 +209,12 @@ func (m *MountPoint) Setup(mountLabel string, rootIDs idtools.Identity, checkFun if err := idtools.MkdirAllAndChownNew(m.Source, 0o755, rootIDs); err != nil { if perr, ok := err.(*os.PathError); ok { if perr.Err != syscall.ENOTDIR { - return "", errors.Wrapf(err, "error while creating mount source path '%s'", m.Source) + return "", noCleanup, errors.Wrapf(err, "error while creating mount source path '%s'", m.Source) } } } } - return m.Source, nil + return m.Source, noCleanup, nil } func (m *MountPoint) LiveRestore(ctx context.Context) error { @@ -203,3 +258,8 @@ func errInvalidMode(mode string) error { func errInvalidSpec(spec string) error { return errors.Errorf("invalid volume specification: '%s'", spec) } + +// noCleanup is a no-op cleanup function. +func noCleanup() error { + return nil +} diff --git a/volume/mounts/parser.go b/volume/mounts/parser.go index 2bcf9ab053..c4ff6c8c7e 100644 --- a/volume/mounts/parser.go +++ b/volume/mounts/parser.go @@ -11,6 +11,14 @@ import ( // It's used by both LCOW and Linux parsers. var ErrVolumeTargetIsRoot = errors.New("invalid specification: destination can't be '/'") +// errAnonymousVolumeWithSubpath is returned when Subpath is specified for +// anonymous volume. +var errAnonymousVolumeWithSubpath = errors.New("must not set Subpath when using anonymous volumes") + +// errInvalidSubpath is returned when the provided Subpath is not lexically an +// relative path within volume. +var errInvalidSubpath = errors.New("subpath must be a relative path within the volume") + // read-write modes var rwModes = map[string]bool{ "rw": true, diff --git a/volume/mounts/validate_test.go b/volume/mounts/validate_test.go index ea0004b7e4..0af2785e41 100644 --- a/volume/mounts/validate_test.go +++ b/volume/mounts/validate_test.go @@ -28,6 +28,9 @@ func TestValidateMount(t *testing.T) { { input: mount.Mount{Type: mount.TypeVolume, Target: testDestinationPath}, }, + { + input: mount.Mount{Type: mount.TypeVolume, Target: testDestinationPath, Source: "hello", VolumeOptions: &mount.VolumeOptions{Subpath: "world"}}, + }, { input: mount.Mount{Type: mount.TypeBind}, expected: errMissingField("Target"), diff --git a/volume/mounts/windows_parser.go b/volume/mounts/windows_parser.go index f9f0f08f44..c3a6c6bb69 100644 --- a/volume/mounts/windows_parser.go +++ b/volume/mounts/windows_parser.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "path/filepath" "regexp" "runtime" "strings" @@ -258,7 +259,19 @@ func (p *windowsParser) validateMountConfigReg(mnt *mount.Mount, additionalValid return &errMountConfig{mnt, errExtraField("BindOptions")} } - if len(mnt.Source) == 0 && mnt.ReadOnly { + anonymousVolume := len(mnt.Source) == 0 + if mnt.VolumeOptions != nil && mnt.VolumeOptions.Subpath != "" { + if anonymousVolume { + return errAnonymousVolumeWithSubpath + } + + // Check if path is relative but without any back traversals + if !filepath.IsLocal(mnt.VolumeOptions.Subpath) { + return &errMountConfig{mnt, errInvalidSubpath} + } + } + + if anonymousVolume && mnt.ReadOnly { return &errMountConfig{mnt, fmt.Errorf("must not set ReadOnly mode when using anonymous volumes")} }