Support recursively read-only (RRO) mounts

`docker run -v /foo:/foo:ro` is now recursively read-only on kernel >= 5.12.

Automatically falls back to the legacy non-recursively read-only mount mode on kernel < 5.12.

Use `ro-non-recursive` to disable RRO.
Use `ro-force-recursive` or `rro` to explicitly enable RRO. (Fails on kernel < 5.12)

Fix issue 44978
Fix docker/for-linux issue 788

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda 2023-04-05 20:32:03 +09:00
parent 88f6a92d22
commit 5045a2de24
No known key found for this signature in database
GPG key ID: 49524C6F9F638F1A
18 changed files with 361 additions and 30 deletions

View file

@ -588,6 +588,18 @@ func (s *containerRouter) postContainersCreate(ctx context.Context, w http.Respo
hostConfig.PidsLimit = nil
}
if hostConfig != nil && versions.LessThan(version, "1.44") {
for _, m := range hostConfig.Mounts {
if m.BindOptions != nil {
// Ignore ReadOnlyNonRecursive because it was added in API 1.44.
m.BindOptions.ReadOnlyNonRecursive = false
if m.BindOptions.ReadOnlyForceRecursive {
return errdefs.InvalidParameter(errors.New("BindOptions.ReadOnlyForceRecursive needs API v1.44 or newer"))
}
}
}
}
ccr, err := s.backend.ContainerCreate(ctx, types.ContainerCreateConfig{
Name: name,
Config: config,

View file

@ -388,6 +388,16 @@ definitions:
description: "Create mount point on host if missing"
type: "boolean"
default: false
ReadOnlyNonRecursive:
description: |
Make the mount non-recursively read-only, but still leave the mount recursive
(unless NonRecursive is set to true in conjunction).
type: "boolean"
default: false
ReadOnlyForceRecursive:
description: "Raise an error if the mount cannot be made recursively read-only."
type: "boolean"
default: false
VolumeOptions:
description: "Optional configuration for the `volume` type."
type: "object"

View file

@ -29,7 +29,7 @@ type Mount struct {
// Source is not supported for tmpfs (must be an empty value)
Source string `json:",omitempty"`
Target string `json:",omitempty"`
ReadOnly bool `json:",omitempty"`
ReadOnly bool `json:",omitempty"` // attempts recursive read-only if possible
Consistency Consistency `json:",omitempty"`
BindOptions *BindOptions `json:",omitempty"`
@ -85,6 +85,11 @@ type BindOptions struct {
Propagation Propagation `json:",omitempty"`
NonRecursive bool `json:",omitempty"`
CreateMountpoint bool `json:",omitempty"`
// ReadOnlyNonRecursive makes the mount non-recursively read-only, but still leaves the mount recursive
// (unless NonRecursive is set to true in conjunction).
ReadOnlyNonRecursive bool `json:",omitempty"`
// ReadOnlyForceRecursive raises an error if the mount cannot be made recursively read-only.
ReadOnlyForceRecursive bool `json:",omitempty"`
}
// VolumeOptions represents the options for a mount of type volume.

View file

@ -16,6 +16,7 @@ import (
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/api/types/volume"
"github.com/docker/go-connections/nat"
"github.com/opencontainers/runtime-spec/specs-go/features"
)
const (
@ -659,6 +660,7 @@ type Runtime struct {
// This is exposed here only for internal use
ShimConfig *ShimConfig `json:"-"`
Features *features.Features `json:"-"`
}
// ShimConfig is used by runtime to configure containerd shims

View file

@ -10,4 +10,6 @@ type Mount struct {
Data string `json:"data"`
Propagation string `json:"mountpropagation"`
NonRecursive bool `json:"nonrecursive"`
ReadOnlyNonRecursive bool `json:"readonlynonrecursive"`
ReadOnlyForceRecursive bool `json:"readonlyforcerecursive"`
}

View file

@ -2,6 +2,8 @@ package daemon // import "github.com/docker/docker/daemon"
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
@ -10,6 +12,7 @@ import (
"github.com/hashicorp/go-multierror"
"github.com/moby/sys/mount"
"github.com/moby/sys/symlink"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"github.com/docker/docker/api/types"
@ -102,6 +105,15 @@ func (daemon *Daemon) openContainerFS(container *container.Container) (_ *contai
writeMode := "ro"
if m.Writable {
writeMode = "rw"
if m.ReadOnlyNonRecursive {
return errors.New("options conflict: Writable && ReadOnlyNonRecursive")
}
if m.ReadOnlyForceRecursive {
return errors.New("options conflict: Writable && ReadOnlyForceRecursive")
}
}
if m.ReadOnlyNonRecursive && m.ReadOnlyForceRecursive {
return errors.New("options conflict: ReadOnlyNonRecursive && ReadOnlyForceRecursive")
}
// openContainerFS() is called for temporary mounts
@ -118,6 +130,16 @@ func (daemon *Daemon) openContainerFS(container *container.Container) (_ *contai
if err := mount.Mount(m.Source, dest, "", opts); err != nil {
return err
}
// Try to upgrade the read-only mount to recursively read-only, unless the
// caller explicitly asked for the non-recursive behavior.
if !m.Writable && !m.ReadOnlyNonRecursive {
	if err := makeMountRRO(dest); err != nil {
		if m.ReadOnlyForceRecursive {
			return err
		}
		// Best effort: recursion was not forced, so the failure is only
		// logged and the mount created above is kept as is.
		logrus.WithError(err).Debugf("Failed to make %q recursively read-only", dest)
	}
}
}
return mounttree.SwitchRoot(container.BaseFS)
@ -219,3 +241,21 @@ func (vw *containerFSView) Stat(ctx context.Context, path string) (*types.Contai
})
return stat, err
}
// makeMountRRO applies MOUNT_ATTR_RDONLY with AT_RECURSIVE to the mount at
// dest. The mount_setattr call is retried for as long as it is interrupted
// (EINTR); any other failure is returned wrapped with the destination path.
func makeMountRRO(dest string) error {
	readOnly := &unix.MountAttr{Attr_set: unix.MOUNT_ATTR_RDONLY}
	for {
		setErr := unix.MountSetattr(-1, dest, unix.AT_RECURSIVE, readOnly)
		switch {
		case setErr == nil:
			return nil
		case errors.Is(setErr, unix.EINTR):
			// Interrupted by a signal: issue the call again.
			continue
		default:
			return fmt.Errorf("failed to apply MOUNT_ATTR_RDONLY with AT_RECURSIVE to %q: %w", dest, setErr)
		}
	}
}

View file

@ -8,6 +8,7 @@ import (
"os"
"regexp"
"strings"
"sync"
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/libnetwork/ns"
@ -17,6 +18,7 @@ import (
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)
// On Linux, plugins use a static path for storing execution state,
@ -182,3 +184,76 @@ func ifaceAddrs(linkName string) (v4, v6 []*net.IPNet, err error) {
}
return v4, v6, nil
}
var (
kernelSupportsRROOnce sync.Once
kernelSupportsRROErr error
)
func kernelSupportsRecursivelyReadOnly() error {
fn := func() error {
tmpMnt, err := os.MkdirTemp("", "moby-detect-rro")
if err != nil {
return fmt.Errorf("failed to create a temp directory: %w", err)
}
for {
err = unix.Mount("", tmpMnt, "tmpfs", 0, "")
if !errors.Is(err, unix.EINTR) {
break
}
}
if err != nil {
return fmt.Errorf("failed to mount tmpfs on %q: %w", tmpMnt, err)
}
defer func() {
var umErr error
for {
umErr = unix.Unmount(tmpMnt, 0)
if !errors.Is(umErr, unix.EINTR) {
break
}
}
if umErr != nil {
logrus.WithError(umErr).Warnf("Failed to unmount %q", tmpMnt)
}
}()
attr := &unix.MountAttr{
Attr_set: unix.MOUNT_ATTR_RDONLY,
}
for {
err = unix.MountSetattr(-1, tmpMnt, unix.AT_RECURSIVE, attr)
if !errors.Is(err, unix.EINTR) {
break
}
}
// ENOSYS on kernel < 5.12
if err != nil {
return fmt.Errorf("failed to call mount_setattr: %w", err)
}
return nil
}
kernelSupportsRROOnce.Do(func() {
kernelSupportsRROErr = fn()
})
return kernelSupportsRROErr
}
// supportsRecursivelyReadOnly reports whether recursively read-only (RRO)
// mounts can be used with the given runtime. It returns nil only when the
// kernel probe succeeds and the runtime lists "rro" among its supported mount
// options; otherwise it returns an error describing which requirement is
// missing. An empty runtime name selects the daemon's default runtime.
func (daemon *Daemon) supportsRecursivelyReadOnly(runtime string) error {
	if err := kernelSupportsRecursivelyReadOnly(); err != nil {
		return fmt.Errorf("rro is not supported: %w (kernel is older than 5.12?)", err)
	}
	if runtime == "" {
		runtime = daemon.configStore.GetDefaultRuntimeName()
	}
	rt := daemon.configStore.GetRuntime(runtime)
	// No runtime entry (presumably possible for names that are not registered
	// in the runtimes config, to be confirmed against GetRuntime) means no
	// features were collected either; report it instead of dereferencing nil.
	if rt == nil || rt.Features == nil {
		return fmt.Errorf("rro is not supported by runtime %q: OCI features struct is not available", runtime)
	}
	for _, s := range rt.Features.MountOptions {
		if s == "rro" {
			return nil
		}
	}
	return fmt.Errorf("rro is not supported by runtime %q", runtime)
}

View file

@ -17,3 +17,7 @@ func setupResolvConf(_ *interface{}) {}
func getSysInfo(_ *Daemon) *sysinfo.SysInfo {
return sysinfo.New()
}
// supportsRecursivelyReadOnly always returns nil in this build: the runtime
// name is ignored and no kernel or runtime capability check is performed.
func (daemon *Daemon) supportsRecursivelyReadOnly(_ string) error {
return nil
}

View file

@ -604,3 +604,7 @@ func (daemon *Daemon) initLibcontainerd(ctx context.Context) error {
return err
}
// supportsRecursivelyReadOnly always returns nil in this build: the runtime
// name is ignored and no kernel or runtime capability check is performed.
func (daemon *Daemon) supportsRecursivelyReadOnly(_ string) error {
return nil
}

View file

@ -645,8 +645,25 @@ func WithMounts(daemon *Daemon, c *container.Container) coci.SpecOpts {
}
opts := []string{bindMode}
if !m.Writable {
rro := true
if m.ReadOnlyNonRecursive {
rro = false
if m.ReadOnlyForceRecursive {
return errors.New("mount options conflict: ReadOnlyNonRecursive && ReadOnlyForceRecursive")
}
}
if rroErr := daemon.supportsRecursivelyReadOnly(c.HostConfig.Runtime); rroErr != nil {
rro = false
if m.ReadOnlyForceRecursive {
return rroErr
}
}
if rro {
opts = append(opts, "rro")
} else {
opts = append(opts, "ro")
}
}
if pFlag != 0 {
opts = append(opts, mountPropagationReverseMap[pFlag])
}

View file

@ -3,6 +3,8 @@
package daemon
import (
"bytes"
"encoding/json"
"fmt"
"os"
"os/exec"
@ -14,6 +16,7 @@ import (
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/libcontainerd/shimopts"
"github.com/opencontainers/runtime-spec/specs-go/features"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@ -109,6 +112,19 @@ func (daemon *Daemon) initRuntimes(runtimes map[string]types.Runtime) (err error
}
}
rt.ShimConfig = defaultV2ShimConfig(daemon.configStore, daemon.rewriteRuntimePath(name, rt.Path, rt.Args))
// Ask the runtime binary for its supported features by running its
// "features" subcommand. Failures are not fatal: they are logged and
// rt.Features is simply left unset.
var featuresStderr bytes.Buffer
featuresCmd := exec.Command(rt.Path, append(rt.Args, "features")...)
featuresCmd.Stderr = &featuresStderr
if featuresB, err := featuresCmd.Output(); err != nil {
	logrus.WithError(err).Warnf("Failed to run %v: %q", featuresCmd.Args, featuresStderr.String())
} else {
	// Named feat so that the imported package "features" is not shadowed.
	var feat features.Features
	if jsonErr := json.Unmarshal(featuresB, &feat); jsonErr != nil {
		// Log the decoding error itself; err is always nil in this branch.
		logrus.WithError(jsonErr).Warnf("Failed to unmarshal the output of %v as a JSON", featuresCmd.Args)
	} else {
		rt.Features = &feat
	}
}
} else {
if len(rt.Args) > 0 {
return errors.Errorf("runtime %s: args cannot be used with a runtimeType runtime", name)

View file

@ -12,6 +12,7 @@ import (
mounttypes "github.com/docker/docker/api/types/mount"
"github.com/docker/docker/container"
volumemounts "github.com/docker/docker/volume/mounts"
"github.com/pkg/errors"
)
// setupMounts iterates through each of the mount points for a container and
@ -58,7 +59,18 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er
Propagation: string(m.Propagation),
}
if m.Spec.Type == mounttypes.TypeBind && m.Spec.BindOptions != nil {
if !m.Spec.ReadOnly && m.Spec.BindOptions.ReadOnlyNonRecursive {
return nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyNonRecursive")
}
if !m.Spec.ReadOnly && m.Spec.BindOptions.ReadOnlyForceRecursive {
return nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyForceRecursive")
}
if m.Spec.BindOptions.ReadOnlyNonRecursive && m.Spec.BindOptions.ReadOnlyForceRecursive {
return nil, errors.New("mount options conflict: ReadOnlyNonRecursive && BindOptions.ReadOnlyForceRecursive")
}
mnt.NonRecursive = m.Spec.BindOptions.NonRecursive
mnt.ReadOnlyNonRecursive = m.Spec.BindOptions.ReadOnlyNonRecursive
mnt.ReadOnlyForceRecursive = m.Spec.BindOptions.ReadOnlyForceRecursive
}
if m.Volume != nil {
attributes := map[string]string{

View file

@ -20,6 +20,10 @@ keywords: "API, Docker, rcli, REST, documentation"
* The `VirtualSize` field in the `GET /images/{name}/json`, `GET /images/json`,
and `GET /system/df` responses is now omitted. Use the `Size` field instead,
which contains the same information.
* Read-only bind mounts are now made recursively read-only on kernel >= 5.12
with runtimes which support the feature.
`POST /containers/create`, `GET /containers/{id}/json`, and `GET /containers/json` now support
`BindOptions.ReadOnlyNonRecursive` and `BindOptions.ReadOnlyForceRecursive` to customize the behavior.
## v1.43 API changes

View file

@ -16,6 +16,7 @@ import (
"github.com/docker/docker/api/types/versions"
"github.com/docker/docker/client"
"github.com/docker/docker/integration/internal/container"
"github.com/docker/docker/pkg/parsers/kernel"
"github.com/moby/sys/mount"
"github.com/moby/sys/mountinfo"
"gotest.tools/v3/assert"
@ -428,3 +429,87 @@ func TestContainerCopyLeaksMounts(t *testing.T) {
assert.Equal(t, mountsBefore, mountsAfter)
}
// TestContainerBindMountRecursivelyReadOnly checks the read-only bind mount
// modes against a source directory that itself contains a submount.
//
// On the host, tmpDir2 is bind-mounted onto tmpDir1/mnt. Containers then mount
// tmpDir1 at /foo read-only using plain "ro", "ro-non-recursive" and
// "ro-force-recursive", each once through the Mounts API and once through a
// raw Binds string, and try to touch /foo/mnt/file inside the submount. Each
// container's command succeeds only if the write outcome matches the mode.
func TestContainerBindMountRecursivelyReadOnly(t *testing.T) {
skip.If(t, testEnv.IsRemoteDaemon)
skip.If(t, versions.LessThan(testEnv.DaemonAPIVersion(), "1.44"), "requires API v1.44")
defer setupTest(t)()
// 0o777 for allowing rootless containers to write to this directory
tmpDir1 := fs.NewDir(t, "tmpdir1", fs.WithMode(0o777),
fs.WithDir("mnt", fs.WithMode(0o777)))
defer tmpDir1.Remove()
tmpDir1Mnt := filepath.Join(tmpDir1.Path(), "mnt")
tmpDir2 := fs.NewDir(t, "tmpdir2", fs.WithMode(0o777),
fs.WithFile("file", "should not be writable when recursively read only", fs.WithMode(0o666)))
defer tmpDir2.Remove()
// Create the submount: tmpDir2 becomes visible at tmpDir1/mnt.
if err := mount.Mount(tmpDir2.Path(), tmpDir1Mnt, "none", "bind"); err != nil {
t.Fatal(err)
}
defer func() {
if err := mount.Unmount(tmpDir1Mnt); err != nil {
t.Fatal(err)
}
}()
// NOTE(review): only the kernel version is checked here; whether the
// runtime used by the daemon under test also supports rro is not verified.
rroSupported := kernel.CheckKernelVersion(5, 12, 0)
// Passes when the file in the submount is writable (touch succeeds).
nonRecursiveVerifier := []string{`/bin/sh`, `-xc`, `touch /foo/mnt/file; [ $? = 0 ]`}
// Passes when the file in the submount is not writable (touch fails).
forceRecursiveVerifier := []string{`/bin/sh`, `-xc`, `touch /foo/mnt/file; [ $? != 0 ]`}
// ro (recursive if kernel >= 5.12)
ro := mounttypes.Mount{
Type: mounttypes.TypeBind,
Source: tmpDir1.Path(),
Target: "/foo",
ReadOnly: true,
BindOptions: &mounttypes.BindOptions{
Propagation: mounttypes.PropagationRPrivate,
},
}
roAsStr := ro.Source + ":" + ro.Target + ":ro,rprivate"
roVerifier := nonRecursiveVerifier
if rroSupported {
roVerifier = forceRecursiveVerifier
}
// Non-recursive
nonRecursive := ro
nonRecursive.BindOptions = &mounttypes.BindOptions{
ReadOnlyNonRecursive: true,
Propagation: mounttypes.PropagationRPrivate,
}
nonRecursiveAsStr := nonRecursive.Source + ":" + nonRecursive.Target + ":ro-non-recursive,rprivate"
// Force recursive
forceRecursive := ro
forceRecursive.BindOptions = &mounttypes.BindOptions{
ReadOnlyForceRecursive: true,
Propagation: mounttypes.PropagationRPrivate,
}
forceRecursiveAsStr := forceRecursive.Source + ":" + forceRecursive.Target + ":ro-force-recursive,rprivate"
ctx := context.Background()
client := testEnv.APIClient()
// Each mode is exercised twice: via the Mounts API and via a raw Binds string.
containers := []string{
container.Run(ctx, t, client, container.WithMount(ro), container.WithCmd(roVerifier...)),
container.Run(ctx, t, client, container.WithBindRaw(roAsStr), container.WithCmd(roVerifier...)),
container.Run(ctx, t, client, container.WithMount(nonRecursive), container.WithCmd(nonRecursiveVerifier...)),
container.Run(ctx, t, client, container.WithBindRaw(nonRecursiveAsStr), container.WithCmd(nonRecursiveVerifier...)),
}
// The force-recursive containers are only launched when the kernel check above passed.
if rroSupported {
containers = append(containers,
container.Run(ctx, t, client, container.WithMount(forceRecursive), container.WithCmd(forceRecursiveVerifier...)),
container.Run(ctx, t, client, container.WithBindRaw(forceRecursiveAsStr), container.WithCmd(forceRecursiveVerifier...)),
)
}
for _, c := range containers {
poll.WaitOn(t, container.IsSuccessful(ctx, client, c), poll.WithDelay(100*time.Millisecond))
}
}

View file

@ -94,6 +94,13 @@ func WithBind(src, target string) func(*TestContainerConfig) {
}
}
// WithBindRaw appends s, unmodified, to the container's HostConfig.Binds,
// so the caller controls the whole bind specification string.
func WithBindRaw(s string) func(*TestContainerConfig) {
	return func(cfg *TestContainerConfig) {
		existing := cfg.HostConfig.Binds
		cfg.HostConfig.Binds = append(existing, s)
	}
}
// WithTmpfs sets a target path in the container to a tmpfs, with optional options
// (separated with a colon).
func WithTmpfs(targetAndOpts string) func(config *TestContainerConfig) {

View file

@ -194,7 +194,7 @@ func (p *linuxParser) ReadWrite(mode string) bool {
}
for _, o := range strings.Split(mode, ",") {
if o == "ro" {
if o == "ro" || strings.HasPrefix(o, "ro-") || o == "rro" {
return false
}
}
@ -262,6 +262,24 @@ func (p *linuxParser) ParseMountRaw(raw, volumeDriver string) (*MountPoint, erro
}
}
for _, m := range strings.Split(mode, ",") {
m = strings.TrimSpace(m)
if strings.HasPrefix(m, "ro-") || m == "rro" {
if spec.Type != mount.TypeBind {
return nil, fmt.Errorf("mount mode %q requires a bind mount: %w", mode, errInvalidSpec(raw))
}
if spec.BindOptions == nil {
spec.BindOptions = &mount.BindOptions{}
}
switch m {
case "ro-non-recursive":
spec.BindOptions.ReadOnlyNonRecursive = true
case "ro-force-recursive", "rro":
spec.BindOptions.ReadOnlyForceRecursive = true
}
}
}
mp, err := p.parseMountSpec(spec, false)
if mp != nil {
mp.Mode = mode
@ -329,6 +347,9 @@ func (p *linuxParser) ParseVolumesFrom(spec string) (string, string, error) {
if !linuxValidMountMode(mode) {
return "", "", errInvalidMode(mode)
}
if strings.HasPrefix(mode, "ro-") || mode == "rro" {
return "", "", fmt.Errorf("mount mode %q is not supported for volumes-from mounts: %w", mode, errInvalidMode(mode))
}
// For now don't allow propagation properties while importing
// volumes from data container. These volumes will inherit
// the same propagation property as of the original volume

View file

@ -107,17 +107,22 @@ func TestLinuxParseMountRawSplit(t *testing.T) {
expName string
expDriver string
expRW bool
expNonRRO bool
expForceRRO bool
fail bool
}{
{"/tmp:/tmp1", "", mount.TypeBind, "/tmp1", "/tmp", "", "", true, false},
{"/tmp:/tmp2:ro", "", mount.TypeBind, "/tmp2", "/tmp", "", "", false, false},
{"/tmp:/tmp3:rw", "", mount.TypeBind, "/tmp3", "/tmp", "", "", true, false},
{"/tmp:/tmp4:foo", "", mount.TypeBind, "", "", "", "", false, true},
{"name:/named1", "", mount.TypeVolume, "/named1", "", "name", "", true, false},
{"name:/named2", "external", mount.TypeVolume, "/named2", "", "name", "external", true, false},
{"name:/named3:ro", "local", mount.TypeVolume, "/named3", "", "name", "local", false, false},
{"local/name:/tmp:rw", "", mount.TypeVolume, "/tmp", "", "local/name", "", true, false},
{"/tmp:tmp", "", mount.TypeBind, "", "", "", "", true, true},
{"/tmp:/tmp1", "", mount.TypeBind, "/tmp1", "/tmp", "", "", true, false, false, false},
{"/tmp:/tmp2:ro", "", mount.TypeBind, "/tmp2", "/tmp", "", "", false, false, false, false},
{"/tmp:/tmp3:rw", "", mount.TypeBind, "/tmp3", "/tmp", "", "", true, false, false, false},
{"/tmp:/tmp4:foo", "", mount.TypeBind, "", "", "", "", false, false, false, true},
{"/tmp:/tmp5:ro-non-recursive", "", mount.TypeBind, "/tmp5", "/tmp", "", "", false, true, false, false},
{"/tmp:/tmp6:ro-force-recursive,rprivate", "", mount.TypeBind, "/tmp6", "/tmp", "", "", false, false, true, false},
{"/tmp:/tmp7:rro", "", mount.TypeBind, "/tmp7", "/tmp", "", "", false, false, true, false},
{"name:/named1", "", mount.TypeVolume, "/named1", "", "name", "", true, false, false, false},
{"name:/named2", "external", mount.TypeVolume, "/named2", "", "name", "external", true, false, false, false},
{"name:/named3:ro", "local", mount.TypeVolume, "/named3", "", "name", "local", false, false, false, false},
{"local/name:/tmp:rw", "", mount.TypeVolume, "/tmp", "", "local/name", "", true, false, false, false},
{"/tmp:tmp", "", mount.TypeBind, "", "", "", "", true, false, false, true},
}
parser := NewLinuxParser()
@ -141,6 +146,13 @@ func TestLinuxParseMountRawSplit(t *testing.T) {
assert.Equal(t, m.Driver, c.expDriver)
assert.Equal(t, m.RW, c.expRW)
assert.Equal(t, m.Type, c.expType)
var nonRRO, forceRRO bool
if m.Spec.BindOptions != nil {
nonRRO = m.Spec.BindOptions.ReadOnlyNonRecursive
forceRRO = m.Spec.BindOptions.ReadOnlyForceRecursive
}
assert.Equal(t, nonRRO, c.expNonRRO)
assert.Equal(t, forceRRO, c.expForceRRO)
})
}
}

View file

@ -14,7 +14,10 @@ var ErrVolumeTargetIsRoot = errors.New("invalid specification: destination can't
// read-write modes
var rwModes = map[string]bool{
"rw": true,
"ro": true,
"ro": true, // attempts recursive read-only if possible
"ro-non-recursive": true, // makes the mount non-recursively read-only, but still leaves the mount recursive
"ro-force-recursive": true, // raises an error if the mount cannot be made recursively read-only
"rro": true, // alias for ro-force-recursive
}
// Parser represents a platform specific parser for mount expressions