Support recursively read-only (RRO) mounts

`docker run -v /foo:/foo:ro` is now recursively read-only on kernel >= 5.12.

Automatically falls back to the legacy non-recursively read-only mount mode on kernel < 5.12.

Use `ro-non-recursive` to disable RRO.
Use `ro-force-recursive` or `rro` to explicitly enable RRO. (Fails on kernel < 5.12)

Fix issue 44978
Fix docker/for-linux issue 788

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda 2023-04-05 20:32:03 +09:00
parent 88f6a92d22
commit 5045a2de24
No known key found for this signature in database
GPG key ID: 49524C6F9F638F1A
18 changed files with 361 additions and 30 deletions

View file

@ -588,6 +588,18 @@ func (s *containerRouter) postContainersCreate(ctx context.Context, w http.Respo
hostConfig.PidsLimit = nil
}
if hostConfig != nil && versions.LessThan(version, "1.44") {
for _, m := range hostConfig.Mounts {
if m.BindOptions != nil {
// Ignore ReadOnlyNonRecursive because it was added in API 1.44.
m.BindOptions.ReadOnlyNonRecursive = false
if m.BindOptions.ReadOnlyForceRecursive {
return errdefs.InvalidParameter(errors.New("BindOptions.ReadOnlyForceRecursive needs API v1.44 or newer"))
}
}
}
}
ccr, err := s.backend.ContainerCreate(ctx, types.ContainerCreateConfig{
Name: name,
Config: config,

View file

@ -388,6 +388,16 @@ definitions:
description: "Create mount point on host if missing"
type: "boolean"
default: false
ReadOnlyNonRecursive:
description: |
Make the mount non-recursively read-only, but still leave the mount recursive
(unless NonRecursive is set to true in conjunction).
type: "boolean"
default: false
ReadOnlyForceRecursive:
description: "Raise an error if the mount cannot be made recursively read-only."
type: "boolean"
default: false
VolumeOptions:
description: "Optional configuration for the `volume` type."
type: "object"

View file

@ -29,7 +29,7 @@ type Mount struct {
// Source is not supported for tmpfs (must be an empty value)
Source string `json:",omitempty"`
Target string `json:",omitempty"`
ReadOnly bool `json:",omitempty"`
ReadOnly bool `json:",omitempty"` // attempts recursive read-only if possible
Consistency Consistency `json:",omitempty"`
BindOptions *BindOptions `json:",omitempty"`
@ -85,6 +85,11 @@ type BindOptions struct {
Propagation Propagation `json:",omitempty"`
NonRecursive bool `json:",omitempty"`
CreateMountpoint bool `json:",omitempty"`
// ReadOnlyNonRecursive makes the mount non-recursively read-only, but still leaves the mount recursive
// (unless NonRecursive is set to true in conjunction).
ReadOnlyNonRecursive bool `json:",omitempty"`
// ReadOnlyForceRecursive raises an error if the mount cannot be made recursively read-only.
ReadOnlyForceRecursive bool `json:",omitempty"`
}
// VolumeOptions represents the options for a mount of type volume.

View file

@ -16,6 +16,7 @@ import (
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/api/types/volume"
"github.com/docker/go-connections/nat"
"github.com/opencontainers/runtime-spec/specs-go/features"
)
const (
@ -658,7 +659,8 @@ type Runtime struct {
Options map[string]interface{} `json:"options,omitempty"`
// This is exposed here only for internal use
ShimConfig *ShimConfig `json:"-"`
ShimConfig *ShimConfig `json:"-"`
Features *features.Features `json:"-"`
}
// ShimConfig is used by runtime to configure containerd shims

View file

@ -4,10 +4,12 @@ package container // import "github.com/docker/docker/container"
// Mount contains information for a mount operation.
type Mount struct {
Source string `json:"source"`
Destination string `json:"destination"`
Writable bool `json:"writable"`
Data string `json:"data"`
Propagation string `json:"mountpropagation"`
NonRecursive bool `json:"nonrecursive"`
Source string `json:"source"`
Destination string `json:"destination"`
Writable bool `json:"writable"`
Data string `json:"data"`
Propagation string `json:"mountpropagation"`
NonRecursive bool `json:"nonrecursive"`
ReadOnlyNonRecursive bool `json:"readonlynonrecursive"`
ReadOnlyForceRecursive bool `json:"readonlyforcerecursive"`
}

View file

@ -2,6 +2,8 @@ package daemon // import "github.com/docker/docker/daemon"
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
@ -10,6 +12,7 @@ import (
"github.com/hashicorp/go-multierror"
"github.com/moby/sys/mount"
"github.com/moby/sys/symlink"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"github.com/docker/docker/api/types"
@ -102,6 +105,15 @@ func (daemon *Daemon) openContainerFS(container *container.Container) (_ *contai
writeMode := "ro"
if m.Writable {
writeMode = "rw"
if m.ReadOnlyNonRecursive {
return errors.New("options conflict: Writable && ReadOnlyNonRecursive")
}
if m.ReadOnlyForceRecursive {
return errors.New("options conflict: Writable && ReadOnlyForceRecursive")
}
}
if m.ReadOnlyNonRecursive && m.ReadOnlyForceRecursive {
return errors.New("options conflict: ReadOnlyNonRecursive && ReadOnlyForceRecursive")
}
// openContainerFS() is called for temporary mounts
@ -118,6 +130,16 @@ func (daemon *Daemon) openContainerFS(container *container.Container) (_ *contai
if err := mount.Mount(m.Source, dest, "", opts); err != nil {
return err
}
if !m.Writable && !m.ReadOnlyNonRecursive {
if err := makeMountRRO(dest); err != nil {
if m.ReadOnlyForceRecursive {
return err
} else {
logrus.WithError(err).Debugf("Failed to make %q recursively read-only", dest)
}
}
}
}
return mounttree.SwitchRoot(container.BaseFS)
@ -219,3 +241,21 @@ func (vw *containerFSView) Stat(ctx context.Context, path string) (*types.Contai
})
return stat, err
}
// makeMountRRO applies MOUNT_ATTR_RDONLY with AT_RECURSIVE to the mount at
// dest via mount_setattr, retrying for as long as the call is interrupted
// (EINTR). Any other failure is returned wrapped with the destination path.
func makeMountRRO(dest string) error {
	rdonly := unix.MountAttr{Attr_set: unix.MOUNT_ATTR_RDONLY}
	for {
		err := unix.MountSetattr(-1, dest, unix.AT_RECURSIVE, &rdonly)
		if errors.Is(err, unix.EINTR) {
			// Interrupted by a signal: issue the same call again.
			continue
		}
		if err != nil {
			return fmt.Errorf("failed to apply MOUNT_ATTR_RDONLY with AT_RECURSIVE to %q: %w", dest, err)
		}
		return nil
	}
}

View file

@ -8,6 +8,7 @@ import (
"os"
"regexp"
"strings"
"sync"
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/libnetwork/ns"
@ -17,6 +18,7 @@ import (
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)
// On Linux, plugins use a static path for storing execution state,
@ -182,3 +184,76 @@ func ifaceAddrs(linkName string) (v4, v6 []*net.IPNet, err error) {
}
return v4, v6, nil
}
var (
kernelSupportsRROOnce sync.Once
kernelSupportsRROErr error
)
func kernelSupportsRecursivelyReadOnly() error {
fn := func() error {
tmpMnt, err := os.MkdirTemp("", "moby-detect-rro")
if err != nil {
return fmt.Errorf("failed to create a temp directory: %w", err)
}
for {
err = unix.Mount("", tmpMnt, "tmpfs", 0, "")
if !errors.Is(err, unix.EINTR) {
break
}
}
if err != nil {
return fmt.Errorf("failed to mount tmpfs on %q: %w", tmpMnt, err)
}
defer func() {
var umErr error
for {
umErr = unix.Unmount(tmpMnt, 0)
if !errors.Is(umErr, unix.EINTR) {
break
}
}
if umErr != nil {
logrus.WithError(umErr).Warnf("Failed to unmount %q", tmpMnt)
}
}()
attr := &unix.MountAttr{
Attr_set: unix.MOUNT_ATTR_RDONLY,
}
for {
err = unix.MountSetattr(-1, tmpMnt, unix.AT_RECURSIVE, attr)
if !errors.Is(err, unix.EINTR) {
break
}
}
// ENOSYS on kernel < 5.12
if err != nil {
return fmt.Errorf("failed to call mount_setattr: %w", err)
}
return nil
}
kernelSupportsRROOnce.Do(func() {
kernelSupportsRROErr = fn()
})
return kernelSupportsRROErr
}
// supportsRecursivelyReadOnly reports whether recursively read-only ("rro")
// mounts can be used with the given runtime. It returns nil when both the
// kernel probe succeeds and the runtime lists "rro" among its mount options,
// and an error describing the missing piece otherwise.
//
// An empty runtime name selects the daemon's default runtime.
func (daemon *Daemon) supportsRecursivelyReadOnly(runtime string) error {
	if err := kernelSupportsRecursivelyReadOnly(); err != nil {
		return fmt.Errorf("rro is not supported: %w (kernel is older than 5.12?)", err)
	}
	if runtime == "" {
		runtime = daemon.configStore.GetDefaultRuntimeName()
	}
	rt := daemon.configStore.GetRuntime(runtime)
	// Guard against a nil result so that a runtime name without a config entry
	// yields an error instead of a nil-pointer dereference below.
	if rt == nil {
		return fmt.Errorf("rro is not supported by runtime %q: runtime is not registered in the daemon configuration", runtime)
	}
	if rt.Features == nil {
		return fmt.Errorf("rro is not supported by runtime %q: OCI features struct is not available", runtime)
	}
	for _, s := range rt.Features.MountOptions {
		if s == "rro" {
			return nil
		}
	}
	return fmt.Errorf("rro is not supported by runtime %q", runtime)
}

View file

@ -17,3 +17,7 @@ func setupResolvConf(_ *interface{}) {}
func getSysInfo(_ *Daemon) *sysinfo.SysInfo {
return sysinfo.New()
}
// supportsRecursivelyReadOnly is a stub for this build: it ignores the runtime
// name and always returns nil, i.e. it never reports recursively read-only
// mounts as unsupported.
func (daemon *Daemon) supportsRecursivelyReadOnly(_ string) error {
return nil
}

View file

@ -604,3 +604,7 @@ func (daemon *Daemon) initLibcontainerd(ctx context.Context) error {
return err
}
// supportsRecursivelyReadOnly is a stub for this build: it ignores the runtime
// name and always returns nil, i.e. it never reports recursively read-only
// mounts as unsupported.
func (daemon *Daemon) supportsRecursivelyReadOnly(_ string) error {
return nil
}

View file

@ -645,7 +645,24 @@ func WithMounts(daemon *Daemon, c *container.Container) coci.SpecOpts {
}
opts := []string{bindMode}
if !m.Writable {
opts = append(opts, "ro")
rro := true
if m.ReadOnlyNonRecursive {
rro = false
if m.ReadOnlyForceRecursive {
return errors.New("mount options conflict: ReadOnlyNonRecursive && ReadOnlyForceRecursive")
}
}
if rroErr := daemon.supportsRecursivelyReadOnly(c.HostConfig.Runtime); rroErr != nil {
rro = false
if m.ReadOnlyForceRecursive {
return rroErr
}
}
if rro {
opts = append(opts, "rro")
} else {
opts = append(opts, "ro")
}
}
if pFlag != 0 {
opts = append(opts, mountPropagationReverseMap[pFlag])

View file

@ -3,6 +3,8 @@
package daemon
import (
"bytes"
"encoding/json"
"fmt"
"os"
"os/exec"
@ -14,6 +16,7 @@ import (
"github.com/docker/docker/daemon/config"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/libcontainerd/shimopts"
"github.com/opencontainers/runtime-spec/specs-go/features"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@ -109,6 +112,19 @@ func (daemon *Daemon) initRuntimes(runtimes map[string]types.Runtime) (err error
}
}
rt.ShimConfig = defaultV2ShimConfig(daemon.configStore, daemon.rewriteRuntimePath(name, rt.Path, rt.Args))
// Run "<runtime> <args...> features" and decode its JSON output into
// rt.Features. A failure is not fatal: a warning is logged and rt.Features is
// left unset.
var featuresStderr bytes.Buffer
featuresCmd := exec.Command(rt.Path, append(rt.Args, "features")...)
featuresCmd.Stderr = &featuresStderr
if featuresB, err := featuresCmd.Output(); err != nil {
	logrus.WithError(err).Warnf("Failed to run %v: %q", featuresCmd.Args, featuresStderr.String())
} else {
	// Named "feat" so that the imported "features" package is not shadowed.
	var feat features.Features
	if jsonErr := json.Unmarshal(featuresB, &feat); jsonErr != nil {
		// Attach jsonErr: err is always nil in this branch, so logging it
		// would drop the actual decoding failure.
		logrus.WithError(jsonErr).Warnf("Failed to unmarshal the output of %v as a JSON", featuresCmd.Args)
	} else {
		rt.Features = &feat
	}
}
} else {
if len(rt.Args) > 0 {
return errors.Errorf("runtime %s: args cannot be used with a runtimeType runtime", name)

View file

@ -12,6 +12,7 @@ import (
mounttypes "github.com/docker/docker/api/types/mount"
"github.com/docker/docker/container"
volumemounts "github.com/docker/docker/volume/mounts"
"github.com/pkg/errors"
)
// setupMounts iterates through each of the mount points for a container and
@ -58,7 +59,18 @@ func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, er
Propagation: string(m.Propagation),
}
if m.Spec.Type == mounttypes.TypeBind && m.Spec.BindOptions != nil {
if !m.Spec.ReadOnly && m.Spec.BindOptions.ReadOnlyNonRecursive {
return nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyNonRecursive")
}
if !m.Spec.ReadOnly && m.Spec.BindOptions.ReadOnlyForceRecursive {
return nil, errors.New("mount options conflict: !ReadOnly && BindOptions.ReadOnlyForceRecursive")
}
if m.Spec.BindOptions.ReadOnlyNonRecursive && m.Spec.BindOptions.ReadOnlyForceRecursive {
return nil, errors.New("mount options conflict: ReadOnlyNonRecursive && BindOptions.ReadOnlyForceRecursive")
}
mnt.NonRecursive = m.Spec.BindOptions.NonRecursive
mnt.ReadOnlyNonRecursive = m.Spec.BindOptions.ReadOnlyNonRecursive
mnt.ReadOnlyForceRecursive = m.Spec.BindOptions.ReadOnlyForceRecursive
}
if m.Volume != nil {
attributes := map[string]string{

View file

@ -20,6 +20,10 @@ keywords: "API, Docker, rcli, REST, documentation"
* The `VirtualSize` field in the `GET /images/{name}/json`, `GET /images/json`,
and `GET /system/df` responses is now omitted. Use the `Size` field instead,
which contains the same information.
* Read-only bind mounts are now made recursively read-only on kernel >= 5.12
with runtimes which support the feature.
`POST /containers/create`, `GET /containers/{id}/json`, and `GET /containers/json` now support
`BindOptions.ReadOnlyNonRecursive` and `BindOptions.ReadOnlyForceRecursive` to customize the behavior.
## v1.43 API changes

View file

@ -16,6 +16,7 @@ import (
"github.com/docker/docker/api/types/versions"
"github.com/docker/docker/client"
"github.com/docker/docker/integration/internal/container"
"github.com/docker/docker/pkg/parsers/kernel"
"github.com/moby/sys/mount"
"github.com/moby/sys/mountinfo"
"gotest.tools/v3/assert"
@ -428,3 +429,87 @@ func TestContainerCopyLeaksMounts(t *testing.T) {
assert.Equal(t, mountsBefore, mountsAfter)
}
// TestContainerBindMountRecursivelyReadOnly checks how read-only bind mounts
// treat a nested mount. It bind-mounts tmpdir2 onto tmpdir1/mnt on the host,
// exposes tmpdir1 to containers at /foo with the different read-only modes,
// and has each container try to touch /foo/mnt/file. Every mode is exercised
// twice: once through the structured Mount API and once through a raw bind
// string.
func TestContainerBindMountRecursivelyReadOnly(t *testing.T) {
skip.If(t, testEnv.IsRemoteDaemon)
skip.If(t, versions.LessThan(testEnv.DaemonAPIVersion(), "1.44"), "requires API v1.44")
defer setupTest(t)()
// 0o777 for allowing rootless containers to write to this directory
tmpDir1 := fs.NewDir(t, "tmpdir1", fs.WithMode(0o777),
fs.WithDir("mnt", fs.WithMode(0o777)))
defer tmpDir1.Remove()
tmpDir1Mnt := filepath.Join(tmpDir1.Path(), "mnt")
tmpDir2 := fs.NewDir(t, "tmpdir2", fs.WithMode(0o777),
fs.WithFile("file", "should not be writable when recursively read only", fs.WithMode(0o666)))
defer tmpDir2.Remove()
// Create the nested mount: tmpdir2 becomes visible at tmpdir1/mnt.
if err := mount.Mount(tmpDir2.Path(), tmpDir1Mnt, "none", "bind"); err != nil {
t.Fatal(err)
}
defer func() {
if err := mount.Unmount(tmpDir1Mnt); err != nil {
t.Fatal(err)
}
}()
rroSupported := kernel.CheckKernelVersion(5, 12, 0)
// nonRecursiveVerifier passes when the file under the nested mount can be
// touched; forceRecursiveVerifier passes when that touch fails.
nonRecursiveVerifier := []string{`/bin/sh`, `-xc`, `touch /foo/mnt/file; [ $? = 0 ]`}
forceRecursiveVerifier := []string{`/bin/sh`, `-xc`, `touch /foo/mnt/file; [ $? != 0 ]`}
// ro (recursive if kernel >= 5.12)
ro := mounttypes.Mount{
Type: mounttypes.TypeBind,
Source: tmpDir1.Path(),
Target: "/foo",
ReadOnly: true,
BindOptions: &mounttypes.BindOptions{
Propagation: mounttypes.PropagationRPrivate,
},
}
roAsStr := ro.Source + ":" + ro.Target + ":ro,rprivate"
// Plain "ro" is expected to behave recursively only when rroSupported is true.
roVerifier := nonRecursiveVerifier
if rroSupported {
roVerifier = forceRecursiveVerifier
}
// Non-recursive
nonRecursive := ro
nonRecursive.BindOptions = &mounttypes.BindOptions{
ReadOnlyNonRecursive: true,
Propagation: mounttypes.PropagationRPrivate,
}
nonRecursiveAsStr := nonRecursive.Source + ":" + nonRecursive.Target + ":ro-non-recursive,rprivate"
// Force recursive
forceRecursive := ro
forceRecursive.BindOptions = &mounttypes.BindOptions{
ReadOnlyForceRecursive: true,
Propagation: mounttypes.PropagationRPrivate,
}
forceRecursiveAsStr := forceRecursive.Source + ":" + forceRecursive.Target + ":ro-force-recursive,rprivate"
ctx := context.Background()
client := testEnv.APIClient()
containers := []string{
container.Run(ctx, t, client, container.WithMount(ro), container.WithCmd(roVerifier...)),
container.Run(ctx, t, client, container.WithBindRaw(roAsStr), container.WithCmd(roVerifier...)),
container.Run(ctx, t, client, container.WithMount(nonRecursive), container.WithCmd(nonRecursiveVerifier...)),
container.Run(ctx, t, client, container.WithBindRaw(nonRecursiveAsStr), container.WithCmd(nonRecursiveVerifier...)),
}
// The force-recursive containers are only started when rroSupported is true.
if rroSupported {
containers = append(containers,
container.Run(ctx, t, client, container.WithMount(forceRecursive), container.WithCmd(forceRecursiveVerifier...)),
container.Run(ctx, t, client, container.WithBindRaw(forceRecursiveAsStr), container.WithCmd(forceRecursiveVerifier...)),
)
}
// Wait on every container with the IsSuccessful check, i.e. until its verifier command has succeeded.
for _, c := range containers {
poll.WaitOn(t, container.IsSuccessful(ctx, client, c), poll.WithDelay(100*time.Millisecond))
}
}

View file

@ -94,6 +94,13 @@ func WithBind(src, target string) func(*TestContainerConfig) {
}
}
// WithBindRaw returns an option that appends the raw bind specification s,
// unmodified, to the container's HostConfig.Binds.
func WithBindRaw(s string) func(*TestContainerConfig) {
	return func(cfg *TestContainerConfig) {
		binds := cfg.HostConfig.Binds
		cfg.HostConfig.Binds = append(binds, s)
	}
}
// WithTmpfs sets a target path in the container to a tmpfs, with optional options
// (separated with a colon).
func WithTmpfs(targetAndOpts string) func(config *TestContainerConfig) {

View file

@ -194,7 +194,7 @@ func (p *linuxParser) ReadWrite(mode string) bool {
}
for _, o := range strings.Split(mode, ",") {
if o == "ro" {
if o == "ro" || strings.HasPrefix(o, "ro-") || o == "rro" {
return false
}
}
@ -262,6 +262,24 @@ func (p *linuxParser) ParseMountRaw(raw, volumeDriver string) (*MountPoint, erro
}
}
for _, m := range strings.Split(mode, ",") {
m = strings.TrimSpace(m)
if strings.HasPrefix(m, "ro-") || m == "rro" {
if spec.Type != mount.TypeBind {
return nil, fmt.Errorf("mount mode %q requires a bind mount: %w", mode, errInvalidSpec(raw))
}
if spec.BindOptions == nil {
spec.BindOptions = &mount.BindOptions{}
}
switch m {
case "ro-non-recursive":
spec.BindOptions.ReadOnlyNonRecursive = true
case "ro-force-recursive", "rro":
spec.BindOptions.ReadOnlyForceRecursive = true
}
}
}
mp, err := p.parseMountSpec(spec, false)
if mp != nil {
mp.Mode = mode
@ -329,6 +347,9 @@ func (p *linuxParser) ParseVolumesFrom(spec string) (string, string, error) {
if !linuxValidMountMode(mode) {
return "", "", errInvalidMode(mode)
}
if strings.HasPrefix(mode, "ro-") || mode == "rro" {
return "", "", fmt.Errorf("mount mode %q is not supported for volumes-from mounts: %w", mode, errInvalidMode(mode))
}
// For now don't allow propagation properties while importing
// volumes from data container. These volumes will inherit
// the same propagation property as of the original volume

View file

@ -99,25 +99,30 @@ func TestLinuxParseMountRaw(t *testing.T) {
func TestLinuxParseMountRawSplit(t *testing.T) {
cases := []struct {
bind string
driver string
expType mount.Type
expDest string
expSource string
expName string
expDriver string
expRW bool
fail bool
bind string
driver string
expType mount.Type
expDest string
expSource string
expName string
expDriver string
expRW bool
expNonRRO bool
expForceRRO bool
fail bool
}{
{"/tmp:/tmp1", "", mount.TypeBind, "/tmp1", "/tmp", "", "", true, false},
{"/tmp:/tmp2:ro", "", mount.TypeBind, "/tmp2", "/tmp", "", "", false, false},
{"/tmp:/tmp3:rw", "", mount.TypeBind, "/tmp3", "/tmp", "", "", true, false},
{"/tmp:/tmp4:foo", "", mount.TypeBind, "", "", "", "", false, true},
{"name:/named1", "", mount.TypeVolume, "/named1", "", "name", "", true, false},
{"name:/named2", "external", mount.TypeVolume, "/named2", "", "name", "external", true, false},
{"name:/named3:ro", "local", mount.TypeVolume, "/named3", "", "name", "local", false, false},
{"local/name:/tmp:rw", "", mount.TypeVolume, "/tmp", "", "local/name", "", true, false},
{"/tmp:tmp", "", mount.TypeBind, "", "", "", "", true, true},
{"/tmp:/tmp1", "", mount.TypeBind, "/tmp1", "/tmp", "", "", true, false, false, false},
{"/tmp:/tmp2:ro", "", mount.TypeBind, "/tmp2", "/tmp", "", "", false, false, false, false},
{"/tmp:/tmp3:rw", "", mount.TypeBind, "/tmp3", "/tmp", "", "", true, false, false, false},
{"/tmp:/tmp4:foo", "", mount.TypeBind, "", "", "", "", false, false, false, true},
{"/tmp:/tmp5:ro-non-recursive", "", mount.TypeBind, "/tmp5", "/tmp", "", "", false, true, false, false},
{"/tmp:/tmp6:ro-force-recursive,rprivate", "", mount.TypeBind, "/tmp6", "/tmp", "", "", false, false, true, false},
{"/tmp:/tmp7:rro", "", mount.TypeBind, "/tmp7", "/tmp", "", "", false, false, true, false},
{"name:/named1", "", mount.TypeVolume, "/named1", "", "name", "", true, false, false, false},
{"name:/named2", "external", mount.TypeVolume, "/named2", "", "name", "external", true, false, false, false},
{"name:/named3:ro", "local", mount.TypeVolume, "/named3", "", "name", "local", false, false, false, false},
{"local/name:/tmp:rw", "", mount.TypeVolume, "/tmp", "", "local/name", "", true, false, false, false},
{"/tmp:tmp", "", mount.TypeBind, "", "", "", "", true, false, false, true},
}
parser := NewLinuxParser()
@ -141,6 +146,13 @@ func TestLinuxParseMountRawSplit(t *testing.T) {
assert.Equal(t, m.Driver, c.expDriver)
assert.Equal(t, m.RW, c.expRW)
assert.Equal(t, m.Type, c.expType)
var nonRRO, forceRRO bool
if m.Spec.BindOptions != nil {
nonRRO = m.Spec.BindOptions.ReadOnlyNonRecursive
forceRRO = m.Spec.BindOptions.ReadOnlyForceRecursive
}
assert.Equal(t, nonRRO, c.expNonRRO)
assert.Equal(t, forceRRO, c.expForceRRO)
})
}
}

View file

@ -13,8 +13,11 @@ var ErrVolumeTargetIsRoot = errors.New("invalid specification: destination can't
// read-write modes
var rwModes = map[string]bool{
"rw": true,
"ro": true,
"rw": true,
"ro": true, // attempts recursive read-only if possible
"ro-non-recursive": true, // makes the mount non-recursively read-only, but still leaves the mount recursive
"ro-force-recursive": true, // raises an error if the mount cannot be made recursively read-only
"rro": true, // alias for ro-force-recursive
}
// Parser represents a platform specific parser for mount expressions