moby/daemon/daemon_linux.go

package daemon // import "github.com/docker/docker/daemon"

import (
	"bufio"
	"context"
	"fmt"
	"io"
	"net"
	"os"
	"regexp"
	"strings"
	"sync"

	"github.com/containerd/log"
	"github.com/docker/docker/daemon/config"
	"github.com/docker/docker/libnetwork/ns"
	"github.com/docker/docker/libnetwork/resolvconf"
	"github.com/moby/sys/mount"
	"github.com/moby/sys/mountinfo"
	"github.com/pkg/errors"
	"github.com/vishvananda/netlink"
	"golang.org/x/sys/unix"
)

// On Linux, plugins use a static path for storing execution state,
// instead of deriving path from daemon's exec-root. This is because
// plugin socket files are created here and they cannot exceed max
// path length of 108 bytes.
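// (That limit comes from the size of the sun_path field in the kernel's
// sockaddr_un struct; see unix(7).)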
func getPluginExecRoot(_ *config.Config) string {
return "/run/docker/plugins"
}
func (daemon *Daemon) cleanupMountsByID(id string) error {
log.G(context.TODO()).Debugf("Cleaning up old mountid %s: start.", id)
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return err
}
defer f.Close()
return daemon.cleanupMountsFromReaderByID(f, id, mount.Unmount)
}
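
// cleanupMountsFromReaderByID scans mountinfo-formatted lines from reader and
// unmounts every mount point under daemon.root that matches one of the clean
// patterns for id. Per proc(5), a mountinfo line looks like
//
//	36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw
//
// where the fifth whitespace-separated field (fields[4]) is the mount point,
// which is why the loop below inspects that field.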
func (daemon *Daemon) cleanupMountsFromReaderByID(reader io.Reader, id string, unmount func(target string) error) error {
if daemon.root == "" {
return nil
}
var errs []string
regexps := getCleanPatterns(id)
sc := bufio.NewScanner(reader)
for sc.Scan() {
if fields := strings.Fields(sc.Text()); len(fields) > 4 {
if mnt := fields[4]; strings.HasPrefix(mnt, daemon.root) {
for _, p := range regexps {
if p.MatchString(mnt) {
if err := unmount(mnt); err != nil {
log.G(context.TODO()).Error(err)
errs = append(errs, err.Error())
}
}
}
}
}
}
if err := sc.Err(); err != nil {
return err
}
if len(errs) > 0 {
return fmt.Errorf("Error cleaning up mounts:\n%v", strings.Join(errs, "\n"))
}
log.G(context.TODO()).Debugf("Cleaning up old mountid %v: done.", id)
return nil
}
// cleanupMounts unmounts mounts created for container resources, as well as
// the daemon root mount.
func (daemon *Daemon) cleanupMounts(cfg *config.Config) error {
if err := daemon.cleanupMountsByID(""); err != nil {
return err
}
info, err := mountinfo.GetMounts(mountinfo.SingleEntryFilter(daemon.root))
if err != nil {
return errors.Wrap(err, "error reading mount table for cleanup")
}
if len(info) < 1 {
// no mount found, we're done here
return nil
}
	// `info.Root` here is the root mountpoint of the passed in path (`daemon.root`).
	// The only case that needs to be cleaned up is when the daemon has performed a
	// `mount --bind /daemon/root /daemon/root && mount --make-shared /daemon/root`.
	// This is only done when the daemon is started up and `/daemon/root` is not
	// already on a shared mountpoint.
if !shouldUnmountRoot(daemon.root, info[0]) {
return nil
}
unmountFile := getUnmountOnShutdownPath(cfg)
if _, err := os.Stat(unmountFile); err != nil {
return nil
}
log.G(context.TODO()).WithField("mountpoint", daemon.root).Debug("unmounting daemon root")
if err := mount.Unmount(daemon.root); err != nil {
return err
}
return os.Remove(unmountFile)
}
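
// getCleanPatterns builds the regular expressions used to match leftover mount
// points. With an empty id, it substitutes a pattern matching any 64-character
// hex container ID and also matches per-container shm mounts, e.g.
// "containers/[0-9a-f]{64}/mounts/shm"; with a concrete id, it matches only
// that container's "overlay2/<id>/merged" and "zfs/graph/<id>" mounts.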
func getCleanPatterns(id string) (regexps []*regexp.Regexp) {
var patterns []string
if id == "" {
id = "[0-9a-f]{64}"
daemon: fix daemon.Shutdown, daemon.Cleanup not cleaning up overlay2 mounts While working on deprecation of the `aufs` and `overlay` storage-drivers, the `TestCleanupMounts` had to be updated, as it was currently using `aufs` for testing. When rewriting the test to use `overlay2` instead (using an updated `mountsFixture`), I found out that the test was failing, and it appears that only `overlay`, but not `overlay2` was taken into account. These cleanup functions were added in 05cc737f5411a0effd299429140d031c4ad8dd05, but at the time the `overlay2` storage driver was not yet implemented; https://github.com/moby/moby/tree/05cc737f5411a0effd299429140d031c4ad8dd05/daemon/graphdriver This omission was likely missed in 23e5c94cfb26eb72c097892712d3dbaa93ee9bc0, because the original implementation re-used the `overlay` storage driver, but later on it was decided to make `overlay2` a separate storage driver. As a result of the above, `daemon.cleanupMountsByID()` would ignore any `overlay2` mounts during `daemon.Shutdown()` and `daemon.Cleanup()`. This patch: - Adds a new `mountsFixtureOverlay2` with example mounts for `overlay2` - Rewrites the tests to use `gotest.tools` for more informative output on failures. - Adds the missing regex patterns to `daemon/getCleanPatterns()`. The patterns are added at the start of the list to allow for the fasted match (`overlay2` is the default for most setups, and the code is iterating over possible options). As a follow-up, we could consider adding additional fixtures for different storage drivers. Before the fix is applied: go test -v -run TestCleanupMounts ./daemon/ === RUN TestCleanupMounts === RUN TestCleanupMounts/aufs === RUN TestCleanupMounts/overlay2 daemon_linux_test.go:135: assertion failed: 0 (unmounted int) != 1 (int): Expected to unmount the shm (and the shm only) --- FAIL: TestCleanupMounts (0.01s) --- PASS: TestCleanupMounts/aufs (0.00s) --- FAIL: TestCleanupMounts/overlay2 (0.01s) === RUN TestCleanupMountsByID === RUN TestCleanupMountsByID/aufs === RUN TestCleanupMountsByID/overlay2 daemon_linux_test.go:171: assertion failed: 0 (unmounted int) != 1 (int): Expected to unmount the root (and that only) --- FAIL: TestCleanupMountsByID (0.00s) --- PASS: TestCleanupMountsByID/aufs (0.00s) --- FAIL: TestCleanupMountsByID/overlay2 (0.00s) FAIL FAIL github.com/docker/docker/daemon 0.054s FAIL With the fix applied: go test -v -run TestCleanupMounts ./daemon/ === RUN TestCleanupMounts === RUN TestCleanupMounts/aufs === RUN TestCleanupMounts/overlay2 --- PASS: TestCleanupMounts (0.00s) --- PASS: TestCleanupMounts/aufs (0.00s) --- PASS: TestCleanupMounts/overlay2 (0.00s) === RUN TestCleanupMountsByID === RUN TestCleanupMountsByID/aufs === RUN TestCleanupMountsByID/overlay2 --- PASS: TestCleanupMountsByID (0.00s) --- PASS: TestCleanupMountsByID/aufs (0.00s) --- PASS: TestCleanupMountsByID/overlay2 (0.00s) PASS ok github.com/docker/docker/daemon 0.042s Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2022-05-29 14:20:14 +00:00
patterns = append(patterns, "containers/"+id+"/mounts/shm", "containers/"+id+"/shm")
}
patterns = append(patterns, "overlay2/"+id+"/merged$", "zfs/graph/"+id+"$")
for _, p := range patterns {
r, err := regexp.Compile(p)
if err == nil {
regexps = append(regexps, r)
}
}
return
}
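
// shouldUnmountRoot reports whether the daemon root is a mount point whose
// propagation was made shared. The Optional field of a mountinfo entry carries
// propagation tags such as "shared:1" (see proc(5)); sharedPropagationOption,
// defined elsewhere in this package, is presumably the "shared:" prefix that
// hasMountInfoOption matches against.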
func shouldUnmountRoot(root string, info *mountinfo.Info) bool {
if !strings.HasSuffix(root, info.Root) {
return false
}
return hasMountInfoOption(info.Optional, sharedPropagationOption)
}
// setupResolvConf sets the appropriate resolv.conf file if not specified.
// When systemd-resolved is running, the default /etc/resolv.conf points to
// localhost. In that case, fetch the alternative config file that lives at a
// different path so that containers can use it.
// In all other cases, fall back to the default one.
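// (With systemd-resolved, resolvconf.Path() typically returns
// /run/systemd/resolve/resolv.conf, which lists the real upstream nameservers
// instead of the 127.0.0.53 stub.)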
func setupResolvConf(config *config.Config) {
if config.ResolvConf != "" {
return
}
config.ResolvConf = resolvconf.Path()
}
// ifaceAddrs returns the IPv4 and IPv6 addresses assigned to the network
// interface with name linkName.
//
// No error is returned if the named interface does not exist.
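//
// A typical (hypothetical) call site, e.g. for the default bridge:
//
//	v4, v6, err := ifaceAddrs("docker0")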
func ifaceAddrs(linkName string) (v4, v6 []*net.IPNet, err error) {
nl := ns.NlHandle()
link, err := nl.LinkByName(linkName)
if err != nil {
if !errors.As(err, new(netlink.LinkNotFoundError)) {
return nil, nil, err
}
return nil, nil, nil
}
get := func(family int) ([]*net.IPNet, error) {
addrs, err := nl.AddrList(link, family)
if err != nil {
return nil, err
}
ipnets := make([]*net.IPNet, len(addrs))
for i := range addrs {
ipnets[i] = addrs[i].IPNet
}
return ipnets, nil
}
v4, err = get(netlink.FAMILY_V4)
if err != nil {
return nil, nil, err
}
v6, err = get(netlink.FAMILY_V6)
if err != nil {
return nil, nil, err
}
return v4, v6, nil
}
var (
kernelSupportsRROOnce sync.Once
kernelSupportsRROErr error
)
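
// kernelSupportsRecursivelyReadOnly probes whether the kernel supports
// mount_setattr(2) with MOUNT_ATTR_RDONLY and AT_RECURSIVE (recursive
// read-only mounts, kernel >= 5.12) by mounting a scratch tmpfs and trying to
// flip it read-only recursively. The probe runs once; its result is memoized
// in kernelSupportsRROErr via kernelSupportsRROOnce. The raw unix.Mount,
// unix.MountSetattr, and unix.Unmount calls below are wrapped in retry loops
// because each of them can fail with EINTR.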
func kernelSupportsRecursivelyReadOnly() error {
fn := func() error {
tmpMnt, err := os.MkdirTemp("", "moby-detect-rro")
if err != nil {
return fmt.Errorf("failed to create a temp directory: %w", err)
}
for {
err = unix.Mount("", tmpMnt, "tmpfs", 0, "")
if !errors.Is(err, unix.EINTR) {
break
}
}
if err != nil {
return fmt.Errorf("failed to mount tmpfs on %q: %w", tmpMnt, err)
}
defer func() {
var umErr error
for {
umErr = unix.Unmount(tmpMnt, 0)
if !errors.Is(umErr, unix.EINTR) {
break
}
}
if umErr != nil {
log.G(context.TODO()).WithError(umErr).Warnf("Failed to unmount %q", tmpMnt)
}
}()
attr := &unix.MountAttr{
Attr_set: unix.MOUNT_ATTR_RDONLY,
}
for {
err = unix.MountSetattr(-1, tmpMnt, unix.AT_RECURSIVE, attr)
if !errors.Is(err, unix.EINTR) {
break
}
}
// ENOSYS on kernel < 5.12
if err != nil {
return fmt.Errorf("failed to call mount_setattr: %w", err)
}
return nil
}
kernelSupportsRROOnce.Do(func() {
kernelSupportsRROErr = fn()
})
return kernelSupportsRROErr
}
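
// supportsRecursivelyReadOnly reports whether recursively read-only ("rro")
// mounts can be used with the given runtime: the kernel must support
// mount_setattr (see above), and the runtime's OCI features struct must list
// "rro" among its supported mount options.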
func supportsRecursivelyReadOnly(cfg *configStore, runtime string) error {
if err := kernelSupportsRecursivelyReadOnly(); err != nil {
return fmt.Errorf("rro is not supported: %w (kernel is older than 5.12?)", err)
}
if runtime == "" {
runtime = cfg.Runtimes.Default
}
features := cfg.Runtimes.Features(runtime)
if features == nil {
return fmt.Errorf("rro is not supported by runtime %q: OCI features struct is not available", runtime)
}
for _, s := range features.MountOptions {
if s == "rro" {
return nil
}
}
return fmt.Errorf("rro is not supported by runtime %q", runtime)
}