d222bf097c
The existing runtimes reload logic went to great lengths to replace the directory containing runtime wrapper scripts as atomically as possible within the limitations of the Linux filesystem ABI. Trouble is, atomically swapping the wrapper scripts directory solves the wrong problem! The runtime configuration is "locked in" when a container is started, including the path to the runC binary. If a container is started with a runtime which requires a daemon-managed wrapper script and then the daemon is reloaded with a config which no longer requires the wrapper script (i.e. some args -> no args, or the runtime is dropped from the config), that container would become unmanageable. Any attempts to stop, exec or otherwise perform lifecycle management operations on the container are likely to fail due to the wrapper script no longer existing at its original path. Atomically swapping the wrapper scripts is also incompatible with the read-copy-update paradigm for reloading configuration. A handler in the daemon could retain a reference to the pre-reload configuration for an indeterminate amount of time after the daemon configuration has been reloaded and updated. It is possible for the daemon to attempt to start a container using a deleted wrapper script if a request to run a container races a reload. Solve the problem of deleting referenced wrapper scripts by ensuring that all wrapper scripts are *immutable* for the lifetime of the daemon process. Any given runtime wrapper script must always exist with the same contents, no matter how many times the daemon config is reloaded, or what changes are made to the config. This is accomplished by using everyone's favourite design pattern: content-addressable storage. Each wrapper script file name is suffixed with the SHA-256 digest of its contents to (probabilistically) guarantee immutability without needing any concurrency control. Stale runtime wrapper scripts are only cleaned up on the next daemon restart. Split the derived runtimes configuration from the user-supplied configuration to have a place to store derived state without mutating the user-supplied configuration or exposing daemon internals in API struct types. Hold the derived state and the user-supplied configuration in a single struct value so that they can be updated as an atomic unit. Signed-off-by: Cory Snider <csnider@mirantis.com>
395 lines
13 KiB
Go
395 lines
13 KiB
Go
package daemon // import "github.com/docker/docker/daemon"
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"time"
|
|
|
|
containertypes "github.com/docker/docker/api/types/container"
|
|
"github.com/docker/docker/api/types/strslice"
|
|
"github.com/docker/docker/container"
|
|
"github.com/docker/docker/daemon/config"
|
|
"github.com/docker/docker/daemon/network"
|
|
"github.com/docker/docker/errdefs"
|
|
"github.com/docker/docker/image"
|
|
"github.com/docker/docker/oci/caps"
|
|
"github.com/docker/docker/opts"
|
|
"github.com/docker/docker/pkg/system"
|
|
"github.com/docker/docker/runconfig"
|
|
volumemounts "github.com/docker/docker/volume/mounts"
|
|
"github.com/docker/go-connections/nat"
|
|
"github.com/moby/sys/signal"
|
|
"github.com/opencontainers/selinux/go-selinux"
|
|
"github.com/pkg/errors"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// GetContainer looks for a container using the provided information, which could be
|
|
// one of the following inputs from the caller:
|
|
// - A full container ID, which will exact match a container in daemon's list
|
|
// - A container name, which will only exact match via the GetByName() function
|
|
// - A partial container ID prefix (e.g. short ID) of any length that is
|
|
// unique enough to only return a single container object
|
|
// If none of these searches succeed, an error is returned
|
|
func (daemon *Daemon) GetContainer(prefixOrName string) (*container.Container, error) {
|
|
if len(prefixOrName) == 0 {
|
|
return nil, errors.WithStack(invalidIdentifier(prefixOrName))
|
|
}
|
|
|
|
if containerByID := daemon.containers.Get(prefixOrName); containerByID != nil {
|
|
// prefix is an exact match to a full container ID
|
|
return containerByID, nil
|
|
}
|
|
|
|
// GetByName will match only an exact name provided; we ignore errors
|
|
if containerByName, _ := daemon.GetByName(prefixOrName); containerByName != nil {
|
|
// prefix is an exact match to a full container Name
|
|
return containerByName, nil
|
|
}
|
|
|
|
containerID, err := daemon.containersReplica.GetByPrefix(prefixOrName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ctr := daemon.containers.Get(containerID)
|
|
if ctr == nil {
|
|
// Updates to the daemon.containersReplica ViewDB are not atomic
|
|
// or consistent w.r.t. the live daemon.containers Store so
|
|
// while reaching this code path may be indicative of a bug,
|
|
// it is not _necessarily_ the case.
|
|
logrus.WithField("prefixOrName", prefixOrName).
|
|
WithField("id", containerID).
|
|
Debugf("daemon.GetContainer: container is known to daemon.containersReplica but not daemon.containers")
|
|
return nil, containerNotFound(prefixOrName)
|
|
}
|
|
return ctr, nil
|
|
}
|
|
|
|
// checkContainer make sure the specified container validates the specified conditions
|
|
func (daemon *Daemon) checkContainer(container *container.Container, conditions ...func(*container.Container) error) error {
|
|
for _, condition := range conditions {
|
|
if err := condition(container); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Exists returns a true if a container of the specified ID or name exists,
|
|
// false otherwise.
|
|
func (daemon *Daemon) Exists(id string) bool {
|
|
c, _ := daemon.GetContainer(id)
|
|
return c != nil
|
|
}
|
|
|
|
// IsPaused returns a bool indicating if the specified container is paused.
|
|
func (daemon *Daemon) IsPaused(id string) bool {
|
|
c, _ := daemon.GetContainer(id)
|
|
return c.State.IsPaused()
|
|
}
|
|
|
|
func (daemon *Daemon) containerRoot(id string) string {
|
|
return filepath.Join(daemon.repository, id)
|
|
}
|
|
|
|
// Load reads the contents of a container from disk
|
|
// This is typically done at startup.
|
|
func (daemon *Daemon) load(id string) (*container.Container, error) {
|
|
ctr := daemon.newBaseContainer(id)
|
|
|
|
if err := ctr.FromDisk(); err != nil {
|
|
return nil, err
|
|
}
|
|
selinux.ReserveLabel(ctr.ProcessLabel)
|
|
|
|
if ctr.ID != id {
|
|
return ctr, fmt.Errorf("Container %s is stored at %s", ctr.ID, id)
|
|
}
|
|
|
|
return ctr, nil
|
|
}
|
|
|
|
// Register makes a container object usable by the daemon as <container.ID>
|
|
func (daemon *Daemon) Register(c *container.Container) error {
|
|
// Attach to stdout and stderr
|
|
if c.Config.OpenStdin {
|
|
c.StreamConfig.NewInputPipes()
|
|
} else {
|
|
c.StreamConfig.NewNopInputPipe()
|
|
}
|
|
|
|
// once in the memory store it is visible to other goroutines
|
|
// grab a Lock until it has been checkpointed to avoid races
|
|
c.Lock()
|
|
defer c.Unlock()
|
|
|
|
daemon.containers.Add(c.ID, c)
|
|
return c.CheckpointTo(daemon.containersReplica)
|
|
}
|
|
|
|
func (daemon *Daemon) newContainer(name string, operatingSystem string, config *containertypes.Config, hostConfig *containertypes.HostConfig, imgID image.ID, managed bool) (*container.Container, error) {
|
|
var (
|
|
id string
|
|
err error
|
|
noExplicitName = name == ""
|
|
)
|
|
id, name, err = daemon.generateIDAndName(name)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if hostConfig.NetworkMode.IsHost() {
|
|
if config.Hostname == "" {
|
|
config.Hostname, err = os.Hostname()
|
|
if err != nil {
|
|
return nil, errdefs.System(err)
|
|
}
|
|
}
|
|
} else {
|
|
daemon.generateHostname(id, config)
|
|
}
|
|
entrypoint, args := daemon.getEntrypointAndArgs(config.Entrypoint, config.Cmd)
|
|
|
|
base := daemon.newBaseContainer(id)
|
|
base.Created = time.Now().UTC()
|
|
base.Managed = managed
|
|
base.Path = entrypoint
|
|
base.Args = args // FIXME: de-duplicate from config
|
|
base.Config = config
|
|
base.HostConfig = &containertypes.HostConfig{}
|
|
base.ImageID = imgID
|
|
base.NetworkSettings = &network.Settings{IsAnonymousEndpoint: noExplicitName}
|
|
base.Name = name
|
|
base.Driver = daemon.imageService.StorageDriver()
|
|
base.OS = operatingSystem
|
|
return base, err
|
|
}
|
|
|
|
// GetByName returns a container given a name.
|
|
func (daemon *Daemon) GetByName(name string) (*container.Container, error) {
|
|
if len(name) == 0 {
|
|
return nil, fmt.Errorf("No container name supplied")
|
|
}
|
|
fullName := name
|
|
if name[0] != '/' {
|
|
fullName = "/" + name
|
|
}
|
|
id, err := daemon.containersReplica.Snapshot().GetID(fullName)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("Could not find entity for %s", name)
|
|
}
|
|
e := daemon.containers.Get(id)
|
|
if e == nil {
|
|
return nil, fmt.Errorf("Could not find container for entity id %s", id)
|
|
}
|
|
return e, nil
|
|
}
|
|
|
|
// newBaseContainer creates a new container with its initial
|
|
// configuration based on the root storage from the daemon.
|
|
func (daemon *Daemon) newBaseContainer(id string) *container.Container {
|
|
return container.NewBaseContainer(id, daemon.containerRoot(id))
|
|
}
|
|
|
|
func (daemon *Daemon) getEntrypointAndArgs(configEntrypoint strslice.StrSlice, configCmd strslice.StrSlice) (string, []string) {
|
|
if len(configEntrypoint) != 0 {
|
|
return configEntrypoint[0], append(configEntrypoint[1:], configCmd...)
|
|
}
|
|
return configCmd[0], configCmd[1:]
|
|
}
|
|
|
|
func (daemon *Daemon) generateHostname(id string, config *containertypes.Config) {
|
|
// Generate default hostname
|
|
if config.Hostname == "" {
|
|
config.Hostname = id[:12]
|
|
}
|
|
}
|
|
|
|
func (daemon *Daemon) setSecurityOptions(cfg *config.Config, container *container.Container, hostConfig *containertypes.HostConfig) error {
|
|
container.Lock()
|
|
defer container.Unlock()
|
|
return daemon.parseSecurityOpt(cfg, &container.SecurityOptions, hostConfig)
|
|
}
|
|
|
|
func (daemon *Daemon) setHostConfig(container *container.Container, hostConfig *containertypes.HostConfig) error {
|
|
// Do not lock while creating volumes since this could be calling out to external plugins
|
|
// Don't want to block other actions, like `docker ps` because we're waiting on an external plugin
|
|
if err := daemon.registerMountPoints(container, hostConfig); err != nil {
|
|
return err
|
|
}
|
|
|
|
container.Lock()
|
|
defer container.Unlock()
|
|
|
|
// Register any links from the host config before starting the container
|
|
if err := daemon.registerLinks(container, hostConfig); err != nil {
|
|
return err
|
|
}
|
|
|
|
runconfig.SetDefaultNetModeIfBlank(hostConfig)
|
|
container.HostConfig = hostConfig
|
|
return nil
|
|
}
|
|
|
|
// verifyContainerSettings performs validation of the hostconfig and config
|
|
// structures.
|
|
func (daemon *Daemon) verifyContainerSettings(daemonCfg *configStore, hostConfig *containertypes.HostConfig, config *containertypes.Config, update bool) (warnings []string, err error) {
|
|
// First perform verification of settings common across all platforms.
|
|
if err = validateContainerConfig(config); err != nil {
|
|
return warnings, err
|
|
}
|
|
if err := validateHostConfig(hostConfig); err != nil {
|
|
return warnings, err
|
|
}
|
|
|
|
// Now do platform-specific verification
|
|
warnings, err = verifyPlatformContainerSettings(daemon, daemonCfg, hostConfig, update)
|
|
for _, w := range warnings {
|
|
logrus.Warn(w)
|
|
}
|
|
return warnings, err
|
|
}
|
|
|
|
func validateContainerConfig(config *containertypes.Config) error {
|
|
if config == nil {
|
|
return nil
|
|
}
|
|
if err := translateWorkingDir(config); err != nil {
|
|
return err
|
|
}
|
|
if len(config.StopSignal) > 0 {
|
|
if _, err := signal.ParseSignal(config.StopSignal); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
// Validate if Env contains empty variable or not (e.g., ``, `=foo`)
|
|
for _, env := range config.Env {
|
|
if _, err := opts.ValidateEnv(env); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return validateHealthCheck(config.Healthcheck)
|
|
}
|
|
|
|
func validateHostConfig(hostConfig *containertypes.HostConfig) error {
|
|
if hostConfig == nil {
|
|
return nil
|
|
}
|
|
|
|
if hostConfig.AutoRemove && !hostConfig.RestartPolicy.IsNone() {
|
|
return errors.Errorf("can't create 'AutoRemove' container with restart policy")
|
|
}
|
|
// Validate mounts; check if host directories still exist
|
|
parser := volumemounts.NewParser()
|
|
for _, c := range hostConfig.Mounts {
|
|
cfg := c
|
|
if err := parser.ValidateMountConfig(&cfg); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, extraHost := range hostConfig.ExtraHosts {
|
|
if _, err := opts.ValidateExtraHost(extraHost); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if err := validatePortBindings(hostConfig.PortBindings); err != nil {
|
|
return err
|
|
}
|
|
if err := validateRestartPolicy(hostConfig.RestartPolicy); err != nil {
|
|
return err
|
|
}
|
|
if err := validateCapabilities(hostConfig); err != nil {
|
|
return err
|
|
}
|
|
if !hostConfig.Isolation.IsValid() {
|
|
return errors.Errorf("invalid isolation '%s' on %s", hostConfig.Isolation, runtime.GOOS)
|
|
}
|
|
for k := range hostConfig.Annotations {
|
|
if k == "" {
|
|
return errors.Errorf("invalid Annotations: the empty string is not permitted as an annotation key")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateCapabilities(hostConfig *containertypes.HostConfig) error {
|
|
if _, err := caps.NormalizeLegacyCapabilities(hostConfig.CapAdd); err != nil {
|
|
return errors.Wrap(err, "invalid CapAdd")
|
|
}
|
|
if _, err := caps.NormalizeLegacyCapabilities(hostConfig.CapDrop); err != nil {
|
|
return errors.Wrap(err, "invalid CapDrop")
|
|
}
|
|
// TODO consider returning warnings if "Privileged" is combined with Capabilities, CapAdd and/or CapDrop
|
|
return nil
|
|
}
|
|
|
|
// validateHealthCheck validates the healthcheck params of Config
|
|
func validateHealthCheck(healthConfig *containertypes.HealthConfig) error {
|
|
if healthConfig == nil {
|
|
return nil
|
|
}
|
|
if healthConfig.Interval != 0 && healthConfig.Interval < containertypes.MinimumDuration {
|
|
return errors.Errorf("Interval in Healthcheck cannot be less than %s", containertypes.MinimumDuration)
|
|
}
|
|
if healthConfig.Timeout != 0 && healthConfig.Timeout < containertypes.MinimumDuration {
|
|
return errors.Errorf("Timeout in Healthcheck cannot be less than %s", containertypes.MinimumDuration)
|
|
}
|
|
if healthConfig.Retries < 0 {
|
|
return errors.Errorf("Retries in Healthcheck cannot be negative")
|
|
}
|
|
if healthConfig.StartPeriod != 0 && healthConfig.StartPeriod < containertypes.MinimumDuration {
|
|
return errors.Errorf("StartPeriod in Healthcheck cannot be less than %s", containertypes.MinimumDuration)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validatePortBindings(ports nat.PortMap) error {
|
|
for port := range ports {
|
|
_, portStr := nat.SplitProtoPort(string(port))
|
|
if _, err := nat.ParsePort(portStr); err != nil {
|
|
return errors.Errorf("invalid port specification: %q", portStr)
|
|
}
|
|
for _, pb := range ports[port] {
|
|
_, err := nat.NewPort(nat.SplitProtoPort(pb.HostPort))
|
|
if err != nil {
|
|
return errors.Errorf("invalid port specification: %q", pb.HostPort)
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateRestartPolicy(policy containertypes.RestartPolicy) error {
|
|
switch policy.Name {
|
|
case "always", "unless-stopped", "no":
|
|
if policy.MaximumRetryCount != 0 {
|
|
return errors.Errorf("maximum retry count cannot be used with restart policy '%s'", policy.Name)
|
|
}
|
|
case "on-failure":
|
|
if policy.MaximumRetryCount < 0 {
|
|
return errors.Errorf("maximum retry count cannot be negative")
|
|
}
|
|
case "":
|
|
// do nothing
|
|
return nil
|
|
default:
|
|
return errors.Errorf("invalid restart policy '%s'", policy.Name)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// translateWorkingDir translates the working-dir for the target platform,
|
|
// and returns an error if the given path is not an absolute path.
|
|
func translateWorkingDir(config *containertypes.Config) error {
|
|
if config.WorkingDir == "" {
|
|
return nil
|
|
}
|
|
wd := filepath.FromSlash(config.WorkingDir) // Ensure in platform semantics
|
|
if !system.IsAbs(wd) {
|
|
return fmt.Errorf("the working directory '%s' is invalid, it needs to be an absolute path", config.WorkingDir)
|
|
}
|
|
config.WorkingDir = wd
|
|
return nil
|
|
}
|