From ddae20c032058a0fd42c34c2e9750ee8f6296ac8 Mon Sep 17 00:00:00 2001 From: Kenfe-Mickael Laventure Date: Fri, 22 Sep 2017 06:52:41 -0700 Subject: [PATCH] Update libcontainerd to use containerd 1.0 Signed-off-by: Kenfe-Mickael Laventure --- api/server/router/container/exec.go | 2 +- builder/dockerfile/containerbackend.go | 2 +- cmd/dockerd/config.go | 2 + cmd/dockerd/config_unix.go | 2 - cmd/dockerd/config_windows.go | 1 + cmd/dockerd/daemon.go | 17 +- cmd/dockerd/daemon_linux.go | 2 +- cmd/dockerd/daemon_solaris.go | 16 +- cmd/dockerd/daemon_unix.go | 62 +- cmd/dockerd/daemon_windows.go | 15 +- container/container.go | 37 +- container/container_unix.go | 9 - container/container_windows.go | 6 - container/state.go | 17 +- container/state_unix.go | 10 - container/state_windows.go | 7 - container/stream/streams.go | 6 +- daemon/checkpoint.go | 17 +- daemon/config/config.go | 5 + daemon/config/config_common_unix.go | 2 - daemon/daemon.go | 125 +- daemon/daemon_unix.go | 204 ++- daemon/daemon_windows.go | 92 +- daemon/delete.go | 1 + daemon/errors.go | 5 + daemon/exec.go | 40 +- daemon/exec/exec.go | 66 +- daemon/exec_linux.go | 14 +- daemon/exec_solaris.go | 4 +- daemon/exec_windows.go | 4 +- daemon/info_unix.go | 29 +- daemon/kill.go | 5 +- daemon/logger/plugin_unix.go | 2 +- daemon/monitor.go | 195 +-- daemon/monitor_linux.go | 10 +- daemon/monitor_solaris.go | 9 +- daemon/monitor_windows.go | 50 +- daemon/oci_linux.go | 9 +- daemon/pause.go | 15 +- daemon/reload.go | 4 - daemon/resize.go | 5 +- daemon/start.go | 63 +- daemon/start_unix.go | 47 +- daemon/start_windows.go | 13 +- daemon/top_unix.go | 15 +- daemon/top_unix_test.go | 10 +- daemon/top_windows.go | 12 +- daemon/unpause.go | 13 +- daemon/update.go | 3 +- daemon/update_linux.go | 47 +- daemon/update_windows.go | 6 +- hack/make/.go-autogen | 6 +- integration-cli/daemon/daemon.go | 4 +- integration-cli/docker_api_stats_test.go | 2 +- integration-cli/docker_cli_attach_test.go | 5 +- integration-cli/docker_cli_build_unix_test.go | 5 + integration-cli/docker_cli_daemon_test.go | 14 +- integration-cli/docker_cli_events_test.go | 16 +- integration-cli/docker_cli_logs_test.go | 1 + .../docker_cli_network_unix_test.go | 1 + integration-cli/docker_cli_run_test.go | 1 + .../docker_deprecated_api_v124_test.go | 2 + integration-cli/events_utils_test.go | 2 +- integration/service/create_test.go | 12 +- libcontainerd/client.go | 46 - libcontainerd/client_daemon.go | 802 ++++++++++ libcontainerd/client_daemon_linux.go | 96 ++ libcontainerd/client_daemon_windows.go | 53 + libcontainerd/client_linux.go | 616 -------- libcontainerd/client_local_windows.go | 1340 +++++++++++++++++ libcontainerd/client_solaris.go | 104 -- libcontainerd/client_unix.go | 141 -- libcontainerd/client_windows.go | 886 ----------- libcontainerd/container.go | 13 - libcontainerd/container_unix.go | 246 --- libcontainerd/container_windows.go | 338 ----- libcontainerd/errors.go | 46 + libcontainerd/io.go | 36 + libcontainerd/io_unix.go | 60 + libcontainerd/io_windows.go | 138 ++ libcontainerd/oom_linux.go | 31 - libcontainerd/oom_solaris.go | 5 - libcontainerd/pausemonitor_unix.go | 42 - libcontainerd/process.go | 18 - libcontainerd/process_unix.go | 107 -- libcontainerd/process_windows.go | 14 +- libcontainerd/{queue_unix.go => queue.go} | 2 - .../{queue_unix_test.go => queue_test.go} | 2 - libcontainerd/remote.go | 20 - libcontainerd/remote_daemon.go | 317 ++++ libcontainerd/remote_daemon_options.go | 141 ++ libcontainerd/remote_daemon_options_unix.go | 36 + 
libcontainerd/remote_daemon_process.go | 56 + libcontainerd/remote_daemon_process_unix.go | 61 + libcontainerd/remote_daemon_unix.go | 56 + libcontainerd/remote_daemon_windows.go | 50 + libcontainerd/remote_local.go | 59 + libcontainerd/remote_unix.go | 565 ------- libcontainerd/remote_windows.go | 36 - libcontainerd/types.go | 126 +- libcontainerd/types_linux.go | 61 +- libcontainerd/types_solaris.go | 43 - libcontainerd/types_windows.go | 24 +- libcontainerd/utils_linux.go | 63 +- libcontainerd/utils_solaris.go | 27 - libcontainerd/utils_windows.go | 8 + oci/defaults.go | 18 +- pkg/authorization/plugin.go | 12 +- pkg/mount/mount.go | 10 +- pkg/system/process_windows.go | 18 + pkg/system/rm.go | 2 +- plugin/executor/containerd/containerd.go | 123 +- plugin/manager_linux.go | 15 +- 113 files changed, 4574 insertions(+), 3980 deletions(-) delete mode 100644 container/state_unix.go delete mode 100644 container/state_windows.go delete mode 100644 libcontainerd/client.go create mode 100644 libcontainerd/client_daemon.go create mode 100644 libcontainerd/client_daemon_linux.go create mode 100644 libcontainerd/client_daemon_windows.go delete mode 100644 libcontainerd/client_linux.go create mode 100644 libcontainerd/client_local_windows.go delete mode 100644 libcontainerd/client_solaris.go delete mode 100644 libcontainerd/client_unix.go delete mode 100644 libcontainerd/client_windows.go delete mode 100644 libcontainerd/container.go delete mode 100644 libcontainerd/container_unix.go delete mode 100644 libcontainerd/container_windows.go create mode 100644 libcontainerd/errors.go create mode 100644 libcontainerd/io.go create mode 100644 libcontainerd/io_unix.go create mode 100644 libcontainerd/io_windows.go delete mode 100644 libcontainerd/oom_linux.go delete mode 100644 libcontainerd/oom_solaris.go delete mode 100644 libcontainerd/pausemonitor_unix.go delete mode 100644 libcontainerd/process.go delete mode 100644 libcontainerd/process_unix.go rename libcontainerd/{queue_unix.go => queue.go} (94%) rename libcontainerd/{queue_unix_test.go => queue_test.go} (95%) delete mode 100644 libcontainerd/remote.go create mode 100644 libcontainerd/remote_daemon.go create mode 100644 libcontainerd/remote_daemon_options.go create mode 100644 libcontainerd/remote_daemon_options_unix.go create mode 100644 libcontainerd/remote_daemon_process.go create mode 100644 libcontainerd/remote_daemon_process_unix.go create mode 100644 libcontainerd/remote_daemon_unix.go create mode 100644 libcontainerd/remote_daemon_windows.go create mode 100644 libcontainerd/remote_local.go delete mode 100644 libcontainerd/remote_unix.go delete mode 100644 libcontainerd/remote_windows.go delete mode 100644 libcontainerd/types_solaris.go delete mode 100644 libcontainerd/utils_solaris.go create mode 100644 pkg/system/process_windows.go diff --git a/api/server/router/container/exec.go b/api/server/router/container/exec.go index aa2ebb187b..97c27d844f 100644 --- a/api/server/router/container/exec.go +++ b/api/server/router/container/exec.go @@ -126,7 +126,7 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res return err } stdout.Write([]byte(err.Error() + "\r\n")) - logrus.Errorf("Error running exec in container: %v", err) + logrus.Errorf("Error running exec %s in container: %v", execName, err) } return nil } diff --git a/builder/dockerfile/containerbackend.go b/builder/dockerfile/containerbackend.go index ec1bd6b9f5..add0a876df 100644 --- a/builder/dockerfile/containerbackend.go +++ 
b/builder/dockerfile/containerbackend.go @@ -102,7 +102,7 @@ func (c *containerManager) Run(ctx context.Context, cID string, stdout, stderr i func logCancellationError(cancelErrCh chan error, msg string) { if cancelErr := <-cancelErrCh; cancelErr != nil { - logrus.Debugf("Build cancelled (%v): ", cancelErr, msg) + logrus.Debugf("Build cancelled (%v): %s", cancelErr, msg) } } diff --git a/cmd/dockerd/config.go b/cmd/dockerd/config.go index f80641b1f6..f142b7538c 100644 --- a/cmd/dockerd/config.go +++ b/cmd/dockerd/config.go @@ -27,6 +27,8 @@ func installCommonConfigFlags(conf *config.Config, flags *pflag.FlagSet) { flags.Var(opts.NewNamedListOptsRef("exec-opts", &conf.ExecOptions, nil), "exec-opt", "Runtime execution options") flags.StringVarP(&conf.Pidfile, "pidfile", "p", defaultPidFile, "Path to use for daemon PID file") flags.StringVarP(&conf.Root, "graph", "g", defaultDataRoot, "Root of the Docker runtime") + flags.StringVar(&conf.ExecRoot, "exec-root", defaultExecRoot, "Root directory for execution state files") + flags.StringVar(&conf.ContainerdAddr, "containerd", "", "containerd grpc address") // "--graph" is "soft-deprecated" in favor of "data-root". This flag was added // before Docker 1.0, so won't be removed, only hidden, to discourage its usage. diff --git a/cmd/dockerd/config_unix.go b/cmd/dockerd/config_unix.go index ad27a46726..dcc7dc5e81 100644 --- a/cmd/dockerd/config_unix.go +++ b/cmd/dockerd/config_unix.go @@ -29,13 +29,11 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) { flags.BoolVar(&conf.BridgeConfig.EnableIPForward, "ip-forward", true, "Enable net.ipv4.ip_forward") flags.BoolVar(&conf.BridgeConfig.EnableIPMasq, "ip-masq", true, "Enable IP masquerading") flags.BoolVar(&conf.BridgeConfig.EnableIPv6, "ipv6", false, "Enable IPv6 networking") - flags.StringVar(&conf.ExecRoot, "exec-root", defaultExecRoot, "Root directory for execution state files") flags.StringVar(&conf.BridgeConfig.FixedCIDRv6, "fixed-cidr-v6", "", "IPv6 subnet for fixed IPs") flags.BoolVar(&conf.BridgeConfig.EnableUserlandProxy, "userland-proxy", true, "Use userland proxy for loopback traffic") flags.StringVar(&conf.BridgeConfig.UserlandProxyPath, "userland-proxy-path", "", "Path to the userland proxy binary") flags.StringVar(&conf.CgroupParent, "cgroup-parent", "", "Set parent cgroup for all containers") flags.StringVar(&conf.RemappedRoot, "userns-remap", "", "User/Group setting for user namespaces") - flags.StringVar(&conf.ContainerdAddr, "containerd", "", "Path to containerd socket") flags.BoolVar(&conf.LiveRestoreEnabled, "live-restore", false, "Enable live restore of docker when containers are still running") flags.IntVar(&conf.OOMScoreAdjust, "oom-score-adjust", -500, "Set the oom_score_adj for the daemon") flags.BoolVar(&conf.Init, "init", false, "Run an init in the container to forward signals and reap processes") diff --git a/cmd/dockerd/config_windows.go b/cmd/dockerd/config_windows.go index 79cdd25048..36af76645f 100644 --- a/cmd/dockerd/config_windows.go +++ b/cmd/dockerd/config_windows.go @@ -11,6 +11,7 @@ import ( var ( defaultPidFile string defaultDataRoot = filepath.Join(os.Getenv("programdata"), "docker") + defaultExecRoot = filepath.Join(os.Getenv("programdata"), "docker", "exec-root") ) // installConfigFlags adds flags to the pflag.FlagSet to configure the daemon diff --git a/cmd/dockerd/daemon.go b/cmd/dockerd/daemon.go index c8fcafb300..44e16677e7 100644 --- a/cmd/dockerd/daemon.go +++ b/cmd/dockerd/daemon.go @@ -204,7 +204,11 @@ func (cli *DaemonCli) 
start(opts *daemonOptions) (err error) { return err } - containerdRemote, err := libcontainerd.New(cli.getLibcontainerdRoot(), cli.getPlatformRemoteOptions()...) + rOpts, err := cli.getRemoteOptions() + if err != nil { + return fmt.Errorf("Failed to generate containerd options: %s", err) + } + containerdRemote, err := libcontainerd.New(filepath.Join(cli.Config.Root, "containerd"), filepath.Join(cli.Config.ExecRoot, "containerd"), rOpts...) if err != nil { return err } @@ -560,6 +564,17 @@ func (cli *DaemonCli) initMiddlewares(s *apiserver.Server, cfg *apiserver.Config return nil } +func (cli *DaemonCli) getRemoteOptions() ([]libcontainerd.RemoteOption, error) { + opts := []libcontainerd.RemoteOption{} + + pOpts, err := cli.getPlatformRemoteOptions() + if err != nil { + return nil, err + } + opts = append(opts, pOpts...) + return opts, nil +} + // validates that the plugins requested with the --authorization-plugin flag are valid AuthzDriver // plugins present on the host and available to the daemon func validateAuthzPlugins(requestedPlugins []string, pg plugingetter.PluginGetter) error { diff --git a/cmd/dockerd/daemon_linux.go b/cmd/dockerd/daemon_linux.go index a909ee4fbd..b58f0f08a3 100644 --- a/cmd/dockerd/daemon_linux.go +++ b/cmd/dockerd/daemon_linux.go @@ -11,5 +11,5 @@ func preNotifySystem() { // notifySystem sends a message to the host when the server is ready to be used func notifySystem() { // Tell the init daemon we are accepting requests - go systemdDaemon.SdNotify("READY=1") + go systemdDaemon.SdNotify(false, "READY=1") } diff --git a/cmd/dockerd/daemon_solaris.go b/cmd/dockerd/daemon_solaris.go index 9ee18dad7d..6f82421e97 100644 --- a/cmd/dockerd/daemon_solaris.go +++ b/cmd/dockerd/daemon_solaris.go @@ -41,20 +41,8 @@ func preNotifySystem() { func notifySystem() { } -func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption { - opts := []libcontainerd.RemoteOption{} - if cli.Config.ContainerdAddr != "" { - opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr)) - } else { - opts = append(opts, libcontainerd.WithStartDaemon(true)) - } - return opts -} - -// getLibcontainerdRoot gets the root directory for libcontainerd/containerd to -// store their state. -func (cli *DaemonCli) getLibcontainerdRoot() string { - return filepath.Join(cli.Config.ExecRoot, "libcontainerd") +func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) { + return nil, nil } // getSwarmRunRoot gets the root directory for swarm to store runtime state diff --git a/cmd/dockerd/daemon_unix.go b/cmd/dockerd/daemon_unix.go index 7909d98da5..324b299e18 100644 --- a/cmd/dockerd/daemon_unix.go +++ b/cmd/dockerd/daemon_unix.go @@ -10,9 +10,11 @@ import ( "path/filepath" "strconv" + "github.com/containerd/containerd/linux" "github.com/docker/docker/cmd/dockerd/hack" "github.com/docker/docker/daemon" "github.com/docker/docker/libcontainerd" + "github.com/docker/docker/pkg/parsers/kernel" "github.com/docker/libnetwork/portallocator" "golang.org/x/sys/unix" ) @@ -35,6 +37,39 @@ func getDaemonConfDir(_ string) string { return "/etc/docker" } +func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) { + // On older kernels, putting the containerd-shim in its own mount + // namespace will effectively prevent operations such as unlink, rename + // and remove on mountpoints that were present at the time the shim + // namespace was created. This would lead to the infamous EBUSY error + // when trying to remove shm mounts.
+ var noNewNS bool + if !kernel.CheckKernelVersion(3, 18, 0) { + noNewNS = true + } + + opts := []libcontainerd.RemoteOption{ + libcontainerd.WithOOMScore(cli.Config.OOMScoreAdjust), + libcontainerd.WithPlugin("linux", &linux.Config{ + Shim: daemon.DefaultShimBinary, + Runtime: daemon.DefaultRuntimeBinary, + RuntimeRoot: filepath.Join(cli.Config.Root, "runc"), + ShimDebug: cli.Config.Debug, + ShimNoMountNS: noNewNS, + }), + } + if cli.Config.Debug { + opts = append(opts, libcontainerd.WithLogLevel("debug")) + } + if cli.Config.ContainerdAddr != "" { + opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr)) + } else { + opts = append(opts, libcontainerd.WithStartDaemon(true)) + } + + return opts, nil +} + // setupConfigReloadTrap configures the USR2 signal to reload the configuration. func (cli *DaemonCli) setupConfigReloadTrap() { c := make(chan os.Signal, 1) @@ -46,33 +81,6 @@ func (cli *DaemonCli) setupConfigReloadTrap() { }() } -func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption { - opts := []libcontainerd.RemoteOption{ - libcontainerd.WithDebugLog(cli.Config.Debug), - libcontainerd.WithOOMScore(cli.Config.OOMScoreAdjust), - } - if cli.Config.ContainerdAddr != "" { - opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr)) - } else { - opts = append(opts, libcontainerd.WithStartDaemon(true)) - } - if daemon.UsingSystemd(cli.Config) { - args := []string{"--systemd-cgroup=true"} - opts = append(opts, libcontainerd.WithRuntimeArgs(args)) - } - if cli.Config.LiveRestoreEnabled { - opts = append(opts, libcontainerd.WithLiveRestore(true)) - } - opts = append(opts, libcontainerd.WithRuntimePath(daemon.DefaultRuntimeBinary)) - return opts -} - -// getLibcontainerdRoot gets the root directory for libcontainerd/containerd to -// store their state. -func (cli *DaemonCli) getLibcontainerdRoot() string { - return filepath.Join(cli.Config.ExecRoot, "libcontainerd") -} - // getSwarmRunRoot gets the root directory for swarm to store runtime state // For example, the control socket func (cli *DaemonCli) getSwarmRunRoot() string { diff --git a/cmd/dockerd/daemon_windows.go b/cmd/dockerd/daemon_windows.go index 77bade2de3..0007ddef22 100644 --- a/cmd/dockerd/daemon_windows.go +++ b/cmd/dockerd/daemon_windows.go @@ -48,6 +48,10 @@ func notifyShutdown(err error) { } } +func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) { + return nil, nil +} + // setupConfigReloadTrap configures a Win32 event to reload the configuration. func (cli *DaemonCli) setupConfigReloadTrap() { go func() { @@ -65,17 +69,6 @@ func (cli *DaemonCli) setupConfigReloadTrap() { }() } -func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption { - return nil -} - -// getLibcontainerdRoot gets the root directory for libcontainerd to store its -// state. The Windows libcontainerd implementation does not need to write a spec -// or state to disk, so this is a no-op. 
-func (cli *DaemonCli) getLibcontainerdRoot() string { - return "" -} - // getSwarmRunRoot gets the root directory for swarm to store runtime state // For example, the control socket func (cli *DaemonCli) getSwarmRunRoot() string { diff --git a/container/container.go b/container/container.go index f16e6a1bde..10b9bcbe51 100644 --- a/container/container.go +++ b/container/container.go @@ -15,6 +15,7 @@ import ( "syscall" "time" + "github.com/containerd/containerd" containertypes "github.com/docker/docker/api/types/container" mounttypes "github.com/docker/docker/api/types/mount" networktypes "github.com/docker/docker/api/types/network" @@ -61,6 +62,18 @@ var ( errInvalidNetwork = errors.New("invalid network settings while building port map info") ) +// ExitStatus provides exit reasons for a container. +type ExitStatus struct { + // The exit code with which the container exited. + ExitCode int + + // Whether the container encountered an OOM. + OOMKilled bool + + // Time at which the container died + ExitedAt time.Time +} + // Container holds the structure defining a container object. type Container struct { StreamConfig *stream.Config @@ -996,10 +1009,10 @@ func (container *Container) CloseStreams() error { } // InitializeStdio is called by libcontainerd to connect the stdio. -func (container *Container) InitializeStdio(iop libcontainerd.IOPipe) error { +func (container *Container) InitializeStdio(iop *libcontainerd.IOPipe) (containerd.IO, error) { if err := container.startLogging(); err != nil { container.Reset(false) - return err + return nil, err } container.StreamConfig.CopyToPipe(iop) @@ -1012,7 +1025,7 @@ func (container *Container) InitializeStdio(iop libcontainerd.IOPipe) error { } } - return nil + return &cio{IO: iop, sc: container.StreamConfig}, nil } // SecretMountPath returns the path of the secret mount for the container @@ -1069,3 +1082,21 @@ func (container *Container) CreateDaemonEnvironment(tty bool, linkedEnv []string env = ReplaceOrAppendEnvValues(env, container.Config.Env) return env } + +type cio struct { + containerd.IO + + sc *stream.Config +} + +func (i *cio) Close() error { + i.IO.Close() + + return i.sc.CloseStreams() +} + +func (i *cio) Wait() { + i.sc.Wait() + + i.IO.Wait() +} diff --git a/container/container_unix.go b/container/container_unix.go index 796c48d984..611bdfd9fb 100644 --- a/container/container_unix.go +++ b/container/container_unix.go @@ -24,15 +24,6 @@ const ( containerSecretMountPath = "/run/secrets" ) -// ExitStatus provides exit reasons for a container. -type ExitStatus struct { - // The exit code with which the container exited. - ExitCode int - - // Whether the container encountered an OOM. - OOMKilled bool -} - // TrySetNetworkMount attempts to set the network mounts given a provided destination and // the path to use for it; return true if the given destination was a network mount file func (container *Container) TrySetNetworkMount(destination string, path string) bool { diff --git a/container/container_windows.go b/container/container_windows.go index 2dbea5905e..45d51e4f23 100644 --- a/container/container_windows.go +++ b/container/container_windows.go @@ -18,12 +18,6 @@ const ( containerInternalConfigsDirPath = `C:\ProgramData\Docker\internal\configs` ) -// ExitStatus provides exit reasons for a container. -type ExitStatus struct { - // The exit code with which the container exited. - ExitCode int -} - // UnmountIpcMount unmounts Ipc related mounts. // This is a NOOP on windows. 
func (container *Container) UnmountIpcMount(unmount func(pth string) error) error { diff --git a/container/state.go b/container/state.go index cdf51d37d2..1a4c45cbab 100644 --- a/container/state.go +++ b/container/state.go @@ -276,6 +276,7 @@ func (s *State) SetExitCode(ec int) { // SetRunning sets the state of the container to "running". func (s *State) SetRunning(pid int, initial bool) { s.ErrorMsg = "" + s.Paused = false s.Running = true s.Restarting = false if initial { @@ -294,9 +295,14 @@ func (s *State) SetStopped(exitStatus *ExitStatus) { s.Paused = false s.Restarting = false s.Pid = 0 - s.FinishedAt = time.Now().UTC() - s.setFromExitStatus(exitStatus) - close(s.waitStop) // Fire waiters for stop + if exitStatus.ExitedAt.IsZero() { + s.FinishedAt = time.Now().UTC() + } else { + s.FinishedAt = exitStatus.ExitedAt + } + s.ExitCodeValue = exitStatus.ExitCode + s.OOMKilled = exitStatus.OOMKilled + close(s.waitStop) // fire waiters for stop s.waitStop = make(chan struct{}) } @@ -310,8 +316,9 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) { s.Paused = false s.Pid = 0 s.FinishedAt = time.Now().UTC() - s.setFromExitStatus(exitStatus) - close(s.waitStop) // Fire waiters for stop + s.ExitCodeValue = exitStatus.ExitCode + s.OOMKilled = exitStatus.OOMKilled + close(s.waitStop) // fire waiters for stop s.waitStop = make(chan struct{}) } diff --git a/container/state_unix.go b/container/state_unix.go deleted file mode 100644 index a2fa5afc28..0000000000 --- a/container/state_unix.go +++ /dev/null @@ -1,10 +0,0 @@ -// +build linux freebsd - -package container - -// setFromExitStatus is a platform specific helper function to set the state -// based on the ExitStatus structure. -func (s *State) setFromExitStatus(exitStatus *ExitStatus) { - s.ExitCodeValue = exitStatus.ExitCode - s.OOMKilled = exitStatus.OOMKilled -} diff --git a/container/state_windows.go b/container/state_windows.go deleted file mode 100644 index 1229650efa..0000000000 --- a/container/state_windows.go +++ /dev/null @@ -1,7 +0,0 @@ -package container - -// setFromExitStatus is a platform specific helper function to set the state -// based on the ExitStatus structure. 
-func (s *State) setFromExitStatus(exitStatus *ExitStatus) { - s.ExitCodeValue = exitStatus.ExitCode -} diff --git a/container/stream/streams.go b/container/stream/streams.go index 7e734d81c4..106e2b1814 100644 --- a/container/stream/streams.go +++ b/container/stream/streams.go @@ -114,12 +114,12 @@ func (c *Config) CloseStreams() error { } // CopyToPipe connects streamconfig with a libcontainerd.IOPipe -func (c *Config) CopyToPipe(iop libcontainerd.IOPipe) { +func (c *Config) CopyToPipe(iop *libcontainerd.IOPipe) { copyFunc := func(w io.Writer, r io.ReadCloser) { c.Add(1) go func() { if _, err := pools.Copy(w, r); err != nil { - logrus.Errorf("stream copy error: %+v", err) + logrus.Errorf("stream copy error: %v", err) } r.Close() c.Done() @@ -138,7 +138,7 @@ func (c *Config) CopyToPipe(iop libcontainerd.IOPipe) { go func() { pools.Copy(iop.Stdin, stdin) if err := iop.Stdin.Close(); err != nil { - logrus.Warnf("failed to close stdin: %+v", err) + logrus.Warnf("failed to close stdin: %v", err) } }() } diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go index 7bdcae5154..5765af7c5a 100644 --- a/daemon/checkpoint.go +++ b/daemon/checkpoint.go @@ -1,6 +1,7 @@ package daemon import ( + "context" "encoding/json" "fmt" "io/ioutil" @@ -17,7 +18,7 @@ var ( ) // getCheckpointDir verifies checkpoint directory for create,remove, list options and checks if checkpoint already exists -func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID string, ctrCheckpointDir string, create bool) (string, error) { +func getCheckpointDir(checkDir, checkpointID, ctrName, ctrID, ctrCheckpointDir string, create bool) (string, error) { var checkpointDir string var err2 error if checkDir != "" { @@ -32,7 +33,10 @@ func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID strin case err == nil && stat.IsDir(): err2 = fmt.Errorf("checkpoint with name %s already exists for container %s", checkpointID, ctrName) case err != nil && os.IsNotExist(err): - err2 = nil + err2 = os.MkdirAll(checkpointAbsDir, 0700) + if os.IsExist(err2) { + err2 = nil + } case err != nil: err2 = err case err == nil: @@ -48,7 +52,7 @@ func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID strin err2 = fmt.Errorf("%s exists and is not a directory", checkpointAbsDir) } } - return checkpointDir, err2 + return checkpointAbsDir, err2 } // CheckpointCreate checkpoints the process running in a container with CRIU @@ -62,6 +66,10 @@ func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreat return fmt.Errorf("Container %s not running", name) } + if container.Config.Tty { + return fmt.Errorf("checkpoint is not supported on containers with a tty") + } + if !validCheckpointNamePattern.MatchString(config.CheckpointID) { return fmt.Errorf("Invalid checkpoint ID (%s), only %s are allowed", config.CheckpointID, validCheckpointNameChars) } @@ -71,8 +79,9 @@ func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreat return fmt.Errorf("cannot checkpoint container %s: %s", name, err) } - err = daemon.containerd.CreateCheckpoint(container.ID, config.CheckpointID, checkpointDir, config.Exit) + err = daemon.containerd.CreateCheckpoint(context.Background(), container.ID, checkpointDir, config.Exit) if err != nil { + os.RemoveAll(checkpointDir) return fmt.Errorf("Cannot checkpoint container %s: %s", name, err) } diff --git a/daemon/config/config.go b/daemon/config/config.go index 501c07af76..3408e4dc50 100644 --- a/daemon/config/config.go +++ b/daemon/config/config.go @@ -101,6
+101,7 @@ type CommonConfig struct { RawLogs bool `json:"raw-logs,omitempty"` RootDeprecated string `json:"graph,omitempty"` Root string `json:"data-root,omitempty"` + ExecRoot string `json:"exec-root,omitempty"` SocketGroup string `json:"group,omitempty"` CorsHeaders string `json:"api-cors-header,omitempty"` @@ -172,6 +173,10 @@ type CommonConfig struct { NodeGenericResources string `json:"node-generic-resources,omitempty"` // NetworkControlPlaneMTU allows to specify the control plane MTU, this will allow to optimize the network use in some components NetworkControlPlaneMTU int `json:"network-control-plane-mtu,omitempty"` + + // ContainerdAddr is the address used to connect to containerd if we're + // not starting it ourselves + ContainerdAddr string `json:"containerd,omitempty"` } // IsValueSet returns true if a configuration value diff --git a/daemon/config/config_common_unix.go b/daemon/config/config_common_unix.go index d11cceba20..cea3fffdda 100644 --- a/daemon/config/config_common_unix.go +++ b/daemon/config/config_common_unix.go @@ -11,8 +11,6 @@ import ( // CommonUnixConfig defines configuration of a docker daemon that is // common across Unix platforms. type CommonUnixConfig struct { - ExecRoot string `json:"exec-root,omitempty"` - ContainerdAddr string `json:"containerd,omitempty"` Runtimes map[string]types.Runtime `json:"runtimes,omitempty"` DefaultRuntime string `json:"default-runtime,omitempty"` DefaultInitBinary string `json:"default-init,omitempty"` diff --git a/daemon/daemon.go b/daemon/daemon.go index fb6ac1feb3..ece1f2a8fe 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -18,7 +18,7 @@ import ( "sync" "time" - containerd "github.com/containerd/containerd/api/grpc/types" + "github.com/docker/docker/api/errdefs" "github.com/docker/docker/api/types" containertypes "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/swarm" @@ -62,11 +62,10 @@ import ( "github.com/pkg/errors" ) -var ( - // DefaultRuntimeBinary is the default runtime to be used by - // containerd if none is specified - DefaultRuntimeBinary = "docker-runc" +// MainNamespace is the name of the namespace used for user containers +const MainNamespace = "moby" +var ( errSystemNotSupported = errors.New("the Docker daemon is not supported on this platform") ) @@ -170,7 +169,7 @@ func (daemon *Daemon) restore() error { continue } container.RWLayer = rwlayer - logrus.Debugf("Loaded container %v", container.ID) + logrus.Debugf("Loaded container %v, isRunning: %v", container.ID, container.IsRunning()) containers[container.ID] = container } else { @@ -209,8 +208,10 @@ func (daemon *Daemon) restore() error { } } - var wg sync.WaitGroup - var mapLock sync.Mutex + var ( + wg sync.WaitGroup + mapLock sync.Mutex + ) for _, c := range containers { wg.Add(1) go func(c *container.Container) { @@ -221,11 +222,74 @@ func (daemon *Daemon) restore() error { } daemon.setStateCounter(c) + + logrus.WithFields(logrus.Fields{ + "container": c.ID, + "running": c.IsRunning(), + "paused": c.IsPaused(), + }).Debug("restoring container") + + var ( + err error + alive bool + ec uint32 + exitedAt time.Time + ) + + alive, _, err = daemon.containerd.Restore(context.Background(), c.ID, c.InitializeStdio) + if err != nil && !errdefs.IsNotFound(err) { + logrus.Errorf("Failed to restore container %s with containerd: %s", c.ID, err) + return + } + if !alive { + ec, exitedAt, err = daemon.containerd.DeleteTask(context.Background(), c.ID) + if err != nil && !errdefs.IsNotFound(err) {
logrus.WithError(err).Errorf("Failed to delete container %s from containerd", c.ID) + return + } + } + if c.IsRunning() || c.IsPaused() { c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking - if err := daemon.containerd.Restore(c.ID, c.InitializeStdio); err != nil { - logrus.Errorf("Failed to restore %s with containerd: %s", c.ID, err) - return + + if c.IsPaused() && alive { + s, err := daemon.containerd.Status(context.Background(), c.ID) + if err != nil { + logrus.WithError(err).WithField("container", c.ID). + Errorf("Failed to get container status") + } else { + logrus.WithField("container", c.ID).WithField("state", s). + Info("restored container paused") + switch s { + case libcontainerd.StatusPaused, libcontainerd.StatusPausing: + // nothing to do + case libcontainerd.StatusStopped: + alive = false + case libcontainerd.StatusUnknown: + logrus.WithField("container", c.ID). + Error("Unknown status for container during restore") + default: + // running + c.Lock() + c.Paused = false + daemon.setStateCounter(c) + if err := c.CheckpointTo(daemon.containersReplica); err != nil { + logrus.WithError(err).WithField("container", c.ID). + Error("Failed to update container state") + } + c.Unlock() + } + } + } + + if !alive { + c.Lock() + c.SetStopped(&container.ExitStatus{ExitCode: int(ec), ExitedAt: exitedAt}) + daemon.Cleanup(c) + if err := c.CheckpointTo(daemon.containersReplica); err != nil { + logrus.Errorf("Failed to update stopped container %s state: %v", c.ID, err) + } + c.Unlock() } // we call Mount and then Unmount to get BaseFs of the container @@ -253,11 +317,9 @@ func (daemon *Daemon) restore() error { activeSandboxes[c.NetworkSettings.SandboxID] = options mapLock.Unlock() } + } else { + // get list of containers we need to restart - } - // fixme: only if not running - // get list of containers we need to restart - if !c.IsRunning() && !c.IsPaused() { // Do not autostart containers which // has endpoints in a swarm scope // network yet since the cluster is @@ -289,7 +351,7 @@ func (daemon *Daemon) restore() error { c.RemovalInProgress = false c.Dead = true if err := c.CheckpointTo(daemon.containersReplica); err != nil { - logrus.Errorf("Failed to update container %s state: %v", c.ID, err) + logrus.Errorf("Failed to update RemovalInProgress container %s state: %v", c.ID, err) } } c.Unlock() @@ -559,6 +621,7 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe d := &Daemon{ configStore: config, + PluginStore: pluginStore, startupDone: make(chan struct{}), } // Ensure the daemon is properly shutdown if there is a failure during @@ -606,6 +669,16 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe return nil, err } + // Create the directory where we'll store the runtime scripts (i.e.
in + // order to support runtimeArgs) + daemonRuntimes := filepath.Join(config.Root, "runtimes") + if err := system.MkdirAll(daemonRuntimes, 0700, ""); err != nil && !os.IsExist(err) { + return nil, err + } + if err := d.loadRuntimes(); err != nil { + return nil, err + } + if runtime.GOOS == "windows" { if err := system.MkdirAll(filepath.Join(config.Root, "credentialspecs"), 0, ""); err != nil && !os.IsExist(err) { return nil, err @@ -635,7 +708,6 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe } d.RegistryService = registryService - d.PluginStore = pluginStore logger.RegisterPluginGetter(d.PluginStore) metricsSockPath, err := d.listenMetricsSock() @@ -645,7 +717,7 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe registerMetricsPluginCallback(d.PluginStore, metricsSockPath) createPluginExec := func(m *plugin.Manager) (plugin.Executor, error) { - return pluginexec.New(containerdRemote, m) + return pluginexec.New(getPluginExecRoot(config.Root), containerdRemote, m) } // Plugin system initialization should happen before restore. Do not change order. @@ -802,13 +874,13 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe d.idMappings = idMappings d.seccompEnabled = sysInfo.Seccomp d.apparmorEnabled = sysInfo.AppArmor + d.containerdRemote = containerdRemote d.linkIndex = newLinkIndex() - d.containerdRemote = containerdRemote go d.execCommandGC() - d.containerd, err = containerdRemote.Client(d) + d.containerd, err = containerdRemote.NewClient(MainNamespace, d) if err != nil { return nil, err } @@ -1171,19 +1243,6 @@ func (daemon *Daemon) networkOptions(dconfig *config.Config, pg plugingetter.Plu return options, nil } -func copyBlkioEntry(entries []*containerd.BlkioStatsEntry) []types.BlkioStatEntry { - out := make([]types.BlkioStatEntry, len(entries)) - for i, re := range entries { - out[i] = types.BlkioStatEntry{ - Major: re.Major, - Minor: re.Minor, - Op: re.Op, - Value: re.Value, - } - } - return out -} - // GetCluster returns the cluster func (daemon *Daemon) GetCluster() Cluster { return daemon.cluster diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index 2b0e206629..b0b624772a 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -5,6 +5,7 @@ package daemon import ( "bufio" "bytes" + "context" "fmt" "io/ioutil" "net" @@ -16,6 +17,7 @@ import ( "strings" "time" + containerd_cgroups "github.com/containerd/cgroups" "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/blkiodev" pblkiodev "github.com/docker/docker/api/types/blkiodev" @@ -26,6 +28,7 @@ import ( "github.com/docker/docker/opts" "github.com/docker/docker/pkg/containerfs" "github.com/docker/docker/pkg/idtools" + "github.com/docker/docker/pkg/ioutils" "github.com/docker/docker/pkg/parsers" "github.com/docker/docker/pkg/parsers/kernel" "github.com/docker/docker/pkg/sysinfo" @@ -38,7 +41,6 @@ import ( "github.com/docker/libnetwork/netutils" "github.com/docker/libnetwork/options" lntypes "github.com/docker/libnetwork/types" - "github.com/golang/protobuf/ptypes" "github.com/opencontainers/runc/libcontainer/cgroups" rsystem "github.com/opencontainers/runc/libcontainer/system" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -50,6 +52,14 @@ import ( ) const ( + // DefaultShimBinary is the default shim to be used by containerd if none + // is specified + DefaultShimBinary = "docker-containerd-shim" + + // DefaultRuntimeBinary is the default runtime to be used by + // containerd if none is 
specified + DefaultRuntimeBinary = "docker-runc" + // See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269 linuxMinCPUShares = 2 linuxMaxCPUShares = 262144 @@ -63,6 +73,10 @@ const ( // constant for cgroup drivers cgroupFsDriver = "cgroupfs" cgroupSystemdDriver = "systemd" + + // DefaultRuntimeName is the default runtime to be used by + // containerd if none is specified + DefaultRuntimeName = "docker-runc" ) type containerGetter interface { @@ -623,6 +637,54 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes. return warnings, nil } +func (daemon *Daemon) loadRuntimes() error { + return daemon.initRuntimes(daemon.configStore.Runtimes) +} + +func (daemon *Daemon) initRuntimes(runtimes map[string]types.Runtime) (err error) { + runtimeDir := filepath.Join(daemon.configStore.Root, "runtimes") + // Remove old temp directory if any + os.RemoveAll(runtimeDir + "-old") + tmpDir, err := ioutils.TempDir(daemon.configStore.Root, "gen-runtimes") + if err != nil { + return errors.Wrapf(err, "failed to get temp dir to generate runtime scripts") + } + defer func() { + if err != nil { + if err1 := os.RemoveAll(tmpDir); err1 != nil { + logrus.WithError(err1).WithField("dir", tmpDir). + Warnf("failed to remove tmp dir") + } + return + } + + if err = os.Rename(runtimeDir, runtimeDir+"-old"); err != nil { + return + } + if err = os.Rename(tmpDir, runtimeDir); err != nil { + err = errors.Wrapf(err, "failed to setup runtimes dir, new containers may not start") + return + } + if err = os.RemoveAll(runtimeDir + "-old"); err != nil { + logrus.WithError(err).WithField("dir", tmpDir). + Warnf("failed to remove old runtimes dir") + } + }() + + for name, rt := range runtimes { + if len(rt.Args) == 0 { + continue + } + + script := filepath.Join(tmpDir, name) + content := fmt.Sprintf("#!/bin/sh\n%s %s $@\n", rt.Path, strings.Join(rt.Args, " ")) + if err := ioutil.WriteFile(script, []byte(content), 0700); err != nil { + return err + } + } + return nil +} + // reloadPlatform updates configuration with platform specific options // and updates the passed attributes func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) error { @@ -631,9 +693,12 @@ func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string] } if conf.IsValueSet("runtimes") { - daemon.configStore.Runtimes = conf.Runtimes // Always set the default one - daemon.configStore.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary} + conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary} + if err := daemon.initRuntimes(conf.Runtimes); err != nil { + return err + } + daemon.configStore.Runtimes = conf.Runtimes } if conf.DefaultRuntime != "" { @@ -692,7 +757,7 @@ func verifyDaemonSettings(conf *config.Config) error { if conf.Runtimes == nil { conf.Runtimes = make(map[string]types.Runtime) } - conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary} + conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeName} return nil } @@ -1214,11 +1279,24 @@ func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container return daemon.Unmount(container) } +func copyBlkioEntry(entries []*containerd_cgroups.BlkIOEntry) []types.BlkioStatEntry { + out := make([]types.BlkioStatEntry, len(entries)) + for i, re := range entries { + out[i] = types.BlkioStatEntry{ + Major: re.Major, + Minor: re.Minor, + Op: 
re.Op, + Value: re.Value, + } + } + return out +} + func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { if !c.IsRunning() { return nil, errNotRunning(c.ID) } - stats, err := daemon.containerd.Stats(c.ID) + cs, err := daemon.containerd.Stats(context.Background(), c.ID) if err != nil { if strings.Contains(err.Error(), "container not found") { return nil, containerNotFound(c.ID) @@ -1226,54 +1304,98 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { return nil, err } s := &types.StatsJSON{} - cgs := stats.CgroupStats - if cgs != nil { + s.Read = cs.Read + stats := cs.Metrics + if stats.Blkio != nil { s.BlkioStats = types.BlkioStats{ - IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive), - IoServicedRecursive: copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive), - IoQueuedRecursive: copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive), - IoServiceTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive), - IoWaitTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive), - IoMergedRecursive: copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive), - IoTimeRecursive: copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive), - SectorsRecursive: copyBlkioEntry(cgs.BlkioStats.SectorsRecursive), + IoServiceBytesRecursive: copyBlkioEntry(stats.Blkio.IoServiceBytesRecursive), + IoServicedRecursive: copyBlkioEntry(stats.Blkio.IoServicedRecursive), + IoQueuedRecursive: copyBlkioEntry(stats.Blkio.IoQueuedRecursive), + IoServiceTimeRecursive: copyBlkioEntry(stats.Blkio.IoServiceTimeRecursive), + IoWaitTimeRecursive: copyBlkioEntry(stats.Blkio.IoWaitTimeRecursive), + IoMergedRecursive: copyBlkioEntry(stats.Blkio.IoMergedRecursive), + IoTimeRecursive: copyBlkioEntry(stats.Blkio.IoTimeRecursive), + SectorsRecursive: copyBlkioEntry(stats.Blkio.SectorsRecursive), } - cpu := cgs.CpuStats + } + if stats.CPU != nil { s.CPUStats = types.CPUStats{ CPUUsage: types.CPUUsage{ - TotalUsage: cpu.CpuUsage.TotalUsage, - PercpuUsage: cpu.CpuUsage.PercpuUsage, - UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode, - UsageInUsermode: cpu.CpuUsage.UsageInUsermode, + TotalUsage: stats.CPU.Usage.Total, + PercpuUsage: stats.CPU.Usage.PerCPU, + UsageInKernelmode: stats.CPU.Usage.Kernel, + UsageInUsermode: stats.CPU.Usage.User, }, ThrottlingData: types.ThrottlingData{ - Periods: cpu.ThrottlingData.Periods, - ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods, - ThrottledTime: cpu.ThrottlingData.ThrottledTime, + Periods: stats.CPU.Throttling.Periods, + ThrottledPeriods: stats.CPU.Throttling.ThrottledPeriods, + ThrottledTime: stats.CPU.Throttling.ThrottledTime, }, } - mem := cgs.MemoryStats.Usage - s.MemoryStats = types.MemoryStats{ - Usage: mem.Usage, - MaxUsage: mem.MaxUsage, - Stats: cgs.MemoryStats.Stats, - Failcnt: mem.Failcnt, - Limit: mem.Limit, - } - // if the container does not set memory limit, use the machineMemory - if mem.Limit > daemon.machineMemory && daemon.machineMemory > 0 { - s.MemoryStats.Limit = daemon.machineMemory - } - if cgs.PidsStats != nil { - s.PidsStats = types.PidsStats{ - Current: cgs.PidsStats.Current, + } + + if stats.Memory != nil { + raw := make(map[string]uint64) + raw["cache"] = stats.Memory.Cache + raw["rss"] = stats.Memory.RSS + raw["rss_huge"] = stats.Memory.RSSHuge + raw["mapped_file"] = stats.Memory.MappedFile + raw["dirty"] = stats.Memory.Dirty + raw["writeback"] = stats.Memory.Writeback + raw["pgpgin"] = stats.Memory.PgPgIn + raw["pgpgout"] = stats.Memory.PgPgOut + raw["pgfault"] = stats.Memory.PgFault + 
raw["pgmajfault"] = stats.Memory.PgMajFault + raw["inactive_anon"] = stats.Memory.InactiveAnon + raw["active_anon"] = stats.Memory.ActiveAnon + raw["inactive_file"] = stats.Memory.InactiveFile + raw["active_file"] = stats.Memory.ActiveFile + raw["unevictable"] = stats.Memory.Unevictable + raw["hierarchical_memory_limit"] = stats.Memory.HierarchicalMemoryLimit + raw["hierarchical_memsw_limit"] = stats.Memory.HierarchicalSwapLimit + raw["total_cache"] = stats.Memory.TotalCache + raw["total_rss"] = stats.Memory.TotalRSS + raw["total_rss_huge"] = stats.Memory.TotalRSSHuge + raw["total_mapped_file"] = stats.Memory.TotalMappedFile + raw["total_dirty"] = stats.Memory.TotalDirty + raw["total_writeback"] = stats.Memory.TotalWriteback + raw["total_pgpgin"] = stats.Memory.TotalPgPgIn + raw["total_pgpgout"] = stats.Memory.TotalPgPgOut + raw["total_pgfault"] = stats.Memory.TotalPgFault + raw["total_pgmajfault"] = stats.Memory.TotalPgMajFault + raw["total_inactive_anon"] = stats.Memory.TotalInactiveAnon + raw["total_active_anon"] = stats.Memory.TotalActiveAnon + raw["total_inactive_file"] = stats.Memory.TotalInactiveFile + raw["total_active_file"] = stats.Memory.TotalActiveFile + raw["total_unevictable"] = stats.Memory.TotalUnevictable + + if stats.Memory.Usage != nil { + s.MemoryStats = types.MemoryStats{ + Stats: raw, + Usage: stats.Memory.Usage.Usage, + MaxUsage: stats.Memory.Usage.Max, + Limit: stats.Memory.Usage.Limit, + Failcnt: stats.Memory.Usage.Failcnt, + } + } else { + s.MemoryStats = types.MemoryStats{ + Stats: raw, } } + + // if the container does not set memory limit, use the machineMemory + if s.MemoryStats.Limit > daemon.machineMemory && daemon.machineMemory > 0 { + s.MemoryStats.Limit = daemon.machineMemory + } } - s.Read, err = ptypes.Timestamp(stats.Timestamp) - if err != nil { - return nil, err + + if stats.Pids != nil { + s.PidsStats = types.PidsStats{ + Current: stats.Pids.Current, + Limit: stats.Pids.Limit, + } } + return s, nil } diff --git a/daemon/daemon_windows.go b/daemon/daemon_windows.go index 3c179ccc3e..a79ed4f071 100644 --- a/daemon/daemon_windows.go +++ b/daemon/daemon_windows.go @@ -1,6 +1,7 @@ package daemon import ( + "context" "fmt" "os" "path/filepath" @@ -532,7 +533,7 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { } // Obtain the stats from HCS via libcontainerd - stats, err := daemon.containerd.Stats(c.ID) + stats, err := daemon.containerd.Stats(context.Background(), c.ID) if err != nil { if strings.Contains(err.Error(), "container not found") { return nil, containerNotFound(c.ID) @@ -542,49 +543,48 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) { // Start with an empty structure s := &types.StatsJSON{} - - // Populate the CPU/processor statistics - s.CPUStats = types.CPUStats{ - CPUUsage: types.CPUUsage{ - TotalUsage: stats.Processor.TotalRuntime100ns, - UsageInKernelmode: stats.Processor.RuntimeKernel100ns, - UsageInUsermode: stats.Processor.RuntimeKernel100ns, - }, - } - - // Populate the memory statistics - s.MemoryStats = types.MemoryStats{ - Commit: stats.Memory.UsageCommitBytes, - CommitPeak: stats.Memory.UsageCommitPeakBytes, - PrivateWorkingSet: stats.Memory.UsagePrivateWorkingSetBytes, - } - - // Populate the storage statistics - s.StorageStats = types.StorageStats{ - ReadCountNormalized: stats.Storage.ReadCountNormalized, - ReadSizeBytes: stats.Storage.ReadSizeBytes, - WriteCountNormalized: stats.Storage.WriteCountNormalized, - WriteSizeBytes: stats.Storage.WriteSizeBytes, - } - - // 
Populate the network statistics - s.Networks = make(map[string]types.NetworkStats) - - for _, nstats := range stats.Network { - s.Networks[nstats.EndpointId] = types.NetworkStats{ - RxBytes: nstats.BytesReceived, - RxPackets: nstats.PacketsReceived, - RxDropped: nstats.DroppedPacketsIncoming, - TxBytes: nstats.BytesSent, - TxPackets: nstats.PacketsSent, - TxDropped: nstats.DroppedPacketsOutgoing, - } - } - - // Set the timestamp - s.Stats.Read = stats.Timestamp + s.Stats.Read = stats.Read s.Stats.NumProcs = platform.NumProcs() + if stats.HCSStats != nil { + hcss := stats.HCSStats + // Populate the CPU/processor statistics + s.CPUStats = types.CPUStats{ + CPUUsage: types.CPUUsage{ + TotalUsage: hcss.Processor.TotalRuntime100ns, + UsageInKernelmode: hcss.Processor.RuntimeKernel100ns, + UsageInUsermode: hcss.Processor.RuntimeUser100ns, + }, + } + + // Populate the memory statistics + s.MemoryStats = types.MemoryStats{ + Commit: hcss.Memory.UsageCommitBytes, + CommitPeak: hcss.Memory.UsageCommitPeakBytes, + PrivateWorkingSet: hcss.Memory.UsagePrivateWorkingSetBytes, + } + + // Populate the storage statistics + s.StorageStats = types.StorageStats{ + ReadCountNormalized: hcss.Storage.ReadCountNormalized, + ReadSizeBytes: hcss.Storage.ReadSizeBytes, + WriteCountNormalized: hcss.Storage.WriteCountNormalized, + WriteSizeBytes: hcss.Storage.WriteSizeBytes, + } + + // Populate the network statistics + s.Networks = make(map[string]types.NetworkStats) + for _, nstats := range hcss.Network { + s.Networks[nstats.EndpointId] = types.NetworkStats{ + RxBytes: nstats.BytesReceived, + RxPackets: nstats.PacketsReceived, + RxDropped: nstats.DroppedPacketsIncoming, + TxBytes: nstats.BytesSent, + TxPackets: nstats.PacketsSent, + TxDropped: nstats.DroppedPacketsOutgoing, + } + } + } return s, nil } @@ -664,3 +664,11 @@ func getRealPath(path string) (string, error) { } return fileutils.ReadSymlinkedDirectory(path) } + +func (daemon *Daemon) loadRuntimes() error { + return nil +} + +func (daemon *Daemon) initRuntimes(_ map[string]types.Runtime) error { + return nil +} diff --git a/daemon/delete.go b/daemon/delete.go index 3009400c09..6db08f38cd 100644 --- a/daemon/delete.go +++ b/daemon/delete.go @@ -141,6 +141,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo } container.SetRemoved() stateCtr.del(container.ID) + daemon.LogContainerEvent(container, "destroy") return nil } diff --git a/daemon/errors.go b/daemon/errors.go index 9a9d45598d..889261fa35 100644 --- a/daemon/errors.go +++ b/daemon/errors.go @@ -64,6 +64,11 @@ func errExecPaused(id string) error { return stateConflictError{cause} } +func errNotPaused(id string) error { + cause := errors.Errorf("Container %s is not paused", id) + return stateConflictError{cause} +} + type nameConflictError struct { id string name string diff --git a/daemon/exec.go b/daemon/exec.go index 9b3e583bf9..afdfc9c2bf 100644 --- a/daemon/exec.go +++ b/daemon/exec.go @@ -13,10 +13,10 @@ import ( "github.com/docker/docker/container" "github.com/docker/docker/container/stream" "github.com/docker/docker/daemon/exec" - "github.com/docker/docker/libcontainerd" "github.com/docker/docker/pkg/pools" "github.com/docker/docker/pkg/signal" "github.com/docker/docker/pkg/term" + specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) @@ -31,6 +31,14 @@ func (d *Daemon) registerExecCommand(container *container.Container, config *exe d.execCommands.Add(config.ID, config) } +func (d *Daemon)
registerExecPidUnlocked(container *container.Container, config *exec.Config) { + logrus.Debugf("registering pid %v for exec %v", config.Pid, config.ID) + // Storing execs in container in order to kill them gracefully whenever the container is stopped or removed. + container.ExecCommands.SetPidUnlocked(config.ID, config.Pid) + // Storing execs in daemon for easy access via Engine API. + d.execCommands.SetPidUnlocked(config.ID, config.Pid) +} + // ExecExists looks up the exec instance and returns a bool if it exists or not. // It will also return the error produced by `getConfig` func (d *Daemon) ExecExists(name string) (bool, error) { @@ -70,8 +78,8 @@ func (d *Daemon) getExecConfig(name string) (*exec.Config, error) { } func (d *Daemon) unregisterExecCommand(container *container.Container, execConfig *exec.Config) { - container.ExecCommands.Delete(execConfig.ID) - d.execCommands.Delete(execConfig.ID) + container.ExecCommands.Delete(execConfig.ID, execConfig.Pid) + d.execCommands.Delete(execConfig.ID, execConfig.Pid) } func (d *Daemon) getActiveContainer(name string) (*container.Container, error) { @@ -181,7 +189,7 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err) } ec.Unlock() - c.ExecCommands.Delete(ec.ID) + c.ExecCommands.Delete(ec.ID, ec.Pid) } }() @@ -207,13 +215,17 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R ec.StreamConfig.NewNopInputPipe() } - p := libcontainerd.Process{ + p := &specs.Process{ Args: append([]string{ec.Entrypoint}, ec.Args...), Env: ec.Env, Terminal: ec.Tty, + Cwd: c.Config.WorkingDir, + } + if p.Cwd == "" { + p.Cwd = "/" } - if err := execSetPlatformOpt(c, ec, &p); err != nil { + if err := d.execSetPlatformOpt(c, ec, p); err != nil { return err } @@ -231,22 +243,28 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R ec.StreamConfig.AttachStreams(&attachConfig) attachErr := ec.StreamConfig.CopyStreams(ctx, &attachConfig) - systemPid, err := d.containerd.AddProcess(ctx, c.ID, name, p, ec.InitializeStdio) + // Synchronize with libcontainerd event loop + ec.Lock() + c.ExecCommands.Lock() + systemPid, err := d.containerd.Exec(ctx, c.ID, ec.ID, p, cStdin != nil, ec.InitializeStdio) if err != nil { + c.ExecCommands.Unlock() + ec.Unlock() return translateContainerdStartErr(ec.Entrypoint, ec.SetExitCode, err) } - ec.Lock() ec.Pid = systemPid + d.registerExecPidUnlocked(c, ec) + c.ExecCommands.Unlock() ec.Unlock() select { case <-ctx.Done(): logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID) - d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["TERM"])) + d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["TERM"])) select { case <-time.After(termProcessTimeout * time.Second): logrus.Infof("Container %v, process %v failed to exit within %d seconds of signal TERM - using the force", c.ID, name, termProcessTimeout) - d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["KILL"])) + d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["KILL"])) case <-attachErr: // TERM signal worked } @@ -273,7 +291,7 @@ func (d *Daemon) execCommandGC() { for id, config := range d.execCommands.Commands() { if config.CanRemove { cleaned++ - d.execCommands.Delete(id) + d.execCommands.Delete(id, config.Pid) } else { if _, exists := liveExecCommands[id]; !exists { config.CanRemove = true diff --git a/daemon/exec/exec.go b/daemon/exec/exec.go index 
f4efb4d54e..7aa2383e32 100644 --- a/daemon/exec/exec.go +++ b/daemon/exec/exec.go @@ -4,6 +4,7 @@ import ( "runtime" "sync" + "github.com/containerd/containerd" "github.com/docker/docker/container/stream" "github.com/docker/docker/libcontainerd" "github.com/docker/docker/pkg/stringid" @@ -42,8 +43,26 @@ func NewConfig() *Config { } } +type cio struct { + containerd.IO + + sc *stream.Config +} + +func (i *cio) Close() error { + i.IO.Close() + + return i.sc.CloseStreams() +} + +func (i *cio) Wait() { + i.sc.Wait() + + i.IO.Wait() +} + // InitializeStdio is called by libcontainerd to connect the stdio. -func (c *Config) InitializeStdio(iop libcontainerd.IOPipe) error { +func (c *Config) InitializeStdio(iop *libcontainerd.IOPipe) (containerd.IO, error) { c.StreamConfig.CopyToPipe(iop) if c.StreamConfig.Stdin() == nil && !c.Tty && runtime.GOOS == "windows" { @@ -54,7 +73,7 @@ func (c *Config) InitializeStdio(iop libcontainerd.IOPipe) error { } } - return nil + return &cio{IO: iop, sc: c.StreamConfig}, nil } // CloseStreams closes the stdio streams for the exec @@ -69,45 +88,66 @@ func (c *Config) SetExitCode(code int) { // Store keeps track of the exec configurations. type Store struct { - commands map[string]*Config + byID map[string]*Config + byPid map[int]*Config sync.RWMutex } // NewStore initializes a new exec store. func NewStore() *Store { - return &Store{commands: make(map[string]*Config)} + return &Store{ + byID: make(map[string]*Config), + byPid: make(map[int]*Config), + } } // Commands returns the exec configurations in the store. func (e *Store) Commands() map[string]*Config { e.RLock() - commands := make(map[string]*Config, len(e.commands)) - for id, config := range e.commands { - commands[id] = config + byID := make(map[string]*Config, len(e.byID)) + for id, config := range e.byID { + byID[id] = config } e.RUnlock() - return commands + return byID } // Add adds a new exec configuration to the store. func (e *Store) Add(id string, Config *Config) { e.Lock() - e.commands[id] = Config + e.byID[id] = Config e.Unlock() } +// SetPidUnlocked adds an association between a Pid and a config; it is not +// synchronized with other operations. +func (e *Store) SetPidUnlocked(id string, pid int) { + if config, ok := e.byID[id]; ok { + e.byPid[pid] = config + } +} + // Get returns an exec configuration by its id. func (e *Store) Get(id string) *Config { e.RLock() - res := e.commands[id] + res := e.byID[id] + e.RUnlock() + return res +} + +// ByPid returns an exec configuration by its pid. +func (e *Store) ByPid(pid int) *Config { + e.RLock() + res := e.byPid[pid] e.RUnlock() return res } // Delete removes an exec configuration from the store.
-func (e *Store) Delete(id string) { +func (e *Store) Delete(id string, pid int) { e.Lock() - delete(e.commands, id) + delete(e.byPid, pid) + delete(e.byID, id) e.Unlock() } @@ -115,7 +155,7 @@ func (e *Store) Delete(id string) { func (e *Store) List() []string { var IDs []string e.RLock() - for id := range e.commands { + for id := range e.byID { IDs = append(IDs, id) } e.RUnlock() diff --git a/daemon/exec_linux.go b/daemon/exec_linux.go index bb11c11e44..525ce01050 100644 --- a/daemon/exec_linux.go +++ b/daemon/exec_linux.go @@ -4,25 +4,30 @@ import ( "github.com/docker/docker/container" "github.com/docker/docker/daemon/caps" "github.com/docker/docker/daemon/exec" - "github.com/docker/docker/libcontainerd" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runtime-spec/specs-go" ) -func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error { +func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config, p *specs.Process) error { if len(ec.User) > 0 { uid, gid, additionalGids, err := getUser(c, ec.User) if err != nil { return err } - p.User = &specs.User{ + p.User = specs.User{ UID: uid, GID: gid, AdditionalGids: additionalGids, } } if ec.Privileged { - p.Capabilities = caps.GetAllCapabilities() + if p.Capabilities == nil { + p.Capabilities = &specs.LinuxCapabilities{} + } + p.Capabilities.Bounding = caps.GetAllCapabilities() + p.Capabilities.Permitted = p.Capabilities.Bounding + p.Capabilities.Inheritable = p.Capabilities.Bounding + p.Capabilities.Effective = p.Capabilities.Bounding } if apparmor.IsEnabled() { var appArmorProfile string @@ -46,5 +51,6 @@ func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainer } } } + daemon.setRlimits(&specs.Spec{Process: p}, c) return nil } diff --git a/daemon/exec_solaris.go b/daemon/exec_solaris.go index 7003355d91..7c1fc20a0c 100644 --- a/daemon/exec_solaris.go +++ b/daemon/exec_solaris.go @@ -3,9 +3,9 @@ package daemon import ( "github.com/docker/docker/container" "github.com/docker/docker/daemon/exec" - "github.com/docker/docker/libcontainerd" + specs "github.com/opencontainers/runtime-spec/specs-go" ) -func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error { +func (daemon *Daemon) execSetPlatformOpt(_ *container.Container, _ *exec.Config, _ *specs.Process) error { return nil } diff --git a/daemon/exec_windows.go b/daemon/exec_windows.go index 03246d91cd..d8754eb18d 100644 --- a/daemon/exec_windows.go +++ b/daemon/exec_windows.go @@ -3,10 +3,10 @@ package daemon import ( "github.com/docker/docker/container" "github.com/docker/docker/daemon/exec" - "github.com/docker/docker/libcontainerd" + specs "github.com/opencontainers/runtime-spec/specs-go" ) -func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error { +func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config, p *specs.Process) error { // Process arguments need to be escaped before sending to OCI. 
 if c.OS == "windows" {
         p.Args = escapeArgs(p.Args)
diff --git a/daemon/info_unix.go b/daemon/info_unix.go
index f43af6274f..fd2bbb45c3 100644
--- a/daemon/info_unix.go
+++ b/daemon/info_unix.go
@@ -3,7 +3,6 @@ package daemon
 import (
-    "context"
     "os/exec"
     "strings"
 
@@ -28,16 +27,8 @@ func (daemon *Daemon) FillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo)
     v.DefaultRuntime = daemon.configStore.GetDefaultRuntimeName()
     v.InitBinary = daemon.configStore.GetInitPath()
 
-    v.ContainerdCommit.Expected = dockerversion.ContainerdCommitID
-    if sv, err := daemon.containerd.GetServerVersion(context.Background()); err == nil {
-        v.ContainerdCommit.ID = sv.Revision
-    } else {
-        logrus.Warnf("failed to retrieve containerd version: %v", err)
-        v.ContainerdCommit.ID = "N/A"
-    }
-
     v.RuncCommit.Expected = dockerversion.RuncCommitID
-    defaultRuntimeBinary := daemon.configStore.GetRuntime(daemon.configStore.GetDefaultRuntimeName()).Path
+    defaultRuntimeBinary := daemon.configStore.GetRuntime(v.DefaultRuntime).Path
     if rv, err := exec.Command(defaultRuntimeBinary, "--version").Output(); err == nil {
         parts := strings.Split(strings.TrimSpace(string(rv)), "\n")
         if len(parts) == 3 {
@@ -56,6 +47,24 @@ func (daemon *Daemon) FillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo)
         v.RuncCommit.ID = "N/A"
     }
 
+    v.ContainerdCommit.Expected = dockerversion.ContainerdCommitID
+    if rv, err := exec.Command("docker-containerd", "--version").Output(); err == nil {
+        parts := strings.Split(strings.TrimSpace(string(rv)), " ")
+        if len(parts) == 3 {
+            v.ContainerdCommit.ID = parts[2]
+        }
+        switch {
+        case v.ContainerdCommit.ID == "":
+            logrus.Warnf("failed to retrieve docker-containerd version, unknown format: %s", string(rv))
+            v.ContainerdCommit.ID = "N/A"
+        case strings.HasSuffix(v.ContainerdCommit.ID, "-g"+v.ContainerdCommit.ID[len(v.ContainerdCommit.ID)-7:]):
+            v.ContainerdCommit.ID = v.ContainerdCommit.Expected
+        }
+    } else {
+        logrus.Warnf("failed to retrieve docker-containerd version: %v", err)
+        v.ContainerdCommit.ID = "N/A"
+    }
+
     defaultInitBinary := daemon.configStore.GetInitPath()
     if rv, err := exec.Command(defaultInitBinary, "--version").Output(); err == nil {
         ver, err := parseInitVersion(string(rv))
diff --git a/daemon/kill.go b/daemon/kill.go
index bb3e87cae3..a230eaa76e 100644
--- a/daemon/kill.go
+++ b/daemon/kill.go
@@ -9,6 +9,7 @@ import (
     "time"
 
     containerpkg "github.com/docker/docker/container"
+    "github.com/docker/docker/libcontainerd"
     "github.com/docker/docker/pkg/signal"
     "github.com/pkg/errors"
     "github.com/sirupsen/logrus"
@@ -108,7 +109,7 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, sig int)
     if unpause {
         // above kill signal will be sent once resume is finished
-        if err := daemon.containerd.Resume(container.ID); err != nil {
+        if err := daemon.containerd.Resume(context.Background(), container.ID); err != nil {
             logrus.Warn("Cannot unpause container %s: %s", container.ID, err)
         }
     }
@@ -177,5 +178,5 @@ func (daemon *Daemon) killPossiblyDeadProcess(container *containerpkg.Container,
 }
 
 func (daemon *Daemon) kill(c *containerpkg.Container, sig int) error {
-    return daemon.containerd.Signal(c.ID, sig)
+    return daemon.containerd.SignalProcess(context.Background(), c.ID, libcontainerd.InitProcessName, sig)
 }
diff --git a/daemon/logger/plugin_unix.go b/daemon/logger/plugin_unix.go
index f254c9c57d..f93d7af0ee 100644
--- a/daemon/logger/plugin_unix.go
+++ b/daemon/logger/plugin_unix.go
@@ -6,8 +6,8 @@ import (
     "context"
     "io"
 
+    "github.com/containerd/fifo"
     "github.com/pkg/errors"
-
"github.com/tonistiigi/fifo" "golang.org/x/sys/unix" ) diff --git a/daemon/monitor.go b/daemon/monitor.go index 3946e7aaec..c0a265dac5 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -1,6 +1,7 @@ package daemon import ( + "context" "errors" "fmt" "runtime" @@ -25,15 +26,15 @@ func (daemon *Daemon) setStateCounter(c *container.Container) { } } -// StateChanged updates daemon state changes from containerd -func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { - c := daemon.containers.Get(id) - if c == nil { +// ProcessEvent is called by libcontainerd whenever an event occurs +func (daemon *Daemon) ProcessEvent(id string, e libcontainerd.EventType, ei libcontainerd.EventInfo) error { + c, err := daemon.GetContainer(id) + if c == nil || err != nil { return fmt.Errorf("no such container: %s", id) } - switch e.State { - case libcontainerd.StateOOM: + switch e { + case libcontainerd.EventOOM: // StateOOM is Linux specific and should never be hit on Windows if runtime.GOOS == "windows" { return errors.New("received StateOOM from libcontainerd on Windows. This should never happen") @@ -43,63 +44,72 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error { return err } daemon.LogContainerEvent(c, "oom") - case libcontainerd.StateExit: + case libcontainerd.EventExit: + if int(ei.Pid) == c.Pid { + _, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID) + if err != nil { + logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID) + } - c.Lock() - c.StreamConfig.Wait() - c.Reset(false) + c.Lock() + c.StreamConfig.Wait() + c.Reset(false) - // If daemon is being shutdown, don't let the container restart - restart, wait, err := c.RestartManager().ShouldRestart(e.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt)) - if err == nil && restart { - c.RestartCount++ - c.SetRestarting(platformConstructExitStatus(e)) - } else { - c.SetStopped(platformConstructExitStatus(e)) - defer daemon.autoRemove(c) - } + exitStatus := container.ExitStatus{ + ExitCode: int(ei.ExitCode), + ExitedAt: ei.ExitedAt, + OOMKilled: ei.OOMKilled, + } + restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt)) + if err == nil && restart { + c.RestartCount++ + c.SetRestarting(&exitStatus) + } else { + c.SetStopped(&exitStatus) + defer daemon.autoRemove(c) + } - // cancel healthcheck here, they will be automatically - // restarted if/when the container is started again - daemon.stopHealthchecks(c) - attributes := map[string]string{ - "exitCode": strconv.Itoa(int(e.ExitCode)), - } - daemon.LogContainerEventWithAttributes(c, "die", attributes) - daemon.Cleanup(c) + // cancel healthcheck here, they will be automatically + // restarted if/when the container is started again + daemon.stopHealthchecks(c) + attributes := map[string]string{ + "exitCode": strconv.Itoa(int(ei.ExitCode)), + } + daemon.LogContainerEventWithAttributes(c, "die", attributes) + daemon.Cleanup(c) - if err == nil && restart { - go func() { - err := <-wait - if err == nil { - // daemon.netController is initialized when daemon is restoring containers. - // But containerStart will use daemon.netController segment. - // So to avoid panic at startup process, here must wait util daemon restore done. 
-                daemon.waitForStartupDone()
-                if err = daemon.containerStart(c, "", "", false); err != nil {
-                    logrus.Debugf("failed to restart container: %+v", err)
+            if err == nil && restart {
+                go func() {
+                    err := <-wait
+                    if err == nil {
+                        // daemon.netController is initialized when the daemon is restoring containers,
+                        // but containerStart will use the daemon.netController field.
+                        // So, to avoid a panic at startup, we must wait here until the daemon restore is done.
+                        daemon.waitForStartupDone()
+                        if err = daemon.containerStart(c, "", "", false); err != nil {
+                            logrus.Debugf("failed to restart container: %+v", err)
+                        }
                     }
-                }
-                if err != nil {
-                    c.SetStopped(platformConstructExitStatus(e))
-                    defer daemon.autoRemove(c)
-                    if err != restartmanager.ErrRestartCanceled {
-                        logrus.Errorf("restartmanger wait error: %+v", err)
+                    if err != nil {
+                        c.SetStopped(&exitStatus)
+                        defer daemon.autoRemove(c)
+                        if err != restartmanager.ErrRestartCanceled {
+                            logrus.Errorf("restartmanager wait error: %+v", err)
+                        }
                     }
-                }
-            }()
+                }()
+            }
+
+            daemon.setStateCounter(c)
+            defer c.Unlock()
+            if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+                return err
+            }
+            return daemon.postRunProcessing(c, ei)
         }
 
-        daemon.setStateCounter(c)
-
-        defer c.Unlock()
-        if err := c.CheckpointTo(daemon.containersReplica); err != nil {
-            return err
-        }
-        return daemon.postRunProcessing(c, e)
-    case libcontainerd.StateExitProcess:
-        if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil {
-            ec := int(e.ExitCode)
+        if execConfig := c.ExecCommands.ByPid(int(ei.Pid)); execConfig != nil {
+            ec := int(ei.ExitCode)
             execConfig.Lock()
             defer execConfig.Unlock()
             execConfig.ExitCode = &ec
@@ -111,42 +121,59 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
             // remove the exec command from the container's store only and not the
             // daemon's store so that the exec command can be inspected.
- c.ExecCommands.Delete(execConfig.ID) + c.ExecCommands.Delete(execConfig.ID, execConfig.Pid) } else { - logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e) + logrus.WithFields(logrus.Fields{ + "container": c.ID, + "exec-pid": ei.Pid, + }).Warnf("Ignoring Exit Event, no such exec command found") } - case libcontainerd.StateStart, libcontainerd.StateRestore: - // Container is already locked in this case - c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart) - c.HasBeenManuallyStopped = false - c.HasBeenStartedBefore = true - daemon.setStateCounter(c) + case libcontainerd.EventStart: + c.Lock() + defer c.Unlock() - daemon.initHealthMonitor(c) - if err := c.CheckpointTo(daemon.containersReplica); err != nil { - c.Reset(false) - return err + // This is here to handle start not generated by docker + if !c.Running { + c.SetRunning(int(ei.Pid), false) + c.HasBeenManuallyStopped = false + c.HasBeenStartedBefore = true + daemon.setStateCounter(c) + + daemon.initHealthMonitor(c) + + if err := c.CheckpointTo(daemon.containersReplica); err != nil { + return err + } + daemon.LogContainerEvent(c, "start") } - daemon.LogContainerEvent(c, "start") - case libcontainerd.StatePause: - // Container is already locked in this case - c.Paused = true - daemon.setStateCounter(c) - daemon.updateHealthMonitor(c) - if err := c.CheckpointTo(daemon.containersReplica); err != nil { - return err + case libcontainerd.EventPaused: + c.Lock() + defer c.Unlock() + + if !c.Paused { + c.Paused = true + daemon.setStateCounter(c) + daemon.updateHealthMonitor(c) + if err := c.CheckpointTo(daemon.containersReplica); err != nil { + return err + } + daemon.LogContainerEvent(c, "pause") } - daemon.LogContainerEvent(c, "pause") - case libcontainerd.StateResume: - // Container is already locked in this case - c.Paused = false - daemon.setStateCounter(c) - daemon.updateHealthMonitor(c) - if err := c.CheckpointTo(daemon.containersReplica); err != nil { - return err + case libcontainerd.EventResumed: + c.Lock() + defer c.Unlock() + + if c.Paused { + c.Paused = false + daemon.setStateCounter(c) + daemon.updateHealthMonitor(c) + + if err := c.CheckpointTo(daemon.containersReplica); err != nil { + return err + } + daemon.LogContainerEvent(c, "unpause") } - daemon.LogContainerEvent(c, "unpause") } return nil } diff --git a/daemon/monitor_linux.go b/daemon/monitor_linux.go index 09f5af50c6..0995758000 100644 --- a/daemon/monitor_linux.go +++ b/daemon/monitor_linux.go @@ -5,15 +5,7 @@ import ( "github.com/docker/docker/libcontainerd" ) -// platformConstructExitStatus returns a platform specific exit status structure -func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus { - return &container.ExitStatus{ - ExitCode: int(e.ExitCode), - OOMKilled: e.OOMKilled, - } -} - // postRunProcessing perfoms any processing needed on the container after it has stopped. 
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
+func (daemon *Daemon) postRunProcessing(_ *container.Container, _ libcontainerd.EventInfo) error {
     return nil
 }
diff --git a/daemon/monitor_solaris.go b/daemon/monitor_solaris.go
index 5ccfada76a..0995758000 100644
--- a/daemon/monitor_solaris.go
+++ b/daemon/monitor_solaris.go
@@ -5,14 +5,7 @@ import (
     "github.com/docker/docker/libcontainerd"
 )
 
-// platformConstructExitStatus returns a platform specific exit status structure
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
-    return &container.ExitStatus{
-        ExitCode: int(e.ExitCode),
-    }
-}
-
 // postRunProcessing perfoms any processing needed on the container after it has stopped.
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
+func (daemon *Daemon) postRunProcessing(_ *container.Container, _ libcontainerd.EventInfo) error {
     return nil
 }
diff --git a/daemon/monitor_windows.go b/daemon/monitor_windows.go
index 15d656de0e..dd5a09ada8 100644
--- a/daemon/monitor_windows.go
+++ b/daemon/monitor_windows.go
@@ -1,40 +1,52 @@
 package daemon
 
 import (
-    "fmt"
+    "context"
 
     "github.com/docker/docker/container"
     "github.com/docker/docker/libcontainerd"
+    "github.com/pkg/errors"
+    "github.com/sirupsen/logrus"
 )
 
-// platformConstructExitStatus returns a platform specific exit status structure
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
-    return &container.ExitStatus{
-        ExitCode: int(e.ExitCode),
-    }
-}
-
-// postRunProcessing perfoms any processing needed on the container after it has stopped.
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
-    if e.ExitCode == 0 && e.UpdatePending {
-        spec, err := daemon.createSpec(container)
+// postRunProcessing starts a servicing container if required
+func (daemon *Daemon) postRunProcessing(c *container.Container, ei libcontainerd.EventInfo) error {
+    if ei.ExitCode == 0 && ei.UpdatePending {
+        spec, err := daemon.createSpec(c)
         if err != nil {
             return err
         }
-        // Turn on servicing
         spec.Windows.Servicing = true
 
-        copts, err := daemon.getLibcontainerdCreateOptions(container)
+        copts, err := daemon.getLibcontainerdCreateOptions(c)
         if err != nil {
             return err
         }
 
-        // Create a new servicing container, which will start, complete the update, and merge back the
-        // results if it succeeded, all as part of the below function call.
-        if err := daemon.containerd.Create((container.ID + "_servicing"), "", "", *spec, container.InitializeStdio, copts...); err != nil {
-            container.SetExitCode(-1)
-            return fmt.Errorf("Post-run update servicing failed: %s", err)
+        // Create a new servicing container, which will start, complete the
+        // update, and merge back the results if it succeeded, all driven by
+        // the explicit calls below.
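+        // Lifecycle with the containerd 1.0 client, which used to be a
+        // single Create call: Create registers the container, Start runs
+        // its task, and DeleteTask/Delete reap the task and the container
+        // afterwards.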
+ ctx := context.Background() + svcID := c.ID + "_servicing" + logger := logrus.WithField("container", svcID) + if err := daemon.containerd.Create(ctx, svcID, spec, copts); err != nil { + c.SetExitCode(-1) + return errors.Wrap(err, "post-run update servicing failed") + } + _, err = daemon.containerd.Start(ctx, svcID, "", false, nil) + if err != nil { + logger.WithError(err).Warn("failed to run servicing container") + if err := daemon.containerd.Delete(ctx, svcID); err != nil { + logger.WithError(err).Warn("failed to delete servicing container") + } + } else { + if _, _, err := daemon.containerd.DeleteTask(ctx, svcID); err != nil { + logger.WithError(err).Warn("failed to delete servicing container task") + } + if err := daemon.containerd.Delete(ctx, svcID); err != nil { + logger.WithError(err).Warn("failed to delete servicing container") + } } } return nil diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go index 89ac627ff0..b4a6bf60d2 100644 --- a/daemon/oci_linux.go +++ b/daemon/oci_linux.go @@ -156,7 +156,7 @@ func setDevices(s *specs.Spec, c *container.Container) error { return nil } -func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error { +func (daemon *Daemon) setRlimits(s *specs.Spec, c *container.Container) error { var rlimits []specs.POSIXRlimit // We want to leave the original HostConfig alone so make a copy here @@ -755,6 +755,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) { if err := setResources(&s, c.HostConfig.Resources); err != nil { return nil, fmt.Errorf("linux runtime spec resources: %v", err) } + s.Process.OOMScoreAdj = &c.HostConfig.OomScoreAdj s.Linux.Sysctl = c.HostConfig.Sysctls p := s.Linux.CgroupsPath @@ -763,11 +764,11 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) { if err != nil { return nil, err } - p, _ = cgroups.GetOwnCgroup("cpu") + _, err = cgroups.GetOwnCgroup("cpu") if err != nil { return nil, err } - p = filepath.Join(initPath, p) + p = filepath.Join(initPath, s.Linux.CgroupsPath) } // Clean path to guard against things like ../../../BAD @@ -782,7 +783,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) { if err := setDevices(&s, c); err != nil { return nil, fmt.Errorf("linux runtime spec devices: %v", err) } - if err := setRlimits(daemon, &s, c); err != nil { + if err := daemon.setRlimits(&s, c); err != nil { return nil, fmt.Errorf("linux runtime spec rlimits: %v", err) } if err := setUser(&s, c); err != nil { diff --git a/daemon/pause.go b/daemon/pause.go index 3fecea59c9..b751cc4880 100644 --- a/daemon/pause.go +++ b/daemon/pause.go @@ -1,9 +1,11 @@ package daemon import ( + "context" "fmt" "github.com/docker/docker/container" + "github.com/sirupsen/logrus" ) // ContainerPause pauses a container @@ -33,7 +35,7 @@ func (daemon *Daemon) containerPause(container *container.Container) error { // We cannot Pause the container which is already paused if container.Paused { - return fmt.Errorf("Container %s is already paused", container.ID) + return errNotPaused(container.ID) } // We cannot Pause the container which is restarting @@ -41,9 +43,18 @@ func (daemon *Daemon) containerPause(container *container.Container) error { return errContainerIsRestarting(container.ID) } - if err := daemon.containerd.Pause(container.ID); err != nil { + if err := daemon.containerd.Pause(context.Background(), container.ID); err != nil { return fmt.Errorf("Cannot pause container %s: %s", container.ID, err) } + container.Paused = true + 
daemon.setStateCounter(container) + daemon.updateHealthMonitor(container) + daemon.LogContainerEvent(container, "pause") + + if err := container.CheckpointTo(daemon.containersReplica); err != nil { + logrus.WithError(err).Warn("could not save container to disk") + } + return nil } diff --git a/daemon/reload.go b/daemon/reload.go index a6674ec951..0d16bc8d02 100644 --- a/daemon/reload.go +++ b/daemon/reload.go @@ -6,7 +6,6 @@ import ( "github.com/docker/docker/daemon/config" "github.com/docker/docker/daemon/discovery" - "github.com/docker/docker/libcontainerd" "github.com/sirupsen/logrus" ) @@ -303,9 +302,6 @@ func (daemon *Daemon) reloadLiveRestore(conf *config.Config, attributes map[stri // update corresponding configuration if conf.IsValueSet("live-restore") { daemon.configStore.LiveRestoreEnabled = conf.LiveRestoreEnabled - if err := daemon.containerdRemote.UpdateOptions(libcontainerd.WithLiveRestore(conf.LiveRestoreEnabled)); err != nil { - return err - } } // prepare reload event attributes with updatable configurations diff --git a/daemon/resize.go b/daemon/resize.go index 0923d0fe12..a992a073a5 100644 --- a/daemon/resize.go +++ b/daemon/resize.go @@ -1,6 +1,7 @@ package daemon import ( + "context" "fmt" "github.com/docker/docker/libcontainerd" @@ -18,7 +19,7 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error { return errNotRunning(container.ID) } - if err = daemon.containerd.Resize(container.ID, libcontainerd.InitFriendlyName, width, height); err == nil { + if err = daemon.containerd.ResizeTerminal(context.Background(), container.ID, libcontainerd.InitProcessName, width, height); err == nil { attributes := map[string]string{ "height": fmt.Sprintf("%d", height), "width": fmt.Sprintf("%d", width), @@ -36,5 +37,5 @@ func (daemon *Daemon) ContainerExecResize(name string, height, width int) error if err != nil { return err } - return daemon.containerd.Resize(ec.ContainerID, ec.ID, width, height) + return daemon.containerd.ResizeTerminal(context.Background(), ec.ContainerID, ec.ID, width, height) } diff --git a/daemon/start.go b/daemon/start.go index ab8443c855..3b9f0f9f63 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -1,6 +1,7 @@ package daemon import ( + "context" "runtime" "time" @@ -113,6 +114,11 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint return stateConflictError{errors.New("container is marked for removal and cannot be started")} } + if checkpointDir != "" { + // TODO(mlaventure): how would we support that? 
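+        // (With the 1.0 client, checkpoints are uploaded into containerd's
+        // content store from the default checkpoint directory at start time,
+        // so an arbitrary user-supplied directory cannot simply be passed
+        // through; see the client's Start implementation.)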
+ return notAllowedError{errors.New("custom checkpointdir is not supported")} + } + // if we encounter an error during start we need to ensure that any other // setup has been cleaned up properly defer func() { @@ -152,28 +158,56 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint return systemError{err} } - createOptions, err := daemon.getLibcontainerdCreateOptions(container) - if err != nil { - return err - } - if resetRestartManager { container.ResetRestartManager(true) } - if checkpointDir == "" { - checkpointDir = container.CheckpointDir() - } - if daemon.saveApparmorConfig(container); err != nil { return err } - if err := daemon.containerd.Create(container.ID, checkpoint, checkpointDir, *spec, container.InitializeStdio, createOptions...); err != nil { - return translateContainerdStartErr(container.Path, container.SetExitCode, err) - + if checkpoint != "" { + checkpointDir, err = getCheckpointDir(checkpointDir, checkpoint, container.Name, container.ID, container.CheckpointDir(), false) + if err != nil { + return err + } } + createOptions, err := daemon.getLibcontainerdCreateOptions(container) + if err != nil { + return err + } + + err = daemon.containerd.Create(context.Background(), container.ID, spec, createOptions) + if err != nil { + return translateContainerdStartErr(container.Path, container.SetExitCode, err) + } + + // TODO(mlaventure): we need to specify checkpoint options here + pid, err := daemon.containerd.Start(context.Background(), container.ID, checkpointDir, + container.StreamConfig.Stdin() != nil || container.Config.Tty, + container.InitializeStdio) + if err != nil { + if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil { + logrus.WithError(err).WithField("container", container.ID). + Error("failed to delete failed start container") + } + return translateContainerdStartErr(container.Path, container.SetExitCode, err) + } + + container.SetRunning(pid, true) + container.HasBeenManuallyStopped = false + container.HasBeenStartedBefore = true + daemon.setStateCounter(container) + + daemon.initHealthMonitor(container) + + if err := container.CheckpointTo(daemon.containersReplica); err != nil { + logrus.WithError(err).WithField("container", container.ID). 
+ Errorf("failed to store container") + } + + daemon.LogContainerEvent(container, "start") containerActions.WithValues("start").UpdateSince(start) return nil @@ -209,5 +243,10 @@ func (daemon *Daemon) Cleanup(container *container.Container) { logrus.Warnf("%s cleanup: Failed to umount volumes: %v", container.ID, err) } } + container.CancelAttachContext() + + if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil { + logrus.Errorf("%s cleanup: failed to delete container from containerd: %v", container.ID, err) + } } diff --git a/daemon/start_unix.go b/daemon/start_unix.go index 87ab0850c2..a8402bb303 100644 --- a/daemon/start_unix.go +++ b/daemon/start_unix.go @@ -3,29 +3,54 @@ package daemon import ( + "fmt" + "os/exec" + "path/filepath" + + "github.com/containerd/containerd/linux/runcopts" "github.com/docker/docker/container" - "github.com/docker/docker/libcontainerd" "github.com/pkg/errors" ) -// getLibcontainerdCreateOptions callers must hold a lock on the container -func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) { - createOptions := []libcontainerd.CreateOption{} +func (daemon *Daemon) getRuntimeScript(container *container.Container) (string, error) { + name := container.HostConfig.Runtime + rt := daemon.configStore.GetRuntime(name) + if rt == nil { + return "", validationError{errors.Errorf("no such runtime '%s'", name)} + } + if len(rt.Args) > 0 { + // First check that the target exist, as using it in a script won't + // give us the right error + if _, err := exec.LookPath(rt.Path); err != nil { + return "", translateContainerdStartErr(container.Path, container.SetExitCode, err) + } + return filepath.Join(daemon.configStore.Root, "runtimes", name), nil + } + return rt.Path, nil +} + +// getLibcontainerdCreateOptions callers must hold a lock on the container +func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (interface{}, error) { // Ensure a runtime has been assigned to this container if container.HostConfig.Runtime == "" { container.HostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName() container.CheckpointTo(daemon.containersReplica) } - rt := daemon.configStore.GetRuntime(container.HostConfig.Runtime) - if rt == nil { - return nil, validationError{errors.Errorf("no such runtime '%s'", container.HostConfig.Runtime)} + path, err := daemon.getRuntimeScript(container) + if err != nil { + return nil, err } - if UsingSystemd(daemon.configStore) { - rt.Args = append(rt.Args, "--systemd-cgroup=true") + opts := &runcopts.RuncOptions{ + Runtime: path, + RuntimeRoot: filepath.Join(daemon.configStore.ExecRoot, + fmt.Sprintf("runtime-%s", container.HostConfig.Runtime)), } - createOptions = append(createOptions, libcontainerd.WithRuntime(rt.Path, rt.Args)) - return createOptions, nil + if UsingSystemd(daemon.configStore) { + opts.SystemdCgroup = true + } + + return opts, nil } diff --git a/daemon/start_windows.go b/daemon/start_windows.go index 3de6391eae..55588be6ca 100644 --- a/daemon/start_windows.go +++ b/daemon/start_windows.go @@ -3,12 +3,9 @@ package daemon import ( "github.com/Microsoft/opengcs/client" "github.com/docker/docker/container" - "github.com/docker/docker/libcontainerd" ) -func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) { - createOptions := []libcontainerd.CreateOption{} - +func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) 
(interface{}, error) { // LCOW options. if container.OS == "linux" { config := &client.Config{} @@ -33,11 +30,9 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain if err := config.Validate(); err != nil { return nil, err } - lcowOpts := &libcontainerd.LCOWOption{ - Config: config, - } - createOptions = append(createOptions, lcowOpts) + + return config, nil } - return createOptions, nil + return nil, nil } diff --git a/daemon/top_unix.go b/daemon/top_unix.go index 22e88b702e..cbb993f658 100644 --- a/daemon/top_unix.go +++ b/daemon/top_unix.go @@ -3,6 +3,7 @@ package daemon import ( + "context" "fmt" "os/exec" "regexp" @@ -50,16 +51,16 @@ func appendProcess2ProcList(procList *container.ContainerTopOKBody, fields []str procList.Processes = append(procList.Processes, process) } -func hasPid(pids []int, pid int) bool { - for _, i := range pids { - if i == pid { +func hasPid(procs []uint32, pid int) bool { + for _, p := range procs { + if int(p) == pid { return true } } return false } -func parsePSOutput(output []byte, pids []int) (*container.ContainerTopOKBody, error) { +func parsePSOutput(output []byte, procs []uint32) (*container.ContainerTopOKBody, error) { procList := &container.ContainerTopOKBody{} lines := strings.Split(string(output), "\n") @@ -101,7 +102,7 @@ func parsePSOutput(output []byte, pids []int) (*container.ContainerTopOKBody, er return nil, fmt.Errorf("Unexpected pid '%s': %s", fields[pidIndex], err) } - if hasPid(pids, p) { + if hasPid(procs, p) { preContainedPidFlag = true appendProcess2ProcList(procList, fields) continue @@ -138,7 +139,7 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*container.Conta return nil, errContainerIsRestarting(container.ID) } - pids, err := daemon.containerd.GetPidsForContainer(container.ID) + procs, err := daemon.containerd.ListPids(context.Background(), container.ID) if err != nil { return nil, err } @@ -147,7 +148,7 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*container.Conta if err != nil { return nil, fmt.Errorf("Error running ps: %v", err) } - procList, err := parsePSOutput(output, pids) + procList, err := parsePSOutput(output, procs) if err != nil { return nil, err } diff --git a/daemon/top_unix_test.go b/daemon/top_unix_test.go index 9a3749f711..4cc4a20700 100644 --- a/daemon/top_unix_test.go +++ b/daemon/top_unix_test.go @@ -36,7 +36,7 @@ func TestContainerTopValidatePSArgs(t *testing.T) { func TestContainerTopParsePSOutput(t *testing.T) { tests := []struct { output []byte - pids []int + pids []uint32 errExpected bool }{ {[]byte(` PID COMMAND @@ -44,26 +44,26 @@ func TestContainerTopParsePSOutput(t *testing.T) { 43 bar - - 100 baz -`), []int{42, 43}, false}, +`), []uint32{42, 43}, false}, {[]byte(` UID COMMAND 42 foo 43 bar - - 100 baz -`), []int{42, 43}, true}, +`), []uint32{42, 43}, true}, // unicode space (U+2003, 0xe2 0x80 0x83) {[]byte(` PID COMMAND 42 foo 43 bar - - 100 baz -`), []int{42, 43}, true}, +`), []uint32{42, 43}, true}, // the first space is U+2003, the second one is ascii. {[]byte(` PID COMMAND 42 foo 43 bar 100 baz -`), []int{42, 43}, true}, +`), []uint32{42, 43}, true}, } for _, f := range tests { diff --git a/daemon/top_windows.go b/daemon/top_windows.go index 000720b004..40828ffb81 100644 --- a/daemon/top_windows.go +++ b/daemon/top_windows.go @@ -1,6 +1,7 @@ package daemon import ( + "context" "errors" "fmt" "time" @@ -34,7 +35,15 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*containertypes. 
return nil, err } - s, err := daemon.containerd.Summary(container.ID) + if !container.IsRunning() { + return nil, errNotRunning(container.ID) + } + + if container.IsRestarting() { + return nil, errContainerIsRestarting(container.ID) + } + + s, err := daemon.containerd.Summary(context.Background(), container.ID) if err != nil { return nil, err } @@ -49,5 +58,6 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*containertypes. fmt.Sprintf("%02d:%02d:%02d.%03d", int(d.Hours()), int(d.Minutes())%60, int(d.Seconds())%60, int(d.Nanoseconds()/1000000)%1000), units.HumanSize(float64(j.MemoryWorkingSetPrivateBytes))}) } + return procList, nil } diff --git a/daemon/unpause.go b/daemon/unpause.go index e66b3868dc..2e41f2070b 100644 --- a/daemon/unpause.go +++ b/daemon/unpause.go @@ -1,9 +1,11 @@ package daemon import ( + "context" "fmt" "github.com/docker/docker/container" + "github.com/sirupsen/logrus" ) // ContainerUnpause unpauses a container @@ -30,9 +32,18 @@ func (daemon *Daemon) containerUnpause(container *container.Container) error { return fmt.Errorf("Container %s is not paused", container.ID) } - if err := daemon.containerd.Resume(container.ID); err != nil { + if err := daemon.containerd.Resume(context.Background(), container.ID); err != nil { return fmt.Errorf("Cannot unpause container %s: %s", container.ID, err) } + container.Paused = false + daemon.setStateCounter(container) + daemon.updateHealthMonitor(container) + daemon.LogContainerEvent(container, "unpause") + + if err := container.CheckpointTo(daemon.containersReplica); err != nil { + logrus.WithError(err).Warnf("could not save container to disk") + } + return nil } diff --git a/daemon/update.go b/daemon/update.go index c969ebb21c..0a79c199f3 100644 --- a/daemon/update.go +++ b/daemon/update.go @@ -1,6 +1,7 @@ package daemon import ( + "context" "fmt" "github.com/docker/docker/api/types/container" @@ -76,7 +77,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro // If container is running (including paused), we need to update configs // to the real world. if container.IsRunning() && !container.IsRestarting() { - if err := daemon.containerd.UpdateResources(container.ID, toContainerdResources(hostConfig.Resources)); err != nil { + if err := daemon.containerd.UpdateResources(context.Background(), container.ID, toContainerdResources(hostConfig.Resources)); err != nil { restoreConfig = true // TODO: it would be nice if containerd responded with better errors here so we can classify this better. 
 return errCannotUpdate(container.ID, systemError{err})
diff --git a/daemon/update_linux.go b/daemon/update_linux.go
index c128967218..41d3b5324a 100644
--- a/daemon/update_linux.go
+++ b/daemon/update_linux.go
@@ -7,26 +7,43 @@ import (
 
     "github.com/docker/docker/api/types/container"
     "github.com/docker/docker/libcontainerd"
+    specs "github.com/opencontainers/runtime-spec/specs-go"
 )
 
-func toContainerdResources(resources container.Resources) libcontainerd.Resources {
+func toContainerdResources(resources container.Resources) *libcontainerd.Resources {
     var r libcontainerd.Resources
 
-    r.BlkioWeight = uint64(resources.BlkioWeight)
-    r.CpuShares = uint64(resources.CPUShares)
+    r.BlockIO = &specs.LinuxBlockIO{
+        Weight: &resources.BlkioWeight,
+    }
+
+    shares := uint64(resources.CPUShares)
+    r.CPU = &specs.LinuxCPU{
+        Shares: &shares,
+        Cpus:   resources.CpusetCpus,
+        Mems:   resources.CpusetMems,
+    }
+
+    var (
+        period uint64
+        quota  int64
+    )
     if resources.NanoCPUs != 0 {
-        r.CpuPeriod = uint64(100 * time.Millisecond / time.Microsecond)
-        r.CpuQuota = uint64(resources.NanoCPUs) * r.CpuPeriod / 1e9
-    } else {
-        r.CpuPeriod = uint64(resources.CPUPeriod)
-        r.CpuQuota = uint64(resources.CPUQuota)
+        period = uint64(100 * time.Millisecond / time.Microsecond)
+        quota = resources.NanoCPUs * int64(period) / 1e9
     }
-    r.CpusetCpus = resources.CpusetCpus
-    r.CpusetMems = resources.CpusetMems
-    r.MemoryLimit = uint64(resources.Memory)
+    r.CPU.Period = &period
+    r.CPU.Quota = &quota
+
+    r.Memory = &specs.LinuxMemory{
+        Limit:       &resources.Memory,
+        Reservation: &resources.MemoryReservation,
+        Kernel:      &resources.KernelMemory,
+    }
+
     if resources.MemorySwap > 0 {
-        r.MemorySwap = uint64(resources.MemorySwap)
+        r.Memory.Swap = &resources.MemorySwap
     }
-    r.MemoryReservation = uint64(resources.MemoryReservation)
-    r.KernelMemoryLimit = uint64(resources.KernelMemory)
-    return r
+
+    return &r
 }
diff --git a/daemon/update_windows.go b/daemon/update_windows.go
index 01466260bb..4f85f41dda 100644
--- a/daemon/update_windows.go
+++ b/daemon/update_windows.go
@@ -7,7 +7,7 @@ import (
     "github.com/docker/docker/libcontainerd"
 )
 
-func toContainerdResources(resources container.Resources) libcontainerd.Resources {
-    var r libcontainerd.Resources
-    return r
+func toContainerdResources(resources container.Resources) *libcontainerd.Resources {
+    // We don't support update, so do nothing
+    return nil
 }
diff --git a/hack/make/.go-autogen b/hack/make/.go-autogen
index ec20180672..b68e3a7534 100644
--- a/hack/make/.go-autogen
+++ b/hack/make/.go-autogen
@@ -17,6 +17,7 @@ const (
     Version string = "$VERSION"
     BuildTime string = "$BUILDTIME"
     IAmStatic string = "${IAMSTATIC:-true}"
+    ContainerdCommitID string = "${CONTAINERD_COMMIT}"
 )
 
 // AUTOGENERATED FILE; see /go/src/github.com/docker/docker/hack/make/.go-autogen
@@ -31,9 +32,8 @@ package dockerversion
 // Default build-time variable for library-import.
 // This file is overridden on build with build-time informations.
const ( - ContainerdCommitID string = "${CONTAINERD_COMMIT}" - RuncCommitID string = "${RUNC_COMMIT}" - InitCommitID string = "${TINI_COMMIT}" + RuncCommitID string = "${RUNC_COMMIT}" + InitCommitID string = "${TINI_COMMIT}" ) // AUTOGENERATED FILE; see /go/src/github.com/docker/docker/hack/make/.go-autogen diff --git a/integration-cli/daemon/daemon.go b/integration-cli/daemon/daemon.go index 06bf504fa6..f6ad6559b2 100644 --- a/integration-cli/daemon/daemon.go +++ b/integration-cli/daemon/daemon.go @@ -222,7 +222,7 @@ func (d *Daemon) StartWithLogFile(out *os.File, providedArgs ...string) error { return errors.Wrapf(err, "[%s] could not find docker binary in $PATH", d.id) } args := append(d.GlobalFlags, - "--containerd", "/var/run/docker/libcontainerd/docker-containerd.sock", + "--containerd", "/var/run/docker/containerd/docker-containerd.sock", "--data-root", d.Root, "--exec-root", d.execRoot, "--pidfile", fmt.Sprintf("%s/docker.pid", d.Folder), @@ -457,6 +457,8 @@ out2: return err } + d.cmd.Wait() + if err := os.Remove(fmt.Sprintf("%s/docker.pid", d.Folder)); err != nil { return err } diff --git a/integration-cli/docker_api_stats_test.go b/integration-cli/docker_api_stats_test.go index 2e8515a3f6..7c9de1c083 100644 --- a/integration-cli/docker_api_stats_test.go +++ b/integration-cli/docker_api_stats_test.go @@ -285,7 +285,7 @@ func (s *DockerSuite) TestAPIStatsNoStreamConnectedContainers(c *check.C) { id2 := strings.TrimSpace(out2) c.Assert(waitRun(id2), checker.IsNil) - ch := make(chan error) + ch := make(chan error, 1) go func() { resp, body, err := request.Get(fmt.Sprintf("/containers/%s/stats?stream=false", id2)) defer body.Close() diff --git a/integration-cli/docker_cli_attach_test.go b/integration-cli/docker_cli_attach_test.go index db43beb7d2..353cb65e5d 100644 --- a/integration-cli/docker_cli_attach_test.go +++ b/integration-cli/docker_cli_attach_test.go @@ -147,7 +147,10 @@ func (s *DockerSuite) TestAttachDisconnect(c *check.C) { c.Assert(err, check.IsNil) defer stdout.Close() c.Assert(cmd.Start(), check.IsNil) - defer cmd.Process.Kill() + defer func() { + cmd.Process.Kill() + cmd.Wait() + }() _, err = stdin.Write([]byte("hello\n")) c.Assert(err, check.IsNil) diff --git a/integration-cli/docker_cli_build_unix_test.go b/integration-cli/docker_cli_build_unix_test.go index 91a329fae8..d857bd2f2c 100644 --- a/integration-cli/docker_cli_build_unix_test.go +++ b/integration-cli/docker_cli_build_unix_test.go @@ -149,6 +149,11 @@ func (s *DockerSuite) TestBuildCancellationKillsSleep(c *check.C) { if err := buildCmd.Start(); err != nil { c.Fatalf("failed to run build: %s", err) } + // always clean up + defer func() { + buildCmd.Process.Kill() + buildCmd.Wait() + }() matchCID := regexp.MustCompile("Running in (.+)") scanner := bufio.NewScanner(stdoutBuild) diff --git a/integration-cli/docker_cli_daemon_test.go b/integration-cli/docker_cli_daemon_test.go index ccf50543e9..a974b6fede 100644 --- a/integration-cli/docker_cli_daemon_test.go +++ b/integration-cli/docker_cli_daemon_test.go @@ -28,6 +28,7 @@ import ( "github.com/docker/docker/api" "github.com/docker/docker/api/types" "github.com/docker/docker/client" + moby_daemon "github.com/docker/docker/daemon" "github.com/docker/docker/integration-cli/checker" "github.com/docker/docker/integration-cli/cli" "github.com/docker/docker/integration-cli/daemon" @@ -1448,7 +1449,8 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonAndContainerKill(c *chec c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment) // kill 
the container - icmd.RunCommand(ctrBinary, "--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", id).Assert(c, icmd.Success) + icmd.RunCommand(ctrBinary, "--address", "/var/run/docker/containerd/docker-containerd.sock", + "--namespace", moby_daemon.MainNamespace, "tasks", "kill", id).Assert(c, icmd.Success) // restart daemon. d.Restart(c) @@ -1987,7 +1989,6 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithNames(c *check.C) { // TestDaemonRestartWithKilledRunningContainer requires live restore of running containers func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check.C) { - // TODO(mlaventure): Not sure what would the exit code be on windows testRequires(t, DaemonIsLinux) s.d.StartWithBusybox(t) @@ -2008,7 +2009,8 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check } // kill the container - icmd.RunCommand(ctrBinary, "--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", cid).Assert(t, icmd.Success) + icmd.RunCommand(ctrBinary, "--address", "/var/run/docker/containerd/docker-containerd.sock", + "--namespace", moby_daemon.MainNamespace, "tasks", "kill", cid).Assert(t, icmd.Success) // Give time to containerd to process the command if we don't // the exit event might be received after we do the inspect @@ -2076,7 +2078,6 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) { // TestDaemonRestartWithUnpausedRunningContainer requires live restore of running containers. func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *check.C) { - // TODO(mlaventure): Not sure what would the exit code be on windows testRequires(t, DaemonIsLinux) s.d.StartWithBusybox(t, "--live-restore") @@ -2103,8 +2104,9 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *che // resume the container result := icmd.RunCommand( ctrBinary, - "--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock", - "containers", "resume", cid) + "--address", "/var/run/docker/containerd/docker-containerd.sock", + "--namespace", moby_daemon.MainNamespace, + "tasks", "resume", cid) result.Assert(t, icmd.Success) // Give time to containerd to process the command if we don't diff --git a/integration-cli/docker_cli_events_test.go b/integration-cli/docker_cli_events_test.go index e179a0ebd3..dff54a4463 100644 --- a/integration-cli/docker_cli_events_test.go +++ b/integration-cli/docker_cli_events_test.go @@ -86,6 +86,7 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) { // timeouts creating so many containers simultaneously. This is a due to // a bug in the Windows platform. It will be fixed in a Windows Update. 
 numContainers := 17
+    eventPerContainer := 7 // create, attach, network connect, start, die, network disconnect, destroy
     numConcurrentContainers := numContainers
     if testEnv.DaemonPlatform() == "windows" {
         numConcurrentContainers = 4
@@ -93,17 +94,19 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
     sem := make(chan bool, numConcurrentContainers)
     errChan := make(chan error, numContainers)
 
+    startTime := daemonUnixTime(c)
+
     args := []string{"run", "--rm", "busybox", "true"}
     for i := 0; i < numContainers; i++ {
         sem <- true
-        go func() {
+        go func(i int) {
             defer func() { <-sem }()
             out, err := exec.Command(dockerBinary, args...).CombinedOutput()
             if err != nil {
                 err = fmt.Errorf("%v: %s", err, string(out))
             }
             errChan <- err
-        }()
+        }(i)
     }
 
     // Wait for all goroutines to finish
@@ -116,10 +119,10 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
         c.Assert(err, checker.IsNil, check.Commentf("%q failed with error", strings.Join(args, " ")))
     }
 
-    out, _ := dockerCmd(c, "events", "--since=0", "--until", daemonUnixTime(c))
+    out, _ := dockerCmd(c, "events", "--since="+startTime, "--until", daemonUnixTime(c))
     events := strings.Split(out, "\n")
     nEvents := len(events) - 1
-    c.Assert(nEvents, checker.Equals, 256, check.Commentf("events should be limited to 256, but received %d", nEvents))
+    c.Assert(nEvents, checker.Equals, numContainers*eventPerContainer, check.Commentf("events should be limited to %d, but received %d", numContainers*eventPerContainer, nEvents))
 }
 
 func (s *DockerSuite) TestEventsContainerEvents(c *check.C) {
@@ -533,7 +536,10 @@ func (s *DockerSuite) TestEventsAttach(c *check.C) {
     c.Assert(err, checker.IsNil)
     defer stdout.Close()
     c.Assert(cmd.Start(), checker.IsNil)
-    defer cmd.Process.Kill()
+    defer func() {
+        cmd.Process.Kill()
+        cmd.Wait()
+    }()
 
     // Make sure we're done attaching by writing/reading some stuff
     _, err = stdin.Write([]byte("hello\n"))
diff --git a/integration-cli/docker_cli_logs_test.go b/integration-cli/docker_cli_logs_test.go
index f75da1849c..41927a2806 100644
--- a/integration-cli/docker_cli_logs_test.go
+++ b/integration-cli/docker_cli_logs_test.go
@@ -230,6 +230,7 @@ func (s *DockerSuite) TestLogsFollowSlowStdoutConsumer(c *check.C) {
     stdout, err := logCmd.StdoutPipe()
     c.Assert(err, checker.IsNil)
     c.Assert(logCmd.Start(), checker.IsNil)
+    defer func() { go logCmd.Wait() }()
 
     // First read slowly
     bytes1, err := ConsumeWithSpeed(stdout, 10, 50*time.Millisecond, stopSlowRead)
diff --git a/integration-cli/docker_cli_network_unix_test.go b/integration-cli/docker_cli_network_unix_test.go
index 4762e3993c..4bb542386b 100644
--- a/integration-cli/docker_cli_network_unix_test.go
+++ b/integration-cli/docker_cli_network_unix_test.go
@@ -1625,6 +1625,7 @@ func (s *DockerSuite) TestEmbeddedDNSInvalidInput(c *check.C) {
 func (s *DockerSuite) TestDockerNetworkConnectFailsNoInspectChange(c *check.C) {
     dockerCmd(c, "run", "-d", "--name=bb", "busybox", "top")
     c.Assert(waitRun("bb"), check.IsNil)
+    defer dockerCmd(c, "stop", "bb")
 
     ns0 := inspectField(c, "bb", "NetworkSettings.Networks.bridge")
diff --git a/integration-cli/docker_cli_run_test.go b/integration-cli/docker_cli_run_test.go
index 8198fded72..67bf585d04 100644
--- a/integration-cli/docker_cli_run_test.go
+++ b/integration-cli/docker_cli_run_test.go
@@ -2249,6 +2249,7 @@ func (s *DockerSuite) TestRunSlowStdoutConsumer(c *check.C) {
     if err := cont.Start(); err != nil {
         c.Fatal(err)
     }
+    defer func() { go cont.Wait() }()
     n, err := ConsumeWithSpeed(stdout, 10000, 5*time.Millisecond, nil)
     if err != nil {
         c.Fatal(err)
     }
diff --git
a/integration-cli/docker_deprecated_api_v124_test.go b/integration-cli/docker_deprecated_api_v124_test.go index edf3e570f5..214ae08667 100644 --- a/integration-cli/docker_deprecated_api_v124_test.go +++ b/integration-cli/docker_deprecated_api_v124_test.go @@ -206,8 +206,10 @@ func (s *DockerSuite) TestDeprecatedPostContainersStartWithLinksInHostConfigIdLi testRequires(c, DaemonIsLinux) name := "test-host-config-links" out, _ := dockerCmd(c, "run", "--name", "link0", "-d", "busybox", "top") + defer dockerCmd(c, "stop", "link0") id := strings.TrimSpace(out) dockerCmd(c, "create", "--name", name, "--link", id, "busybox", "top") + defer dockerCmd(c, "stop", name) hc := inspectFieldJSON(c, name, "HostConfig") config := `{"HostConfig":` + hc + `}` diff --git a/integration-cli/events_utils_test.go b/integration-cli/events_utils_test.go index 580188950a..356b2c326d 100644 --- a/integration-cli/events_utils_test.go +++ b/integration-cli/events_utils_test.go @@ -69,7 +69,7 @@ func (e *eventObserver) Start() error { // Stop stops the events command. func (e *eventObserver) Stop() { e.command.Process.Kill() - e.command.Process.Release() + e.command.Wait() } // Match tries to match the events output with a given matcher. diff --git a/integration/service/create_test.go b/integration/service/create_test.go index cb0823dfbd..e94185a542 100644 --- a/integration/service/create_test.go +++ b/integration/service/create_test.go @@ -1,6 +1,7 @@ package service import ( + "runtime" "testing" "time" @@ -42,8 +43,15 @@ func TestCreateWithLBSandbox(t *testing.T) { }) require.NoError(t, err) + pollSettings := func(config *poll.Settings) { + if runtime.GOARCH == "arm" { + config.Timeout = 30 * time.Second + config.Delay = 100 * time.Millisecond + } + } + serviceID := serviceResp.ID - poll.WaitOn(t, serviceRunningTasksCount(client, serviceID, instances)) + poll.WaitOn(t, serviceRunningTasksCount(client, serviceID, instances), pollSettings) _, _, err = client.ServiceInspectWithRaw(context.Background(), serviceID, types.ServiceInspectOptions{}) require.NoError(t, err) @@ -55,7 +63,7 @@ func TestCreateWithLBSandbox(t *testing.T) { err = client.ServiceRemove(context.Background(), serviceID) require.NoError(t, err) - poll.WaitOn(t, serviceIsRemoved(client, serviceID)) + poll.WaitOn(t, serviceIsRemoved(client, serviceID), pollSettings) err = client.NetworkRemove(context.Background(), overlayID) require.NoError(t, err) diff --git a/libcontainerd/client.go b/libcontainerd/client.go deleted file mode 100644 index c9004b813b..0000000000 --- a/libcontainerd/client.go +++ /dev/null @@ -1,46 +0,0 @@ -package libcontainerd - -import ( - "fmt" - "sync" - - "github.com/docker/docker/pkg/locker" -) - -// clientCommon contains the platform agnostic fields used in the client structure -type clientCommon struct { - backend Backend - containers map[string]*container - locker *locker.Locker - mapMutex sync.RWMutex // protects read/write operations from containers map -} - -func (clnt *client) lock(containerID string) { - clnt.locker.Lock(containerID) -} - -func (clnt *client) unlock(containerID string) { - clnt.locker.Unlock(containerID) -} - -// must hold a lock for cont.containerID -func (clnt *client) appendContainer(cont *container) { - clnt.mapMutex.Lock() - clnt.containers[cont.containerID] = cont - clnt.mapMutex.Unlock() -} -func (clnt *client) deleteContainer(containerID string) { - clnt.mapMutex.Lock() - delete(clnt.containers, containerID) - clnt.mapMutex.Unlock() -} - -func (clnt *client) getContainer(containerID string) 
(*container, error) { - clnt.mapMutex.RLock() - container, ok := clnt.containers[containerID] - defer clnt.mapMutex.RUnlock() - if !ok { - return nil, fmt.Errorf("invalid container: %s", containerID) // fixme: typed error - } - return container, nil -} diff --git a/libcontainerd/client_daemon.go b/libcontainerd/client_daemon.go new file mode 100644 index 0000000000..e6514374ce --- /dev/null +++ b/libcontainerd/client_daemon.go @@ -0,0 +1,802 @@ +// +build !windows + +package libcontainerd + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "reflect" + "runtime" + "strings" + "sync" + "syscall" + "time" + + "google.golang.org/grpc" + + "github.com/containerd/containerd" + eventsapi "github.com/containerd/containerd/api/services/events/v1" + "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/archive" + "github.com/containerd/containerd/content" + "github.com/containerd/containerd/images" + "github.com/containerd/containerd/linux/runcopts" + "github.com/containerd/typeurl" + "github.com/docker/docker/pkg/ioutils" + "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// InitProcessName is the name given to the first process of a +// container +const InitProcessName = "init" + +type container struct { + sync.Mutex + + bundleDir string + ctr containerd.Container + task containerd.Task + execs map[string]containerd.Process + oomKilled bool +} + +type client struct { + sync.RWMutex // protects containers map + + remote *containerd.Client + stateDir string + logger *logrus.Entry + + namespace string + backend Backend + eventQ queue + containers map[string]*container +} + +func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallback) (alive bool, pid int, err error) { + c.Lock() + defer c.Unlock() + + var cio containerd.IO + defer func() { + err = wrapError(err) + }() + + ctr, err := c.remote.LoadContainer(ctx, id) + if err != nil { + return false, -1, errors.WithStack(err) + } + + defer func() { + if err != nil && cio != nil { + cio.Cancel() + cio.Close() + } + }() + + t, err := ctr.Task(ctx, func(fifos *containerd.FIFOSet) (containerd.IO, error) { + io, err := newIOPipe(fifos) + if err != nil { + return nil, err + } + + cio, err = attachStdio(io) + return cio, err + }) + if err != nil && !strings.Contains(err.Error(), "no running task found") { + return false, -1, err + } + + if t != nil { + s, err := t.Status(ctx) + if err != nil { + return false, -1, err + } + + alive = s.Status != containerd.Stopped + pid = int(t.Pid()) + } + c.containers[id] = &container{ + bundleDir: filepath.Join(c.stateDir, id), + ctr: ctr, + task: t, + // TODO(mlaventure): load execs + } + + c.logger.WithFields(logrus.Fields{ + "container": id, + "alive": alive, + "pid": pid, + }).Debug("restored container") + + return alive, pid, nil +} + +func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, runtimeOptions interface{}) error { + if ctr := c.getContainer(id); ctr != nil { + return errors.WithStack(newConflictError("id already in use")) + } + + bdir, err := prepareBundleDir(filepath.Join(c.stateDir, id), ociSpec) + if err != nil { + return wrapSystemError(errors.Wrap(err, "prepare bundle dir failed")) + } + + c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created") + + cdCtr, err := c.remote.NewContainer(ctx, id, + containerd.WithSpec(ociSpec), + // 
TODO(mlaventure): when containerd supports lcow, revisit the runtime value
+        containerd.WithRuntime(fmt.Sprintf("io.containerd.runtime.v1.%s", runtime.GOOS), runtimeOptions))
+    if err != nil {
+        return err
+    }
+
+    c.Lock()
+    c.containers[id] = &container{
+        bundleDir: bdir,
+        ctr:       cdCtr,
+    }
+    c.Unlock()
+
+    return nil
+}
+
+// Start creates and starts a task for the specified containerd id
+func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio StdioCallback) (int, error) {
+    ctr := c.getContainer(id)
+    switch {
+    case ctr == nil:
+        return -1, errors.WithStack(newNotFoundError("no such container"))
+    case ctr.task != nil:
+        return -1, errors.WithStack(newConflictError("container already started"))
+    }
+
+    var (
+        cp             *types.Descriptor
+        t              containerd.Task
+        cio            containerd.IO
+        err            error
+        stdinCloseSync = make(chan struct{})
+    )
+
+    if checkpointDir != "" {
+        // write checkpoint to the content store
+        tar := archive.Diff(ctx, "", checkpointDir)
+        cp, err = c.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar)
+        // remove the checkpoint when we're done
+        defer func() {
+            if cp != nil {
+                err := c.remote.ContentStore().Delete(context.Background(), cp.Digest)
+                if err != nil {
+                    c.logger.WithError(err).WithFields(logrus.Fields{
+                        "ref":    checkpointDir,
+                        "digest": cp.Digest,
+                    }).Warnf("failed to delete temporary checkpoint entry")
+                }
+            }
+        }()
+        if err := tar.Close(); err != nil {
+            return -1, errors.Wrap(err, "failed to close checkpoint tar stream")
+        }
+        if err != nil {
+            return -1, errors.Wrapf(err, "failed to upload checkpoint to containerd")
+        }
+    }
+
+    spec, err := ctr.ctr.Spec(ctx)
+    if err != nil {
+        return -1, errors.Wrap(err, "failed to retrieve spec")
+    }
+    uid, gid := getSpecUser(spec)
+    t, err = ctr.ctr.NewTask(ctx,
+        func(id string) (containerd.IO, error) {
+            cio, err = c.createIO(ctr.bundleDir, id, InitProcessName, stdinCloseSync, withStdin, spec.Process.Terminal, attachStdio)
+            return cio, err
+        },
+        func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
+            info.Checkpoint = cp
+            info.Options = &runcopts.CreateOptions{
+                IoUid: uint32(uid),
+                IoGid: uint32(gid),
+            }
+            return nil
+        })
+    if err != nil {
+        close(stdinCloseSync)
+        if cio != nil {
+            cio.Cancel()
+            cio.Close()
+        }
+        return -1, err
+    }
+
+    c.Lock()
+    c.containers[id].task = t
+    c.Unlock()
+
+    // Signal c.createIO that it can call CloseIO
+    close(stdinCloseSync)
+
+    if err := t.Start(ctx); err != nil {
+        if _, err := t.Delete(ctx); err != nil {
+            c.logger.WithError(err).WithField("container", id).
+ Error("failed to delete task after fail start") + } + c.Lock() + c.containers[id].task = nil + c.Unlock() + return -1, err + } + + return int(t.Pid()), nil +} + +func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) { + ctr := c.getContainer(containerID) + switch { + case ctr == nil: + return -1, errors.WithStack(newNotFoundError("no such container")) + case ctr.task == nil: + return -1, errors.WithStack(newInvalidParameterError("container is not running")) + case ctr.execs != nil && ctr.execs[processID] != nil: + return -1, errors.WithStack(newConflictError("id already in use")) + } + + var ( + p containerd.Process + cio containerd.IO + err error + stdinCloseSync = make(chan struct{}) + ) + defer func() { + if err != nil { + if cio != nil { + cio.Cancel() + cio.Close() + } + } + }() + + p, err = ctr.task.Exec(ctx, processID, spec, func(id string) (containerd.IO, error) { + cio, err = c.createIO(ctr.bundleDir, containerID, processID, stdinCloseSync, withStdin, spec.Terminal, attachStdio) + return cio, err + }) + if err != nil { + close(stdinCloseSync) + if cio != nil { + cio.Cancel() + cio.Close() + } + return -1, err + } + + ctr.Lock() + if ctr.execs == nil { + ctr.execs = make(map[string]containerd.Process) + } + ctr.execs[processID] = p + ctr.Unlock() + + // Signal c.createIO that it can call CloseIO + close(stdinCloseSync) + + if err = p.Start(ctx); err != nil { + p.Delete(context.Background()) + ctr.Lock() + delete(ctr.execs, processID) + ctr.Unlock() + return -1, err + } + + return int(p.Pid()), nil +} + +func (c *client) SignalProcess(ctx context.Context, containerID, processID string, signal int) error { + p, err := c.getProcess(containerID, processID) + if err != nil { + return err + } + return p.Kill(ctx, syscall.Signal(signal)) +} + +func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error { + p, err := c.getProcess(containerID, processID) + if err != nil { + return err + } + + return p.Resize(ctx, uint32(width), uint32(height)) +} + +func (c *client) CloseStdin(ctx context.Context, containerID, processID string) error { + p, err := c.getProcess(containerID, processID) + if err != nil { + return err + } + + return p.CloseIO(ctx, containerd.WithStdinCloser) +} + +func (c *client) Pause(ctx context.Context, containerID string) error { + p, err := c.getProcess(containerID, InitProcessName) + if err != nil { + return err + } + + return p.(containerd.Task).Pause(ctx) +} + +func (c *client) Resume(ctx context.Context, containerID string) error { + p, err := c.getProcess(containerID, InitProcessName) + if err != nil { + return err + } + + return p.(containerd.Task).Resume(ctx) +} + +func (c *client) Stats(ctx context.Context, containerID string) (*Stats, error) { + p, err := c.getProcess(containerID, InitProcessName) + if err != nil { + return nil, err + } + + m, err := p.(containerd.Task).Metrics(ctx) + if err != nil { + return nil, err + } + + v, err := typeurl.UnmarshalAny(m.Data) + if err != nil { + return nil, err + } + return interfaceToStats(m.Timestamp, v), nil +} + +func (c *client) ListPids(ctx context.Context, containerID string) ([]uint32, error) { + p, err := c.getProcess(containerID, InitProcessName) + if err != nil { + return nil, err + } + + pis, err := p.(containerd.Task).Pids(ctx) + if err != nil { + return nil, err + } + + var pids []uint32 + for _, i := range pis { + pids = append(pids, i.Pid) + } + + return pids, nil 
+}
+
+func (c *client) Summary(ctx context.Context, containerID string) ([]Summary, error) {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
+	pis, err := p.(containerd.Task).Pids(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	var infos []Summary
+	for _, pi := range pis {
+		i, err := typeurl.UnmarshalAny(pi.Info)
+		if err != nil {
+			return nil, errors.Wrap(err, "unable to decode process details")
+		}
+		s, err := summaryFromInterface(i)
+		if err != nil {
+			return nil, err
+		}
+		infos = append(infos, *s)
+	}
+
+	return infos, nil
+}
+
+func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return 255, time.Now(), nil
+	}
+
+	status, err := p.(containerd.Task).Delete(ctx)
+	if err != nil {
+		return 255, time.Now(), nil
+	}
+
+	c.Lock()
+	if ctr, ok := c.containers[containerID]; ok {
+		ctr.task = nil
+	}
+	c.Unlock()
+
+	return status.ExitCode(), status.ExitTime(), nil
+}
+
+func (c *client) Delete(ctx context.Context, containerID string) error {
+	ctr := c.getContainer(containerID)
+	if ctr == nil {
+		return errors.WithStack(newNotFoundError("no such container"))
+	}
+
+	if err := ctr.ctr.Delete(ctx); err != nil {
+		return err
+	}
+
+	// Clean up the bundle directory unless LIBCONTAINERD_NOCLEAN is set
+	// (in which case it is kept for debugging)
+	if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
+		if err := os.RemoveAll(ctr.bundleDir); err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container": containerID,
+				"bundle":    ctr.bundleDir,
+			}).Error("failed to remove state dir")
+		}
+	}
+
+	c.removeContainer(containerID)
+
+	return nil
+}
+
+func (c *client) Status(ctx context.Context, containerID string) (Status, error) {
+	ctr := c.getContainer(containerID)
+	if ctr == nil {
+		return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
+	}
+
+	s, err := ctr.task.Status(ctx)
+	if err != nil {
+		return StatusUnknown, err
+	}
+
+	return Status(s.Status), nil
+}
+
+func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	img, err := p.(containerd.Task).Checkpoint(ctx)
+	if err != nil {
+		return err
+	}
+	// Whatever happens, delete the checkpoint from containerd
+	defer func() {
+		err := c.remote.ImageService().Delete(context.Background(), img.Name())
+		if err != nil {
+			c.logger.WithError(err).WithField("digest", img.Target().Digest).
+				Warnf("failed to delete checkpoint image")
+		}
+	}()
+
+	b, err := content.ReadBlob(ctx, c.remote.ContentStore(), img.Target().Digest)
+	if err != nil {
+		return wrapSystemError(errors.Wrap(err, "failed to retrieve checkpoint data"))
+	}
+	var index v1.Index
+	if err := json.Unmarshal(b, &index); err != nil {
+		return wrapSystemError(errors.Wrap(err, "failed to decode checkpoint data"))
+	}
+
+	var cpDesc *v1.Descriptor
+	for _, m := range index.Manifests {
+		if m.MediaType == images.MediaTypeContainerd1Checkpoint {
+			cpDesc = &m
+			break
+		}
+	}
+	if cpDesc == nil {
+		return wrapSystemError(errors.New("invalid checkpoint"))
+	}
+
+	rat, err := c.remote.ContentStore().ReaderAt(ctx, cpDesc.Digest)
+	if err != nil {
+		return wrapSystemError(errors.Wrap(err, "failed to get checkpoint reader"))
+	}
+	defer rat.Close()
+	_, err = archive.Apply(ctx, checkpointDir, content.NewReader(rat))
+	if err != nil {
+		return wrapSystemError(errors.Wrap(err, "failed to apply checkpoint data"))
+	}
+
+	return nil
+}
+
+func (c *client) getContainer(id string) *container {
+	c.RLock()
+	ctr := c.containers[id]
+	c.RUnlock()
+
+	return ctr
+}
+
+func (c *client) removeContainer(id string) {
+	c.Lock()
+	delete(c.containers, id)
+	c.Unlock()
+}
+
+func (c *client) getProcess(containerID, processID string) (containerd.Process, error) {
+	ctr := c.getContainer(containerID)
+	switch {
+	case ctr == nil:
+		return nil, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.task == nil:
+		return nil, errors.WithStack(newNotFoundError("container is not running"))
+	case processID == InitProcessName:
+		return ctr.task, nil
+	default:
+		ctr.Lock()
+		defer ctr.Unlock()
+		if ctr.execs == nil {
+			return nil, errors.WithStack(newNotFoundError("no execs"))
+		}
+	}
+
+	p := ctr.execs[processID]
+	if p == nil {
+		return nil, errors.WithStack(newNotFoundError("no such exec"))
+	}
+
+	return p, nil
+}
+
+// createIO creates the io to be used by a process.
+// The process may not have been registered yet when the returned stdin
+// closer runs, so the lookup has to be deferred until close time; the
+// stdinCloseSync channel synchronizes this.
+func (c *client) createIO(bundleDir, containerID, processID string, stdinCloseSync chan struct{}, withStdin, withTerminal bool, attachStdio StdioCallback) (containerd.IO, error) {
+	fifos := newFIFOSet(bundleDir, containerID, processID, withStdin, withTerminal)
+	io, err := newIOPipe(fifos)
+	if err != nil {
+		return nil, err
+	}
+
+	if io.Stdin != nil {
+		var (
+			err       error
+			stdinOnce sync.Once
+		)
+		pipe := io.Stdin
+		io.Stdin = ioutils.NewWriteCloserWrapper(pipe, func() error {
+			stdinOnce.Do(func() {
+				err = pipe.Close()
+				// Do the rest in a new routine to avoid a deadlock if the
+				// Exec/Start call failed.
+				go func() {
+					<-stdinCloseSync
+					p, err := c.getProcess(containerID, processID)
+					if err == nil {
+						err = p.CloseIO(context.Background(), containerd.WithStdinCloser)
+						if err != nil && strings.Contains(err.Error(), "transport is closing") {
+							err = nil
+						}
+					}
+				}()
+			})
+			return err
+		})
+	}
+
+	cio, err := attachStdio(io)
+	if err != nil {
+		io.Cancel()
+		io.Close()
+	}
+	return cio, err
+}
+
+func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
+	c.eventQ.append(ei.ContainerID, func() {
+		err := c.backend.ProcessEvent(ei.ContainerID, et, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container":  ei.ContainerID,
+				"event":      et,
+				"event-info": ei,
+			}).Error("failed to process event")
+		}
+
+		if et == EventExit && ei.ProcessID != ei.ContainerID {
+			var p containerd.Process
+			ctr.Lock()
+			if ctr.execs != nil {
+				p = ctr.execs[ei.ProcessID]
+			}
+			ctr.Unlock()
+			if p == nil {
+				c.logger.WithError(errors.New("no such process")).
+					WithFields(logrus.Fields{
+						"container": ei.ContainerID,
+						"process":   ei.ProcessID,
+					}).Error("exit event")
+				return
+			}
+			_, err = p.Delete(context.Background())
+			if err != nil {
+				c.logger.WithError(err).WithFields(logrus.Fields{
+					"container": ei.ContainerID,
+					"process":   ei.ProcessID,
+				}).Warn("failed to delete process")
+			}
+			// Take the container lock (not the client lock), to match the
+			// lock used for the ctr.execs read above and in Exec
+			ctr.Lock()
+			delete(ctr.execs, ei.ProcessID)
+			ctr.Unlock()
+		}
+	})
+}
+
+func (c *client) processEventStream(ctx context.Context) {
+	var (
+		err         error
+		eventStream eventsapi.Events_SubscribeClient
+		ev          *eventsapi.Envelope
+		et          EventType
+		ei          EventInfo
+		ctr         *container
+	)
+	defer func() {
+		if err != nil {
+			select {
+			case <-ctx.Done():
+				c.logger.WithError(ctx.Err()).
+					Info("stopping event stream following graceful shutdown")
+			default:
+				go c.processEventStream(ctx)
+			}
+		}
+	}()
+
+	eventStream, err = c.remote.EventService().Subscribe(ctx, &eventsapi.SubscribeRequest{
+		Filters: []string{"namespace==" + c.namespace + ",topic~=/tasks/.+"},
+	}, grpc.FailFast(false))
+	if err != nil {
+		return
+	}
+
+	var oomKilled bool
+	for {
+		ev, err = eventStream.Recv()
+		if err != nil {
+			c.logger.WithError(err).Error("failed to get event")
+			return
+		}
+
+		if ev.Event == nil {
+			c.logger.WithField("event", ev).Warn("invalid event")
+			continue
+		}
+
+		v, err := typeurl.UnmarshalAny(ev.Event)
+		if err != nil {
+			c.logger.WithError(err).WithField("event", ev).Warn("failed to unmarshal event")
+			continue
+		}
+
+		c.logger.WithField("topic", ev.Topic).Debug("event")
+
+		switch t := v.(type) {
+		case *eventsapi.TaskCreate:
+			et = EventCreate
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ContainerID,
+				Pid:         t.Pid,
+			}
+		case *eventsapi.TaskStart:
+			et = EventStart
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ContainerID,
+				Pid:         t.Pid,
+			}
+		case *eventsapi.TaskExit:
+			et = EventExit
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ID,
+				Pid:         t.Pid,
+				ExitCode:    t.ExitStatus,
+				ExitedAt:    t.ExitedAt,
+			}
+		case *eventsapi.TaskOOM:
+			et = EventOOM
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				OOMKilled:   true,
+			}
+			oomKilled = true
+		case *eventsapi.TaskExecAdded:
+			et = EventExecAdded
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ExecID,
+			}
+		case *eventsapi.TaskExecStarted:
+			et = EventExecStarted
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ExecID,
+				Pid:         t.Pid,
+			}
+		case *eventsapi.TaskPaused:
+			et = EventPaused
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+			}
+		case *eventsapi.TaskResumed:
+			et = EventResumed
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+			}
+		default:
+			c.logger.WithFields(logrus.Fields{
+				"topic": ev.Topic,
+				"type":  reflect.TypeOf(t)},
+			).Info("ignoring event")
+			continue
+		}
+
+		ctr = c.getContainer(ei.ContainerID)
+		if ctr == nil {
+			c.logger.WithField("container", ei.ContainerID).Warn("unknown container")
+			continue
+		}
+
+		if oomKilled {
+			ctr.oomKilled = true
+			oomKilled = false
+		}
+		ei.OOMKilled = ctr.oomKilled
+
+		c.processEvent(ctr, et, ei)
+	}
+}
+
+func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.Reader) (*types.Descriptor, error) {
+	writer, err := c.remote.ContentStore().Writer(ctx, ref, 0, "")
+	if err != nil {
+		return nil, err
+	}
+	defer writer.Close()
+	size, err := io.Copy(writer, r)
+	if err != nil {
+		return nil, err
+	}
+	labels := map[string]string{
+		"containerd.io/gc.root": time.Now().UTC().Format(time.RFC3339),
+	}
+	if err := writer.Commit(ctx, 0, "", content.WithLabels(labels)); err != nil {
+		return nil, err
+	}
+	return &types.Descriptor{
+		MediaType: mediaType,
+		Digest:    writer.Digest(),
+		Size_:     size,
+	}, nil
+}
+
+func wrapError(err error) error {
+	if err != nil {
+		msg := err.Error()
+		for _, s := range []string{"container does not exist", "not found", "no such container"} {
+			if strings.Contains(msg, s) {
+				return wrapNotFoundError(err)
+			}
+		}
+	}
+	return err
+}
diff --git a/libcontainerd/client_daemon_linux.go b/libcontainerd/client_daemon_linux.go
new file mode 100644
index 0000000000..03371954cc
--- /dev/null
+++ b/libcontainerd/client_daemon_linux.go
@@ -0,0 +1,96 @@
+package libcontainerd
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/containerd/containerd"
+	"github.com/docker/docker/pkg/idtools"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+func summaryFromInterface(i interface{}) (*Summary, error) {
+	return &Summary{}, nil
+}
+
+func (c *client) UpdateResources(ctx context.Context, containerID string, resources *Resources) error {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	// Go 1.8 doesn't like the type alias here, so this conversion needs
+	// to be platform specific
+	return p.(containerd.Task).Update(ctx, containerd.WithResources((*specs.LinuxResources)(resources)))
+}
+
+func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int {
+	for _, m := range mp {
+		if id >= m.ContainerID && id <= m.ContainerID+m.Size-1 {
+			return int(m.HostID + id - m.ContainerID)
+		}
+	}
+	return 0
+}
+
+func getSpecUser(ociSpec *specs.Spec) (int, int) {
+	var (
+		uid int
+		gid int
+	)
+
+	for _, ns := range ociSpec.Linux.Namespaces {
+		if ns.Type == specs.UserNamespace {
+			uid = hostIDFromMap(0, ociSpec.Linux.UIDMappings)
+			gid = hostIDFromMap(0, ociSpec.Linux.GIDMappings)
+			break
+		}
+	}
+
+	return uid, gid
+}
+
+func prepareBundleDir(bundleDir string, ociSpec *specs.Spec) (string, error) {
+	uid, gid := getSpecUser(ociSpec)
+	if uid == 0 && gid == 0 {
+		return bundleDir, idtools.MkdirAllAndChownNew(bundleDir, 0755, idtools.IDPair{0, 0})
+	}
+
+	p := string(filepath.Separator)
+	components := strings.Split(bundleDir, string(filepath.Separator))
+	for _, d := range components[1:] {
+		p = filepath.Join(p, d)
+		fi, err := os.Stat(p)
+		if err != nil && !os.IsNotExist(err) {
+			return "", err
+		}
+		if os.IsNotExist(err) || fi.Mode()&1 == 0 {
+			p = fmt.Sprintf("%s.%d.%d", p, uid, gid)
+			if err := idtools.MkdirAndChown(p, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) {
+				return "", err
+			}
+		}
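+		// Any component that did not exist or was not world-searchable
+		// (o+x unset) has been replaced above by a "<dir>.<uid>.<gid>"
+		// variant owned by the remapped root, so the remapped user can
+		// traverse down to the bundle directory.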
+ } + + return p, nil +} + +func newFIFOSet(bundleDir, containerID, processID string, withStdin, withTerminal bool) *containerd.FIFOSet { + fifos := &containerd.FIFOSet{ + Terminal: withTerminal, + Out: filepath.Join(bundleDir, processID+"-stdout"), + } + + if withStdin { + fifos.In = filepath.Join(bundleDir, processID+"-stdin") + } + + if !fifos.Terminal { + fifos.Err = filepath.Join(bundleDir, processID+"-stderr") + } + + return fifos +} diff --git a/libcontainerd/client_daemon_windows.go b/libcontainerd/client_daemon_windows.go new file mode 100644 index 0000000000..9bb5d86f44 --- /dev/null +++ b/libcontainerd/client_daemon_windows.go @@ -0,0 +1,53 @@ +package libcontainerd + +import ( + "fmt" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/windows/hcsshimtypes" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" +) + +func summaryFromInterface(i interface{}) (*Summary, error) { + switch pd := i.(type) { + case *hcsshimtypes.ProcessDetails: + return &Summary{ + CreateTimestamp: pd.CreatedAt, + ImageName: pd.ImageName, + KernelTime100ns: pd.KernelTime_100Ns, + MemoryCommitBytes: pd.MemoryCommitBytes, + MemoryWorkingSetPrivateBytes: pd.MemoryWorkingSetPrivateBytes, + MemoryWorkingSetSharedBytes: pd.MemoryWorkingSetSharedBytes, + ProcessId: pd.ProcessID, + UserTime100ns: pd.UserTime_100Ns, + }, nil + default: + return nil, errors.Errorf("Unknown process details type %T", pd) + } +} + +func prepareBundleDir(bundleDir string, ociSpec *specs.Spec) (string, error) { + return bundleDir, nil +} + +func pipeName(containerID, processID, name string) string { + return fmt.Sprintf(`\\.\pipe\containerd-%s-%s-%s`, containerID, processID, name) +} + +func newFIFOSet(bundleDir, containerID, processID string, withStdin, withTerminal bool) *containerd.FIFOSet { + fifos := &containerd.FIFOSet{ + Terminal: withTerminal, + Out: pipeName(containerID, processID, "stdout"), + } + + if withStdin { + fifos.In = pipeName(containerID, processID, "stdin") + } + + if !fifos.Terminal { + fifos.Err = pipeName(containerID, processID, "stderr") + } + + return fifos +} diff --git a/libcontainerd/client_linux.go b/libcontainerd/client_linux.go deleted file mode 100644 index 12808fd0c1..0000000000 --- a/libcontainerd/client_linux.go +++ /dev/null @@ -1,616 +0,0 @@ -package libcontainerd - -import ( - "fmt" - "os" - "strings" - "sync" - "time" - - containerd "github.com/containerd/containerd/api/grpc/types" - containerd_runtime_types "github.com/containerd/containerd/runtime" - "github.com/docker/docker/pkg/ioutils" - "github.com/docker/docker/pkg/mount" - "github.com/golang/protobuf/ptypes" - "github.com/golang/protobuf/ptypes/timestamp" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" - "golang.org/x/net/context" - "golang.org/x/sys/unix" -) - -type client struct { - clientCommon - - // Platform specific properties below here. - remote *remote - q queue - exitNotifiers map[string]*exitNotifier - liveRestore bool -} - -// GetServerVersion returns the connected server version information -func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) { - resp, err := clnt.remote.apiClient.GetServerVersion(ctx, &containerd.GetServerVersionRequest{}) - if err != nil { - return nil, err - } - - sv := &ServerVersion{ - GetServerVersionResponse: *resp, - } - - return sv, nil -} - -// AddProcess is the handler for adding a process to an already running -// container. It's called through docker exec. 
It returns the system pid of the -// exec'd process. -func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, specp Process, attachStdio StdioCallback) (pid int, err error) { - clnt.lock(containerID) - defer clnt.unlock(containerID) - container, err := clnt.getContainer(containerID) - if err != nil { - return -1, err - } - - spec, err := container.spec() - if err != nil { - return -1, err - } - sp := spec.Process - sp.Args = specp.Args - sp.Terminal = specp.Terminal - if len(specp.Env) > 0 { - sp.Env = specp.Env - } - if specp.Cwd != nil { - sp.Cwd = *specp.Cwd - } - if specp.User != nil { - sp.User = specs.User{ - UID: specp.User.UID, - GID: specp.User.GID, - AdditionalGids: specp.User.AdditionalGids, - } - } - if specp.Capabilities != nil { - sp.Capabilities.Bounding = specp.Capabilities - sp.Capabilities.Effective = specp.Capabilities - sp.Capabilities.Inheritable = specp.Capabilities - sp.Capabilities.Permitted = specp.Capabilities - } - - p := container.newProcess(processFriendlyName) - - r := &containerd.AddProcessRequest{ - Args: sp.Args, - Cwd: sp.Cwd, - Terminal: sp.Terminal, - Id: containerID, - Env: sp.Env, - User: &containerd.User{ - Uid: sp.User.UID, - Gid: sp.User.GID, - AdditionalGids: sp.User.AdditionalGids, - }, - Pid: processFriendlyName, - Stdin: p.fifo(unix.Stdin), - Stdout: p.fifo(unix.Stdout), - Stderr: p.fifo(unix.Stderr), - Capabilities: sp.Capabilities.Effective, - ApparmorProfile: sp.ApparmorProfile, - SelinuxLabel: sp.SelinuxLabel, - NoNewPrivileges: sp.NoNewPrivileges, - Rlimits: convertRlimits(sp.Rlimits), - } - - fifoCtx, cancel := context.WithCancel(context.Background()) - defer func() { - if err != nil { - cancel() - } - }() - - iopipe, err := p.openFifos(fifoCtx, sp.Terminal) - if err != nil { - return -1, err - } - - resp, err := clnt.remote.apiClient.AddProcess(ctx, r) - if err != nil { - p.closeFifos(iopipe) - return -1, err - } - - var stdinOnce sync.Once - stdin := iopipe.Stdin - iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error { - var err error - stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed - err = stdin.Close() - if err2 := p.sendCloseStdin(); err == nil { - err = err2 - } - }) - return err - }) - - container.processes[processFriendlyName] = p - - if err := attachStdio(*iopipe); err != nil { - p.closeFifos(iopipe) - return -1, err - } - - return int(resp.SystemPid), nil -} - -func (clnt *client) SignalProcess(containerID string, pid string, sig int) error { - clnt.lock(containerID) - defer clnt.unlock(containerID) - _, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{ - Id: containerID, - Pid: pid, - Signal: uint32(sig), - }) - return err -} - -func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error { - clnt.lock(containerID) - defer clnt.unlock(containerID) - if _, err := clnt.getContainer(containerID); err != nil { - return err - } - _, err := clnt.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{ - Id: containerID, - Pid: processFriendlyName, - Width: uint32(width), - Height: uint32(height), - }) - return err -} - -func (clnt *client) Pause(containerID string) error { - return clnt.setState(containerID, StatePause) -} - -func (clnt *client) setState(containerID, state string) error { - clnt.lock(containerID) - container, err := clnt.getContainer(containerID) - if err != nil { - clnt.unlock(containerID) - return err - } - if container.systemPid == 
0 { - clnt.unlock(containerID) - return fmt.Errorf("No active process for container %s", containerID) - } - st := "running" - if state == StatePause { - st = "paused" - } - chstate := make(chan struct{}) - _, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{ - Id: containerID, - Pid: InitFriendlyName, - Status: st, - }) - if err != nil { - clnt.unlock(containerID) - return err - } - container.pauseMonitor.append(state, chstate) - clnt.unlock(containerID) - <-chstate - return nil -} - -func (clnt *client) Resume(containerID string) error { - return clnt.setState(containerID, StateResume) -} - -func (clnt *client) Stats(containerID string) (*Stats, error) { - resp, err := clnt.remote.apiClient.Stats(context.Background(), &containerd.StatsRequest{containerID}) - if err != nil { - return nil, err - } - return (*Stats)(resp), nil -} - -// Take care of the old 1.11.0 behavior in case the version upgrade -// happened without a clean daemon shutdown -func (clnt *client) cleanupOldRootfs(containerID string) { - // Unmount and delete the bundle folder - if mts, err := mount.GetMounts(); err == nil { - for _, mts := range mts { - if strings.HasSuffix(mts.Mountpoint, containerID+"/rootfs") { - if err := unix.Unmount(mts.Mountpoint, unix.MNT_DETACH); err == nil { - os.RemoveAll(strings.TrimSuffix(mts.Mountpoint, "/rootfs")) - } - break - } - } - } -} - -func (clnt *client) setExited(containerID string, exitCode uint32) error { - clnt.lock(containerID) - defer clnt.unlock(containerID) - - err := clnt.backend.StateChanged(containerID, StateInfo{ - CommonStateInfo: CommonStateInfo{ - State: StateExit, - ExitCode: exitCode, - }}) - - clnt.cleanupOldRootfs(containerID) - - return err -} - -func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) { - cont, err := clnt.getContainerdContainer(containerID) - if err != nil { - return nil, err - } - pids := make([]int, len(cont.Pids)) - for i, p := range cont.Pids { - pids[i] = int(p) - } - return pids, nil -} - -// Summary returns a summary of the processes running in a container. -// This is a no-op on Linux. 
-func (clnt *client) Summary(containerID string) ([]Summary, error) { - return nil, nil -} - -func (clnt *client) getContainerdContainer(containerID string) (*containerd.Container, error) { - resp, err := clnt.remote.apiClient.State(context.Background(), &containerd.StateRequest{Id: containerID}) - if err != nil { - return nil, err - } - for _, cont := range resp.Containers { - if cont.Id == containerID { - return cont, nil - } - } - return nil, fmt.Errorf("invalid state response") -} - -func (clnt *client) UpdateResources(containerID string, resources Resources) error { - clnt.lock(containerID) - defer clnt.unlock(containerID) - container, err := clnt.getContainer(containerID) - if err != nil { - return err - } - if container.systemPid == 0 { - return fmt.Errorf("No active process for container %s", containerID) - } - _, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{ - Id: containerID, - Pid: InitFriendlyName, - Resources: (*containerd.UpdateResource)(&resources), - }) - return err -} - -func (clnt *client) getExitNotifier(containerID string) *exitNotifier { - clnt.mapMutex.RLock() - defer clnt.mapMutex.RUnlock() - return clnt.exitNotifiers[containerID] -} - -func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier { - clnt.mapMutex.Lock() - w, ok := clnt.exitNotifiers[containerID] - defer clnt.mapMutex.Unlock() - if !ok { - w = &exitNotifier{c: make(chan struct{}), client: clnt} - clnt.exitNotifiers[containerID] = w - } - return w -} - -func (clnt *client) restore(cont *containerd.Container, lastEvent *containerd.Event, attachStdio StdioCallback, options ...CreateOption) (err error) { - clnt.lock(cont.Id) - defer clnt.unlock(cont.Id) - - logrus.Debugf("libcontainerd: restore container %s state %s", cont.Id, cont.Status) - - containerID := cont.Id - if _, err := clnt.getContainer(containerID); err == nil { - return fmt.Errorf("container %s is already active", containerID) - } - - defer func() { - if err != nil { - clnt.deleteContainer(cont.Id) - } - }() - - container := clnt.newContainer(cont.BundlePath, options...) 
- container.systemPid = systemPid(cont) - - var terminal bool - for _, p := range cont.Processes { - if p.Pid == InitFriendlyName { - terminal = p.Terminal - } - } - - fifoCtx, cancel := context.WithCancel(context.Background()) - defer func() { - if err != nil { - cancel() - } - }() - - iopipe, err := container.openFifos(fifoCtx, terminal) - if err != nil { - return err - } - var stdinOnce sync.Once - stdin := iopipe.Stdin - iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error { - var err error - stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed - err = stdin.Close() - }) - return err - }) - - if err := attachStdio(*iopipe); err != nil { - container.closeFifos(iopipe) - return err - } - - clnt.appendContainer(container) - - err = clnt.backend.StateChanged(containerID, StateInfo{ - CommonStateInfo: CommonStateInfo{ - State: StateRestore, - Pid: container.systemPid, - }}) - - if err != nil { - container.closeFifos(iopipe) - return err - } - - if lastEvent != nil { - // This should only be a pause or resume event - if lastEvent.Type == StatePause || lastEvent.Type == StateResume { - return clnt.backend.StateChanged(containerID, StateInfo{ - CommonStateInfo: CommonStateInfo{ - State: lastEvent.Type, - Pid: container.systemPid, - }}) - } - - logrus.Warnf("libcontainerd: unexpected backlog event: %#v", lastEvent) - } - - return nil -} - -func (clnt *client) getContainerLastEventSinceTime(id string, tsp *timestamp.Timestamp) (*containerd.Event, error) { - er := &containerd.EventsRequest{ - Timestamp: tsp, - StoredOnly: true, - Id: id, - } - events, err := clnt.remote.apiClient.Events(context.Background(), er) - if err != nil { - logrus.Errorf("libcontainerd: failed to get container events stream for %s: %q", er.Id, err) - return nil, err - } - - var ev *containerd.Event - for { - e, err := events.Recv() - if err != nil { - if err.Error() == "EOF" { - break - } - logrus.Errorf("libcontainerd: failed to get container event for %s: %q", id, err) - return nil, err - } - ev = e - logrus.Debugf("libcontainerd: received past event %#v", ev) - } - - return ev, nil -} - -func (clnt *client) getContainerLastEvent(id string) (*containerd.Event, error) { - ev, err := clnt.getContainerLastEventSinceTime(id, clnt.remote.restoreFromTimestamp) - if err == nil && ev == nil { - // If ev is nil and the container is running in containerd, - // we already consumed all the event of the - // container, included the "exit" one. - // Thus, we request all events containerd has in memory for - // this container in order to get the last one (which should - // be an exit event) - logrus.Warnf("libcontainerd: client is out of sync, restore was called on a fully synced container (%s).", id) - // Request all events since beginning of time - t := time.Unix(0, 0) - tsp, err := ptypes.TimestampProto(t) - if err != nil { - logrus.Errorf("libcontainerd: getLastEventSinceTime() failed to convert timestamp: %q", err) - return nil, err - } - - return clnt.getContainerLastEventSinceTime(id, tsp) - } - - return ev, err -} - -func (clnt *client) Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error { - // Synchronize with live events - clnt.remote.Lock() - defer clnt.remote.Unlock() - // Check that containerd still knows this container. - // - // In the unlikely event that Restore for this container process - // the its past event before the main loop, the event will be - // processed twice. 
However, this is not an issue as all those - // events will do is change the state of the container to be - // exactly the same. - cont, err := clnt.getContainerdContainer(containerID) - // Get its last event - ev, eerr := clnt.getContainerLastEvent(containerID) - if err != nil || containerd_runtime_types.State(cont.Status) == containerd_runtime_types.Stopped { - if err != nil { - logrus.Warnf("libcontainerd: failed to retrieve container %s state: %v", containerID, err) - } - if ev != nil && (ev.Pid != InitFriendlyName || ev.Type != StateExit) { - // Wait a while for the exit event - timeout := time.NewTimer(10 * time.Second) - tick := time.NewTicker(100 * time.Millisecond) - stop: - for { - select { - case <-timeout.C: - break stop - case <-tick.C: - ev, eerr = clnt.getContainerLastEvent(containerID) - if eerr != nil { - break stop - } - if ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit { - break stop - } - } - } - timeout.Stop() - tick.Stop() - } - - // get the exit status for this container, if we don't have - // one, indicate an error - ec := uint32(255) - if eerr == nil && ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit { - ec = ev.Status - } - clnt.setExited(containerID, ec) - - return nil - } - - // container is still alive - if clnt.liveRestore { - if err := clnt.restore(cont, ev, attachStdio, options...); err != nil { - logrus.Errorf("libcontainerd: error restoring %s: %v", containerID, err) - } - return nil - } - - // Kill the container if liveRestore == false - w := clnt.getOrCreateExitNotifier(containerID) - clnt.lock(cont.Id) - container := clnt.newContainer(cont.BundlePath) - container.systemPid = systemPid(cont) - clnt.appendContainer(container) - clnt.unlock(cont.Id) - - container.discardFifos() - - if err := clnt.Signal(containerID, int(unix.SIGTERM)); err != nil { - logrus.Errorf("libcontainerd: error sending sigterm to %v: %v", containerID, err) - } - - // Let the main loop handle the exit event - clnt.remote.Unlock() - - if ev != nil && ev.Type == StatePause { - // resume container, it depends on the main loop, so we do it after Unlock() - logrus.Debugf("libcontainerd: %s was paused, resuming it so it can die", containerID) - if err := clnt.Resume(containerID); err != nil { - return fmt.Errorf("failed to resume container: %v", err) - } - } - - select { - case <-time.After(10 * time.Second): - if err := clnt.Signal(containerID, int(unix.SIGKILL)); err != nil { - logrus.Errorf("libcontainerd: error sending sigkill to %v: %v", containerID, err) - } - select { - case <-time.After(2 * time.Second): - case <-w.wait(): - // relock because of the defer - clnt.remote.Lock() - return nil - } - case <-w.wait(): - // relock because of the defer - clnt.remote.Lock() - return nil - } - // relock because of the defer - clnt.remote.Lock() - - clnt.deleteContainer(containerID) - - return clnt.setExited(containerID, uint32(255)) -} - -func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error { - clnt.lock(containerID) - defer clnt.unlock(containerID) - if _, err := clnt.getContainer(containerID); err != nil { - return err - } - - _, err := clnt.remote.apiClient.CreateCheckpoint(context.Background(), &containerd.CreateCheckpointRequest{ - Id: containerID, - Checkpoint: &containerd.Checkpoint{ - Name: checkpointID, - Exit: exit, - Tcp: true, - UnixSockets: true, - Shell: false, - EmptyNS: []string{"network"}, - }, - CheckpointDir: checkpointDir, - }) - return err -} - -func (clnt *client) 
DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error { - clnt.lock(containerID) - defer clnt.unlock(containerID) - if _, err := clnt.getContainer(containerID); err != nil { - return err - } - - _, err := clnt.remote.apiClient.DeleteCheckpoint(context.Background(), &containerd.DeleteCheckpointRequest{ - Id: containerID, - Name: checkpointID, - CheckpointDir: checkpointDir, - }) - return err -} - -func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) { - clnt.lock(containerID) - defer clnt.unlock(containerID) - if _, err := clnt.getContainer(containerID); err != nil { - return nil, err - } - - resp, err := clnt.remote.apiClient.ListCheckpoint(context.Background(), &containerd.ListCheckpointRequest{ - Id: containerID, - CheckpointDir: checkpointDir, - }) - if err != nil { - return nil, err - } - return (*Checkpoints)(resp), nil -} diff --git a/libcontainerd/client_local_windows.go b/libcontainerd/client_local_windows.go new file mode 100644 index 0000000000..209b00db67 --- /dev/null +++ b/libcontainerd/client_local_windows.go @@ -0,0 +1,1340 @@ +package libcontainerd + +import ( + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "path" + "path/filepath" + "regexp" + "strings" + "sync" + "syscall" + "time" + + "github.com/Microsoft/hcsshim" + opengcs "github.com/Microsoft/opengcs/client" + "github.com/docker/docker/pkg/sysinfo" + "github.com/docker/docker/pkg/system" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/windows" +) + +const InitProcessName = "init" + +type process struct { + id string + pid int + hcsProcess hcsshim.Process +} + +type container struct { + sync.Mutex + + // The ociSpec is required, as client.Create() needs a spec, but can + // be called from the RestartManager context which does not otherwise + // have access to the Spec + ociSpec *specs.Spec + + isWindows bool + manualStopRequested bool + hcsContainer hcsshim.Container + + id string + status Status + exitedAt time.Time + exitCode uint32 + waitCh chan struct{} + init *process + execs map[string]*process + updatePending bool +} + +// Win32 error codes that are used for various workarounds +// These really should be ALL_CAPS to match golangs syscall library and standard +// Win32 error conventions, but golint insists on CamelCase. +const ( + CoEClassstring = syscall.Errno(0x800401F3) // Invalid class string + ErrorNoNetwork = syscall.Errno(1222) // The network is not present or not started + ErrorBadPathname = syscall.Errno(161) // The specified path is invalid + ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object +) + +// defaultOwner is a tag passed to HCS to allow it to differentiate between +// container creator management stacks. We hard code "docker" in the case +// of docker. +const defaultOwner = "docker" + +// Create is the entrypoint to create a container from a spec. +// Table below shows the fields required for HCS JSON calling parameters, +// where if not populated, is omitted. 
+// +-----------------+--------------------------------------------+---------------------------------------------------+ +// | | Isolation=Process | Isolation=Hyper-V | +// +-----------------+--------------------------------------------+---------------------------------------------------+ +// | VolumePath | \\?\\Volume{GUIDa} | | +// | LayerFolderPath | %root%\windowsfilter\containerID | %root%\windowsfilter\containerID (servicing only) | +// | Layers[] | ID=GUIDb;Path=%root%\windowsfilter\layerID | ID=GUIDb;Path=%root%\windowsfilter\layerID | +// | HvRuntime | | ImagePath=%root%\BaseLayerID\UtilityVM | +// +-----------------+--------------------------------------------+---------------------------------------------------+ +// +// Isolation=Process example: +// +// { +// "SystemType": "Container", +// "Name": "5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776", +// "Owner": "docker", +// "VolumePath": "\\\\\\\\?\\\\Volume{66d1ef4c-7a00-11e6-8948-00155ddbef9d}", +// "IgnoreFlushesDuringBoot": true, +// "LayerFolderPath": "C:\\\\control\\\\windowsfilter\\\\5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776", +// "Layers": [{ +// "ID": "18955d65-d45a-557b-bf1c-49d6dfefc526", +// "Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c" +// }], +// "HostName": "5e0055c814a6", +// "MappedDirectories": [], +// "HvPartition": false, +// "EndpointList": ["eef2649d-bb17-4d53-9937-295a8efe6f2c"], +// "Servicing": false +//} +// +// Isolation=Hyper-V example: +// +//{ +// "SystemType": "Container", +// "Name": "475c2c58933b72687a88a441e7e0ca4bd72d76413c5f9d5031fee83b98f6045d", +// "Owner": "docker", +// "IgnoreFlushesDuringBoot": true, +// "Layers": [{ +// "ID": "18955d65-d45a-557b-bf1c-49d6dfefc526", +// "Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c" +// }], +// "HostName": "475c2c58933b", +// "MappedDirectories": [], +// "HvPartition": true, +// "EndpointList": ["e1bb1e61-d56f-405e-b75d-fd520cefa0cb"], +// "DNSSearchList": "a.com,b.com,c.com", +// "HvRuntime": { +// "ImagePath": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c\\\\UtilityVM" +// }, +// "Servicing": false +//} +func (c *client) Create(_ context.Context, id string, spec *specs.Spec, runtimeOptions interface{}) error { + if ctr := c.getContainer(id); ctr != nil { + return errors.WithStack(newConflictError("id already in use")) + } + + // spec.Linux must be nil for Windows containers, but spec.Windows + // will be filled in regardless of container platform. This is a + // temporary workaround due to LCOW requiring layer folder paths, + // which are stored under spec.Windows. 
+ // + // TODO: @darrenstahlmsft fix this once the OCI spec is updated to + // support layer folder paths for LCOW + if spec.Linux == nil { + return c.createWindows(id, spec, runtimeOptions) + } + return c.createLinux(id, spec, runtimeOptions) +} + +func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions interface{}) error { + logger := c.logger.WithField("container", id) + configuration := &hcsshim.ContainerConfig{ + SystemType: "Container", + Name: id, + Owner: defaultOwner, + IgnoreFlushesDuringBoot: spec.Windows.IgnoreFlushesDuringBoot, + HostName: spec.Hostname, + HvPartition: false, + Servicing: spec.Windows.Servicing, + } + + if spec.Windows.Resources != nil { + if spec.Windows.Resources.CPU != nil { + if spec.Windows.Resources.CPU.Count != nil { + // This check is being done here rather than in adaptContainerSettings + // because we don't want to update the HostConfig in case this container + // is moved to a host with more CPUs than this one. + cpuCount := *spec.Windows.Resources.CPU.Count + hostCPUCount := uint64(sysinfo.NumCPU()) + if cpuCount > hostCPUCount { + c.logger.Warnf("Changing requested CPUCount of %d to current number of processors, %d", cpuCount, hostCPUCount) + cpuCount = hostCPUCount + } + configuration.ProcessorCount = uint32(cpuCount) + } + if spec.Windows.Resources.CPU.Shares != nil { + configuration.ProcessorWeight = uint64(*spec.Windows.Resources.CPU.Shares) + } + if spec.Windows.Resources.CPU.Maximum != nil { + configuration.ProcessorMaximum = int64(*spec.Windows.Resources.CPU.Maximum) + } + } + if spec.Windows.Resources.Memory != nil { + if spec.Windows.Resources.Memory.Limit != nil { + configuration.MemoryMaximumInMB = int64(*spec.Windows.Resources.Memory.Limit) / 1024 / 1024 + } + } + if spec.Windows.Resources.Storage != nil { + if spec.Windows.Resources.Storage.Bps != nil { + configuration.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps + } + if spec.Windows.Resources.Storage.Iops != nil { + configuration.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops + } + } + } + + if spec.Windows.HyperV != nil { + configuration.HvPartition = true + } + + if spec.Windows.Network != nil { + configuration.EndpointList = spec.Windows.Network.EndpointList + configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery + if spec.Windows.Network.DNSSearchList != nil { + configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",") + } + configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName + } + + if cs, ok := spec.Windows.CredentialSpec.(string); ok { + configuration.Credentials = cs + } + + // We must have least two layers in the spec, the bottom one being a + // base image, the top one being the RW layer. + if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) < 2 { + return fmt.Errorf("OCI spec is invalid - at least two LayerFolders must be supplied to the runtime") + } + + // Strip off the top-most layer as that's passed in separately to HCS + configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1] + layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1] + + if configuration.HvPartition { + // We don't currently support setting the utility VM image explicitly. + // TODO @swernli/jhowardmsft circa RS3/4, this may be re-locatable. 
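+		// Instead, the utility VM image is located by probing each layer
+		// folder below for a "UtilityVM" subdirectory and taking the
+		// first (upper-most) match.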
+ if spec.Windows.HyperV.UtilityVMPath != "" { + return errors.New("runtime does not support an explicit utility VM path for Hyper-V containers") + } + + // Find the upper-most utility VM image. + var uvmImagePath string + for _, path := range layerFolders { + fullPath := filepath.Join(path, "UtilityVM") + _, err := os.Stat(fullPath) + if err == nil { + uvmImagePath = fullPath + break + } + if !os.IsNotExist(err) { + return err + } + } + if uvmImagePath == "" { + return errors.New("utility VM image could not be found") + } + configuration.HvRuntime = &hcsshim.HvRuntime{ImagePath: uvmImagePath} + + if spec.Root.Path != "" { + return errors.New("OCI spec is invalid - Root.Path must be omitted for a Hyper-V container") + } + } else { + const volumeGUIDRegex = `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}\\$` + if _, err := regexp.MatchString(volumeGUIDRegex, spec.Root.Path); err != nil { + return fmt.Errorf(`OCI spec is invalid - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, spec.Root.Path) + } + // HCS API requires the trailing backslash to be removed + configuration.VolumePath = spec.Root.Path[:len(spec.Root.Path)-1] + } + + if spec.Root.Readonly { + return errors.New(`OCI spec is invalid - Root.Readonly must not be set on Windows`) + } + + for _, layerPath := range layerFolders { + _, filename := filepath.Split(layerPath) + g, err := hcsshim.NameToGuid(filename) + if err != nil { + return err + } + configuration.Layers = append(configuration.Layers, hcsshim.Layer{ + ID: g.ToString(), + Path: layerPath, + }) + } + + // Add the mounts (volumes, bind mounts etc) to the structure + var mds []hcsshim.MappedDir + var mps []hcsshim.MappedPipe + for _, mount := range spec.Mounts { + const pipePrefix = `\\.\pipe\` + if mount.Type != "" { + return fmt.Errorf("OCI spec is invalid - Mount.Type '%s' must not be set", mount.Type) + } + if strings.HasPrefix(mount.Destination, pipePrefix) { + mp := hcsshim.MappedPipe{ + HostPath: mount.Source, + ContainerPipeName: mount.Destination[len(pipePrefix):], + } + mps = append(mps, mp) + } else { + md := hcsshim.MappedDir{ + HostPath: mount.Source, + ContainerPath: mount.Destination, + ReadOnly: false, + } + for _, o := range mount.Options { + if strings.ToLower(o) == "ro" { + md.ReadOnly = true + } + } + mds = append(mds, md) + } + } + configuration.MappedDirectories = mds + if len(mps) > 0 && system.GetOSVersion().Build < 16210 { // replace with Win10 RS3 build number at RTM + return errors.New("named pipe mounts are not supported on this version of Windows") + } + configuration.MappedPipes = mps + + hcsContainer, err := hcsshim.CreateContainer(id, configuration) + if err != nil { + return err + } + + // Construct a container object for calling start on it. + ctr := &container{ + id: id, + execs: make(map[string]*process), + isWindows: true, + ociSpec: spec, + hcsContainer: hcsContainer, + status: StatusCreated, + waitCh: make(chan struct{}), + } + + // Start the container. If this is a servicing container, this call + // will block until the container is done with the servicing + // execution. 
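+	// If Start fails, the partially-created compute system is torn down
+	// via terminateContainer below, so no half-started container is left
+	// registered with HCS.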
+ logger.Debug("starting container") + if err = hcsContainer.Start(); err != nil { + c.logger.WithError(err).Error("failed to start container") + ctr.debugGCS() + if err := c.terminateContainer(ctr); err != nil { + c.logger.WithError(err).Error("failed to cleanup after a failed Start") + } else { + c.logger.Debug("cleaned up after failed Start by calling Terminate") + } + return err + } + ctr.debugGCS() + + c.Lock() + c.containers[id] = ctr + c.Unlock() + + logger.Debug("createWindows() completed successfully") + return nil + +} + +func (c *client) createLinux(id string, spec *specs.Spec, runtimeOptions interface{}) error { + logrus.Debugf("libcontainerd: createLinux(): containerId %s ", id) + logger := c.logger.WithField("container", id) + + if runtimeOptions == nil { + return fmt.Errorf("lcow option must be supplied to the runtime") + } + lcowConfig, ok := runtimeOptions.(*opengcs.Config) + if !ok { + return fmt.Errorf("lcow option must be supplied to the runtime") + } + + configuration := &hcsshim.ContainerConfig{ + HvPartition: true, + Name: id, + SystemType: "container", + ContainerType: "linux", + Owner: defaultOwner, + TerminateOnLastHandleClosed: true, + } + + if lcowConfig.ActualMode == opengcs.ModeActualVhdx { + configuration.HvRuntime = &hcsshim.HvRuntime{ + ImagePath: lcowConfig.Vhdx, + BootSource: "Vhd", + WritableBootSource: false, + } + } else { + configuration.HvRuntime = &hcsshim.HvRuntime{ + ImagePath: lcowConfig.KirdPath, + LinuxKernelFile: lcowConfig.KernelFile, + LinuxInitrdFile: lcowConfig.InitrdFile, + LinuxBootParameters: lcowConfig.BootParameters, + } + } + + if spec.Windows == nil { + return fmt.Errorf("spec.Windows must not be nil for LCOW containers") + } + + // We must have least one layer in the spec + if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) == 0 { + return fmt.Errorf("OCI spec is invalid - at least one LayerFolders must be supplied to the runtime") + } + + // Strip off the top-most layer as that's passed in separately to HCS + configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1] + layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1] + + for _, layerPath := range layerFolders { + _, filename := filepath.Split(layerPath) + g, err := hcsshim.NameToGuid(filename) + if err != nil { + return err + } + configuration.Layers = append(configuration.Layers, hcsshim.Layer{ + ID: g.ToString(), + Path: filepath.Join(layerPath, "layer.vhd"), + }) + } + + if spec.Windows.Network != nil { + configuration.EndpointList = spec.Windows.Network.EndpointList + configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery + if spec.Windows.Network.DNSSearchList != nil { + configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",") + } + configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName + } + + // Add the mounts (volumes, bind mounts etc) to the structure. We have to do + // some translation for both the mapped directories passed into HCS and in + // the spec. + // + // For HCS, we only pass in the mounts from the spec which are type "bind". + // Further, the "ContainerPath" field (which is a little mis-leadingly + // named when it applies to the utility VM rather than the container in the + // utility VM) is moved to under /tmp/gcs//binds, where this is passed + // by the caller through a 'uvmpath' option. 
+ // + // We do similar translation for the mounts in the spec by stripping out + // the uvmpath option, and translating the Source path to the location in the + // utility VM calculated above. + // + // From inside the utility VM, you would see a 9p mount such as in the following + // where a host folder has been mapped to /target. The line with /tmp/gcs//binds + // specifically: + // + // / # mount + // rootfs on / type rootfs (rw,size=463736k,nr_inodes=115934) + // proc on /proc type proc (rw,relatime) + // sysfs on /sys type sysfs (rw,relatime) + // udev on /dev type devtmpfs (rw,relatime,size=498100k,nr_inodes=124525,mode=755) + // tmpfs on /run type tmpfs (rw,relatime) + // cgroup on /sys/fs/cgroup type cgroup (rw,relatime,cpuset,cpu,cpuacct,blkio,memory,devices,freezer,net_cls,perf_event,net_prio,hugetlb,pids,rdma) + // mqueue on /dev/mqueue type mqueue (rw,relatime) + // devpts on /dev/pts type devpts (rw,relatime,mode=600,ptmxmode=000) + // /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target on /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target type 9p (rw,sync,dirsync,relatime,trans=fd,rfdno=6,wfdno=6) + // /dev/pmem0 on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0 type ext4 (ro,relatime,block_validity,delalloc,norecovery,barrier,dax,user_xattr,acl) + // /dev/sda on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch type ext4 (rw,relatime,block_validity,delalloc,barrier,user_xattr,acl) + // overlay on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/rootfs type overlay (rw,relatime,lowerdir=/tmp/base/:/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0,upperdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/upper,workdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/work) + // + // /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l + // total 16 + // drwx------ 3 0 0 60 Sep 7 18:54 binds + // -rw-r--r-- 1 0 0 3345 Sep 7 18:54 config.json + // drwxr-xr-x 10 0 0 4096 Sep 6 17:26 layer0 + // drwxr-xr-x 1 0 0 4096 Sep 7 18:54 rootfs + // drwxr-xr-x 5 0 0 4096 Sep 7 18:54 scratch + // + // /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l binds + // total 0 + // drwxrwxrwt 2 0 0 4096 Sep 7 16:51 target + + mds := []hcsshim.MappedDir{} + specMounts := []specs.Mount{} + for _, mount := range spec.Mounts { + specMount := mount + if mount.Type == "bind" { + // Strip out the uvmpath from the options + updatedOptions := []string{} + uvmPath := "" + readonly := false + for _, opt := range mount.Options { + dropOption := false + elements := strings.SplitN(opt, "=", 2) + switch elements[0] { + case "uvmpath": + uvmPath = elements[1] + dropOption = true + case "rw": + case "ro": + readonly = true + case "rbind": + default: + return fmt.Errorf("unsupported option %q", opt) + } + if !dropOption { + updatedOptions = append(updatedOptions, opt) + } + } + mount.Options = updatedOptions + if uvmPath == "" { + return fmt.Errorf("no uvmpath for bind mount %+v", mount) + } + md := hcsshim.MappedDir{ + HostPath: mount.Source, + ContainerPath: path.Join(uvmPath, mount.Destination), + CreateInUtilityVM: true, + ReadOnly: readonly, + } + mds = append(mds, md) + specMount.Source = path.Join(uvmPath, mount.Destination) + } + specMounts = append(specMounts, specMount) + } + configuration.MappedDirectories 
= mds
+
+	hcsContainer, err := hcsshim.CreateContainer(id, configuration)
+	if err != nil {
+		return err
+	}
+
+	spec.Mounts = specMounts
+
+	// Construct a container object for calling start on it.
+	ctr := &container{
+		id:           id,
+		execs:        make(map[string]*process),
+		isWindows:    false,
+		ociSpec:      spec,
+		hcsContainer: hcsContainer,
+		status:       StatusCreated,
+		waitCh:       make(chan struct{}),
+	}
+
+	// Start the container. If this is a servicing container, this call
+	// will block until the container is done with the servicing
+	// execution.
+	logger.Debug("starting container")
+	if err = hcsContainer.Start(); err != nil {
+		c.logger.WithError(err).Error("failed to start container")
+		ctr.debugGCS()
+		if err := c.terminateContainer(ctr); err != nil {
+			c.logger.WithError(err).Error("failed to cleanup after a failed Start")
+		} else {
+			c.logger.Debug("cleaned up after failed Start by calling Terminate")
+		}
+		return err
+	}
+	ctr.debugGCS()
+
+	c.Lock()
+	c.containers[id] = ctr
+	c.Unlock()
+
+	c.eventQ.append(id, func() {
+		ei := EventInfo{
+			ContainerID: id,
+		}
+		c.logger.WithFields(logrus.Fields{
+			"container": ctr.id,
+			"event":     EventCreate,
+		}).Info("sending event")
+		err := c.backend.ProcessEvent(id, EventCreate, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container": id,
+				"event":     EventCreate,
+			}).Error("failed to process event")
+		}
+	})
+
+	logger.Debug("createLinux() completed successfully")
+	return nil
+}
+
+func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachStdio StdioCallback) (int, error) {
+	ctr := c.getContainer(id)
+	switch {
+	case ctr == nil:
+		return -1, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.init != nil:
+		return -1, errors.WithStack(newConflictError("container already started"))
+	}
+
+	logger := c.logger.WithField("container", id)
+
+	// Note we always tell HCS to create stdout as it's required
+	// regardless of '-i' or '-t' options, so that docker can always grab
+	// the output through logs. We also tell HCS to always create stdin,
+	// even if it's not used - it will be closed shortly. Stderr is only
+	// created if we're not running with -t.
+	var (
+		emulateConsole   bool
+		createStdErrPipe bool
+	)
+	if ctr.ociSpec.Process != nil {
+		emulateConsole = ctr.ociSpec.Process.Terminal
+		createStdErrPipe = !ctr.ociSpec.Process.Terminal && !ctr.ociSpec.Windows.Servicing
+	}
+
+	createProcessParms := &hcsshim.ProcessConfig{
+		EmulateConsole:   emulateConsole,
+		WorkingDirectory: ctr.ociSpec.Process.Cwd,
+		CreateStdInPipe:  !ctr.ociSpec.Windows.Servicing,
+		CreateStdOutPipe: !ctr.ociSpec.Windows.Servicing,
+		CreateStdErrPipe: createStdErrPipe,
+	}
+
+	if ctr.ociSpec.Process != nil && ctr.ociSpec.Process.ConsoleSize != nil {
+		createProcessParms.ConsoleSize[0] = uint(ctr.ociSpec.Process.ConsoleSize.Height)
+		createProcessParms.ConsoleSize[1] = uint(ctr.ociSpec.Process.ConsoleSize.Width)
+	}
+
+	// Configure the environment for the process
+	createProcessParms.Environment = setupEnvironmentVariables(ctr.ociSpec.Process.Env)
+	if ctr.isWindows {
+		createProcessParms.CommandLine = strings.Join(ctr.ociSpec.Process.Args, " ")
+	} else {
+		createProcessParms.CommandArgs = ctr.ociSpec.Process.Args
+	}
+	createProcessParms.User = ctr.ociSpec.Process.User.Username
+
+	// LCOW requires the raw OCI spec passed through HCS and onwards to
+	// GCS for the utility VM.
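+	// (The spec is serialized wholesale into OCISpecification below; HCS
+	// forwards it to the GCS, which presumably creates the Linux
+	// container from it, so no per-field translation happens here.)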
+ if !ctr.isWindows { + ociBuf, err := json.Marshal(ctr.ociSpec) + if err != nil { + return -1, err + } + ociRaw := json.RawMessage(ociBuf) + createProcessParms.OCISpecification = &ociRaw + } + + ctr.Lock() + defer ctr.Unlock() + + // Start the command running in the container. + newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms) + if err != nil { + logger.WithError(err).Error("CreateProcess() failed") + return -1, err + } + defer func() { + if err != nil { + if err := newProcess.Kill(); err != nil { + logger.WithError(err).Error("failed to kill process") + } + go func() { + if err := newProcess.Wait(); err != nil { + logger.WithError(err).Error("failed to wait for process") + } + if err := newProcess.Close(); err != nil { + logger.WithError(err).Error("failed to clean process resources") + } + }() + } + }() + p := &process{ + hcsProcess: newProcess, + id: InitProcessName, + pid: newProcess.Pid(), + } + logger.WithField("pid", p.pid).Debug("init process started") + + // If this is a servicing container, wait on the process synchronously here and + // if it succeeds, wait for it cleanly shutdown and merge into the parent container. + if ctr.ociSpec.Windows.Servicing { + // reapProcess takes the lock + ctr.Unlock() + defer ctr.Lock() + exitCode := c.reapProcess(ctr, p) + + if exitCode != 0 { + return -1, errors.Errorf("libcontainerd: servicing container %s returned non-zero exit code %d", ctr.id, exitCode) + } + + return p.pid, nil + } + + var ( + stdout, stderr io.ReadCloser + stdin io.WriteCloser + ) + stdin, stdout, stderr, err = newProcess.Stdio() + if err != nil { + logger.WithError(err).Error("failed to get stdio pipes") + return -1, err + } + + iopipe := &IOPipe{Terminal: ctr.ociSpec.Process.Terminal} + iopipe.Stdin = createStdInCloser(stdin, newProcess) + + // Convert io.ReadClosers to io.Readers + if stdout != nil { + iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout}) + } + if stderr != nil { + iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr}) + } + + _, err = attachStdio(iopipe) + if err != nil { + logger.WithError(err).Error("failed to attache stdio") + return -1, err + } + ctr.status = StatusRunning + ctr.init = p + + // Spin up a go routine waiting for exit to handle cleanup + go c.reapProcess(ctr, p) + + // Generate the associated event + c.eventQ.append(id, func() { + ei := EventInfo{ + ContainerID: id, + ProcessID: InitProcessName, + Pid: uint32(p.pid), + } + c.logger.WithFields(logrus.Fields{ + "container": ctr.id, + "event": EventStart, + "event-info": ei, + }).Info("sending event") + err := c.backend.ProcessEvent(ei.ContainerID, EventStart, ei) + if err != nil { + c.logger.WithError(err).WithFields(logrus.Fields{ + "container": id, + "event": EventStart, + "event-info": ei, + }).Error("failed to process event") + } + }) + logger.Debug("start() completed") + return p.pid, nil +} + +// Exec adds a process in an running container +func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) { + ctr := c.getContainer(containerID) + switch { + case ctr == nil: + return -1, errors.WithStack(newNotFoundError("no such container")) + case ctr.hcsContainer == nil: + return -1, errors.WithStack(newInvalidParameterError("container is not running")) + case ctr.execs != nil && ctr.execs[processID] != nil: + return -1, errors.WithStack(newConflictError("id already in use")) + } + logger := c.logger.WithFields(logrus.Fields{ + 
"container": containerID, + "exec": processID, + }) + + // Note we always tell HCS to + // create stdout as it's required regardless of '-i' or '-t' options, so that + // docker can always grab the output through logs. We also tell HCS to always + // create stdin, even if it's not used - it will be closed shortly. Stderr + // is only created if it we're not -t. + createProcessParms := hcsshim.ProcessConfig{ + CreateStdInPipe: true, + CreateStdOutPipe: true, + CreateStdErrPipe: !spec.Terminal, + } + if spec.Terminal { + createProcessParms.EmulateConsole = true + if spec.ConsoleSize != nil { + createProcessParms.ConsoleSize[0] = uint(spec.ConsoleSize.Height) + createProcessParms.ConsoleSize[1] = uint(spec.ConsoleSize.Width) + } + } + + // Take working directory from the process to add if it is defined, + // otherwise take from the first process. + if spec.Cwd != "" { + createProcessParms.WorkingDirectory = spec.Cwd + } else { + createProcessParms.WorkingDirectory = ctr.ociSpec.Process.Cwd + } + + // Configure the environment for the process + createProcessParms.Environment = setupEnvironmentVariables(spec.Env) + if ctr.isWindows { + createProcessParms.CommandLine = strings.Join(spec.Args, " ") + } else { + createProcessParms.CommandArgs = spec.Args + } + createProcessParms.User = spec.User.Username + + logger.Debugf("exec commandLine: %s", createProcessParms.CommandLine) + + // Start the command running in the container. + var ( + stdout, stderr io.ReadCloser + stdin io.WriteCloser + ) + newProcess, err := ctr.hcsContainer.CreateProcess(&createProcessParms) + if err != nil { + logger.WithError(err).Errorf("exec's CreateProcess() failed") + return -1, err + } + pid := newProcess.Pid() + defer func() { + if err != nil { + if err := newProcess.Kill(); err != nil { + logger.WithError(err).Error("failed to kill process") + } + go func() { + if err := newProcess.Wait(); err != nil { + logger.WithError(err).Error("failed to wait for process") + } + if err := newProcess.Close(); err != nil { + logger.WithError(err).Error("failed to clean process resources") + } + }() + } + }() + + stdin, stdout, stderr, err = newProcess.Stdio() + if err != nil { + logger.WithError(err).Error("getting std pipes failed") + return -1, err + } + + iopipe := &IOPipe{Terminal: spec.Terminal} + iopipe.Stdin = createStdInCloser(stdin, newProcess) + + // Convert io.ReadClosers to io.Readers + if stdout != nil { + iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout}) + } + if stderr != nil { + iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr}) + } + + // Tell the engine to attach streams back to the client + _, err = attachStdio(iopipe) + if err != nil { + return -1, err + } + + p := &process{ + id: processID, + pid: pid, + hcsProcess: newProcess, + } + + // Add the process to the container's list of processes + ctr.Lock() + ctr.execs[processID] = p + ctr.Unlock() + + // Spin up a go routine waiting for exit to handle cleanup + go c.reapProcess(ctr, p) + + c.eventQ.append(ctr.id, func() { + ei := EventInfo{ + ContainerID: ctr.id, + ProcessID: p.id, + Pid: uint32(p.pid), + } + c.logger.WithFields(logrus.Fields{ + "container": ctr.id, + "event": EventExecAdded, + "event-info": ei, + }).Info("sending event") + err := c.backend.ProcessEvent(ctr.id, EventExecAdded, ei) + if err != nil { + c.logger.WithError(err).WithFields(logrus.Fields{ + "container": ctr.id, + "event": EventExecAdded, + "event-info": ei, + }).Error("failed to process event") + } + err = c.backend.ProcessEvent(ctr.id, 
EventExecStarted, ei)
+ if err != nil {
+ c.logger.WithError(err).WithFields(logrus.Fields{
+ "container": ctr.id,
+ "event": EventExecStarted,
+ "event-info": ei,
+ }).Error("failed to process event")
+ }
+ })
+
+ return pid, nil
+}
+
+// SignalProcess handles `docker stop` on Windows. While Linux has support for
+// the full range of signals, signals aren't really implemented on Windows.
+// We fake supporting regular stop and -9 to force kill.
+func (c *client) SignalProcess(_ context.Context, containerID, processID string, signal int) error {
+ ctr, p, err := c.getProcess(containerID, processID)
+ if err != nil {
+ return err
+ }
+
+ ctr.manualStopRequested = true
+
+ logger := c.logger.WithFields(logrus.Fields{
+ "container": containerID,
+ "process": processID,
+ "pid": p.pid,
+ "signal": signal,
+ })
+ logger.Debug("Signal()")
+
+ if processID == InitProcessName {
+ if syscall.Signal(signal) == syscall.SIGKILL {
+ // Terminate the compute system
+ if err := ctr.hcsContainer.Terminate(); err != nil {
+ if !hcsshim.IsPending(err) {
+ logger.WithError(err).Error("failed to terminate hcsshim container")
+ }
+ }
+ } else {
+ // Shut down the container
+ if err := ctr.hcsContainer.Shutdown(); err != nil {
+ if !hcsshim.IsPending(err) && !hcsshim.IsAlreadyStopped(err) {
+ // don't return the error, just log it
+ logger.WithError(err).Error("failed to shutdown hcsshim container")
+ }
+ }
+ }
+ } else {
+ return p.hcsProcess.Kill()
+ }
+
+ return nil
+}
+
+// ResizeTerminal handles a CLI event to resize an interactive docker run or
+// docker exec window.
+func (c *client) ResizeTerminal(_ context.Context, containerID, processID string, width, height int) error {
+ _, p, err := c.getProcess(containerID, processID)
+ if err != nil {
+ return err
+ }
+
+ c.logger.WithFields(logrus.Fields{
+ "container": containerID,
+ "process": processID,
+ "height": height,
+ "width": width,
+ "pid": p.pid,
+ }).Debug("resizing")
+ return p.hcsProcess.ResizeConsole(uint16(height), uint16(width))
+}
+
+func (c *client) CloseStdin(_ context.Context, containerID, processID string) error {
+ _, p, err := c.getProcess(containerID, processID)
+ if err != nil {
+ return err
+ }
+
+ return p.hcsProcess.CloseStdin()
+}
+
+// Pause handles pause requests for containers
+func (c *client) Pause(_ context.Context, containerID string) error {
+ ctr, _, err := c.getProcess(containerID, InitProcessName)
+ if err != nil {
+ return err
+ }
+
+ if ctr.ociSpec.Windows.HyperV == nil {
+ return errors.New("cannot pause Windows Server Containers")
+ }
+
+ ctr.Lock()
+ defer ctr.Unlock()
+
+ if err = ctr.hcsContainer.Pause(); err != nil {
+ return err
+ }
+
+ ctr.status = StatusPaused
+
+ c.eventQ.append(containerID, func() {
+ err := c.backend.ProcessEvent(containerID, EventPaused, EventInfo{
+ ContainerID: containerID,
+ ProcessID: InitProcessName,
+ })
+ c.logger.WithFields(logrus.Fields{
+ "container": ctr.id,
+ "event": EventPaused,
+ }).Info("sending event")
+ if err != nil {
+ c.logger.WithError(err).WithFields(logrus.Fields{
+ "container": containerID,
+ "event": EventPaused,
+ }).Error("failed to process event")
+ }
+ })
+
+ return nil
+}
+
+// Resume handles resume requests for containers
+func (c *client) Resume(_ context.Context, containerID string) error {
+ ctr, _, err := c.getProcess(containerID, InitProcessName)
+ if err != nil {
+ return err
+ }
+
+ if ctr.ociSpec.Windows.HyperV == nil {
+ return errors.New("cannot resume Windows Server Containers")
+ }
+
+ ctr.Lock()
+ defer ctr.Unlock()
+
+ if err = ctr.hcsContainer.Resume(); err != nil {
+ return
err
+ }
+
+ ctr.status = StatusRunning
+
+ c.eventQ.append(containerID, func() {
+ err := c.backend.ProcessEvent(containerID, EventResumed, EventInfo{
+ ContainerID: containerID,
+ ProcessID: InitProcessName,
+ })
+ c.logger.WithFields(logrus.Fields{
+ "container": ctr.id,
+ "event": EventResumed,
+ }).Info("sending event")
+ if err != nil {
+ c.logger.WithError(err).WithFields(logrus.Fields{
+ "container": containerID,
+ "event": EventResumed,
+ }).Error("failed to process event")
+ }
+ })
+
+ return nil
+}
+
+// Stats handles stats requests for containers
+func (c *client) Stats(_ context.Context, containerID string) (*Stats, error) {
+ ctr, _, err := c.getProcess(containerID, InitProcessName)
+ if err != nil {
+ return nil, err
+ }
+
+ readAt := time.Now()
+ s, err := ctr.hcsContainer.Statistics()
+ if err != nil {
+ return nil, err
+ }
+ return &Stats{
+ Read: readAt,
+ HCSStats: &s,
+ }, nil
+}
+
+// Restore is the handler for restoring a container
+func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallback) (bool, int, error) {
+ c.logger.WithField("container", id).Debug("restore()")
+
+ // TODO Windows: On RS1, a re-attach isn't possible.
+ // However, there is a scenario in which this is an issue.
+ // Consider a background container. The daemon dies unexpectedly.
+ // HCS will still have the compute service alive and running.
+ // For consistency, we call in to terminate it regardless of whether
+ // HCS knows about it. We explicitly just log a warning if the
+ // terminate fails. Then we tell the backend the container exited.
+ if hc, err := hcsshim.OpenContainer(id); err == nil {
+ const terminateTimeout = time.Minute * 2
+ err := hc.Terminate()
+
+ if hcsshim.IsPending(err) {
+ err = hc.WaitTimeout(terminateTimeout)
+ } else if hcsshim.IsAlreadyStopped(err) {
+ err = nil
+ }
+
+ if err != nil {
+ c.logger.WithField("container", id).WithError(err).Debug("terminate failed on restore")
+ return false, -1, err
+ }
+ }
+ return false, -1, nil
+}
+
+// ListPids returns a list of process IDs running in a container.
+// Not used on Windows.
+func (c *client) ListPids(_ context.Context, _ string) ([]uint32, error) {
+ return nil, errors.New("not implemented on Windows")
+}
+
+// Summary returns a summary of the processes running in a container.
+// This is present in Windows to support docker top. On Linux, the
+// engine shells out to ps to get process information. On Windows, as
+// the containers could be Hyper-V containers, they would not be
+// visible on the container host. However, libcontainerd does have
+// that information.
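+//
+// A rough usage sketch (hypothetical container ID; Summary is a named
+// type over hcsshim's process list entries, so fields such as
+// ProcessId and ImageName are assumed to come from hcsshim):
+//
+//   summaries, err := client.Summary(ctx, "abcdef123456")
+//   if err == nil {
+//       for _, s := range summaries {
+//           logrus.Infof("pid=%d image=%s", s.ProcessId, s.ImageName)
+//       }
+//   }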
+func (c *client) Summary(_ context.Context, containerID string) ([]Summary, error) {
+ ctr, _, err := c.getProcess(containerID, InitProcessName)
+ if err != nil {
+ return nil, err
+ }
+
+ p, err := ctr.hcsContainer.ProcessList()
+ if err != nil {
+ return nil, err
+ }
+
+ pl := make([]Summary, len(p))
+ for i := range p {
+ pl[i] = Summary(p[i])
+ }
+ return pl, nil
+}
+
+func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
+ ec := -1
+ ctr := c.getContainer(containerID)
+ if ctr == nil {
+ return uint32(ec), time.Now(), errors.WithStack(newNotFoundError("no such container"))
+ }
+
+ select {
+ case <-ctx.Done():
+ return uint32(ec), time.Now(), errors.WithStack(ctx.Err())
+ case <-ctr.waitCh:
+ default:
+ return uint32(ec), time.Now(), errors.New("container is not stopped")
+ }
+
+ ctr.Lock()
+ defer ctr.Unlock()
+ return ctr.exitCode, ctr.exitedAt, nil
+}
+
+func (c *client) Delete(_ context.Context, containerID string) error {
+ c.Lock()
+ defer c.Unlock()
+ ctr := c.containers[containerID]
+ if ctr == nil {
+ return errors.WithStack(newNotFoundError("no such container"))
+ }
+
+ ctr.Lock()
+ defer ctr.Unlock()
+
+ switch ctr.status {
+ case StatusCreated:
+ if err := c.shutdownContainer(ctr); err != nil {
+ return err
+ }
+ fallthrough
+ case StatusStopped:
+ delete(c.containers, containerID)
+ return nil
+ }
+
+ return errors.WithStack(newInvalidParameterError("container is not stopped"))
+}
+
+func (c *client) Status(ctx context.Context, containerID string) (Status, error) {
+ c.Lock()
+ defer c.Unlock()
+ ctr := c.containers[containerID]
+ if ctr == nil {
+ return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
+ }
+
+ ctr.Lock()
+ defer ctr.Unlock()
+ return ctr.status, nil
+}
+
+func (c *client) UpdateResources(ctx context.Context, containerID string, resources *Resources) error {
+ // Updating resources isn't supported on Windows, but we return nil
+ // so that the daemon's update path can still proceed.
+ return nil
+}
+
+func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
+ return errors.New("Windows: Containers do not support checkpoints")
+}
+
+func (c *client) getContainer(id string) *container {
+ c.Lock()
+ ctr := c.containers[id]
+ c.Unlock()
+
+ return ctr
+}
+
+func (c *client) getProcess(containerID, processID string) (*container, *process, error) {
+ ctr := c.getContainer(containerID)
+ switch {
+ case ctr == nil:
+ return nil, nil, errors.WithStack(newNotFoundError("no such container"))
+ case ctr.init == nil:
+ return nil, nil, errors.WithStack(newNotFoundError("container is not running"))
+ case processID == InitProcessName:
+ return ctr, ctr.init, nil
+ default:
+ ctr.Lock()
+ defer ctr.Unlock()
+ if ctr.execs == nil {
+ return nil, nil, errors.WithStack(newNotFoundError("no execs"))
+ }
+ }
+
+ p := ctr.execs[processID]
+ if p == nil {
+ return nil, nil, errors.WithStack(newNotFoundError("no such exec"))
+ }
+
+ return ctr, p, nil
+}
+
+func (c *client) shutdownContainer(ctr *container) error {
+ const shutdownTimeout = time.Minute * 5
+ err := ctr.hcsContainer.Shutdown()
+
+ if hcsshim.IsPending(err) {
+ err = ctr.hcsContainer.WaitTimeout(shutdownTimeout)
+ } else if hcsshim.IsAlreadyStopped(err) {
+ err = nil
+ }
+
+ if err != nil {
+ c.logger.WithError(err).WithField("container", ctr.id).
+ Debug("failed to shutdown container, terminating it") + return c.terminateContainer(ctr) + } + + return nil +} + +func (c *client) terminateContainer(ctr *container) error { + const terminateTimeout = time.Minute * 5 + err := ctr.hcsContainer.Terminate() + + if hcsshim.IsPending(err) { + err = ctr.hcsContainer.WaitTimeout(terminateTimeout) + } else if hcsshim.IsAlreadyStopped(err) { + err = nil + } + + if err != nil { + c.logger.WithError(err).WithField("container", ctr.id). + Debug("failed to terminate container") + return err + } + + return nil +} + +func (c *client) reapProcess(ctr *container, p *process) int { + logger := c.logger.WithFields(logrus.Fields{ + "container": ctr.id, + "process": p.id, + }) + + // Block indefinitely for the process to exit. + if err := p.hcsProcess.Wait(); err != nil { + if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE { + logger.WithError(err).Warnf("Wait() failed (container may have been killed)") + } + // Fall through here, do not return. This ensures we attempt to + // continue the shutdown in HCS and tell the docker engine that the + // process/container has exited to avoid a container being dropped on + // the floor. + } + exitedAt := time.Now() + + exitCode, err := p.hcsProcess.ExitCode() + if err != nil { + if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE { + logger.WithError(err).Warnf("unable to get exit code for process") + } + // Since we got an error retrieving the exit code, make sure that the + // code we return doesn't incorrectly indicate success. + exitCode = -1 + + // Fall through here, do not return. This ensures we attempt to + // continue the shutdown in HCS and tell the docker engine that the + // process/container has exited to avoid a container being dropped on + // the floor. + } + + if err := p.hcsProcess.Close(); err != nil { + logger.WithError(err).Warnf("failed to cleanup hcs process resources") + } + + var pendingUpdates bool + if p.id == InitProcessName { + // Update container status + ctr.Lock() + ctr.status = StatusStopped + ctr.exitedAt = exitedAt + ctr.exitCode = uint32(exitCode) + close(ctr.waitCh) + ctr.Unlock() + + // Handle any servicing + if exitCode == 0 && ctr.isWindows && !ctr.ociSpec.Windows.Servicing { + pendingUpdates, err = ctr.hcsContainer.HasPendingUpdates() + logger.Infof("Pending updates: %v", pendingUpdates) + if err != nil { + logger.WithError(err). 
+ Warnf("failed to check for pending updates (container may have been killed)") + } + } + + if err := c.shutdownContainer(ctr); err != nil { + logger.WithError(err).Warn("failed to shutdown container") + } else { + logger.Debug("completed container shutdown") + } + + if err := ctr.hcsContainer.Close(); err != nil { + logger.WithError(err).Error("failed to clean hcs container resources") + } + } + + if !(ctr.isWindows && ctr.ociSpec.Windows.Servicing) { + c.eventQ.append(ctr.id, func() { + ei := EventInfo{ + ContainerID: ctr.id, + ProcessID: p.id, + Pid: uint32(p.pid), + ExitCode: uint32(exitCode), + ExitedAt: exitedAt, + UpdatePending: pendingUpdates, + } + c.logger.WithFields(logrus.Fields{ + "container": ctr.id, + "event": EventExit, + "event-info": ei, + }).Info("sending event") + err := c.backend.ProcessEvent(ctr.id, EventExit, ei) + if err != nil { + c.logger.WithError(err).WithFields(logrus.Fields{ + "container": ctr.id, + "event": EventExit, + "event-info": ei, + }).Error("failed to process event") + } + if p.id != InitProcessName { + ctr.Lock() + delete(ctr.execs, p.id) + ctr.Unlock() + } + }) + } + + return exitCode +} diff --git a/libcontainerd/client_solaris.go b/libcontainerd/client_solaris.go deleted file mode 100644 index c54cea3bfa..0000000000 --- a/libcontainerd/client_solaris.go +++ /dev/null @@ -1,104 +0,0 @@ -package libcontainerd - -import ( - containerd "github.com/containerd/containerd/api/grpc/types" - "golang.org/x/net/context" -) - -type client struct { - clientCommon - - // Platform specific properties below here. - remote *remote - q queue - exitNotifiers map[string]*exitNotifier - liveRestore bool -} - -// GetServerVersion returns the connected server version information -func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) { - resp, err := clnt.remote.apiClient.GetServerVersion(ctx, &containerd.GetServerVersionRequest{}) - if err != nil { - return nil, err - } - - sv := &ServerVersion{ - GetServerVersionResponse: *resp, - } - - return sv, nil -} - -func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, specp Process, attachStdio StdioCallback) (int, error) { - return -1, nil -} - -func (clnt *client) SignalProcess(containerID string, pid string, sig int) error { - return nil -} - -func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error { - return nil -} - -func (clnt *client) Pause(containerID string) error { - return nil -} - -func (clnt *client) Resume(containerID string) error { - return nil -} - -func (clnt *client) Stats(containerID string) (*Stats, error) { - return nil, nil -} - -func (clnt *client) getExitNotifier(containerID string) *exitNotifier { - clnt.mapMutex.RLock() - defer clnt.mapMutex.RUnlock() - return clnt.exitNotifiers[containerID] -} - -func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier { - clnt.mapMutex.Lock() - defer clnt.mapMutex.Unlock() - w, ok := clnt.exitNotifiers[containerID] - if !ok { - w = &exitNotifier{c: make(chan struct{}), client: clnt} - clnt.exitNotifiers[containerID] = w - } - return w -} - -// Restore is the handler for restoring a container -func (clnt *client) Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error { - return nil -} - -func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) { - return nil, nil -} - -// Summary returns a summary of the processes running in a container. 
-func (clnt *client) Summary(containerID string) ([]Summary, error) { - return nil, nil -} - -// UpdateResources updates resources for a running container. -func (clnt *client) UpdateResources(containerID string, resources Resources) error { - // Updating resource isn't supported on Solaris - // but we should return nil for enabling updating container - return nil -} - -func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error { - return nil -} - -func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error { - return nil -} - -func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) { - return nil, nil -} diff --git a/libcontainerd/client_unix.go b/libcontainerd/client_unix.go deleted file mode 100644 index 202a5b09b1..0000000000 --- a/libcontainerd/client_unix.go +++ /dev/null @@ -1,141 +0,0 @@ -// +build linux solaris - -package libcontainerd - -import ( - "encoding/json" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - - containerd "github.com/containerd/containerd/api/grpc/types" - "github.com/docker/docker/pkg/idtools" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" - "golang.org/x/net/context" -) - -func (clnt *client) prepareBundleDir(uid, gid int) (string, error) { - root, err := filepath.Abs(clnt.remote.stateDir) - if err != nil { - return "", err - } - if uid == 0 && gid == 0 { - return root, nil - } - p := string(filepath.Separator) - for _, d := range strings.Split(root, string(filepath.Separator))[1:] { - p = filepath.Join(p, d) - fi, err := os.Stat(p) - if err != nil && !os.IsNotExist(err) { - return "", err - } - if os.IsNotExist(err) || fi.Mode()&1 == 0 { - p = fmt.Sprintf("%s.%d.%d", p, uid, gid) - if err := idtools.MkdirAndChown(p, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) { - return "", err - } - } - } - return p, nil -} - -func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) (err error) { - clnt.lock(containerID) - defer clnt.unlock(containerID) - - if _, err := clnt.getContainer(containerID); err == nil { - return fmt.Errorf("Container %s is already active", containerID) - } - - uid, gid, err := getRootIDs(spec) - if err != nil { - return err - } - dir, err := clnt.prepareBundleDir(uid, gid) - if err != nil { - return err - } - - container := clnt.newContainer(filepath.Join(dir, containerID), options...) 
- if err := container.clean(); err != nil { - return err - } - - defer func() { - if err != nil { - container.clean() - clnt.deleteContainer(containerID) - } - }() - - if err := idtools.MkdirAllAndChown(container.dir, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) { - return err - } - - f, err := os.Create(filepath.Join(container.dir, configFilename)) - if err != nil { - return err - } - defer f.Close() - if err := json.NewEncoder(f).Encode(spec); err != nil { - return err - } - return container.start(&spec, checkpoint, checkpointDir, attachStdio) -} - -func (clnt *client) Signal(containerID string, sig int) error { - clnt.lock(containerID) - defer clnt.unlock(containerID) - _, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{ - Id: containerID, - Pid: InitFriendlyName, - Signal: uint32(sig), - }) - return err -} - -func (clnt *client) newContainer(dir string, options ...CreateOption) *container { - container := &container{ - containerCommon: containerCommon{ - process: process{ - dir: dir, - processCommon: processCommon{ - containerID: filepath.Base(dir), - client: clnt, - friendlyName: InitFriendlyName, - }, - }, - processes: make(map[string]*process), - }, - } - for _, option := range options { - if err := option.Apply(container); err != nil { - logrus.Errorf("libcontainerd: newContainer(): %v", err) - } - } - return container -} - -type exitNotifier struct { - id string - client *client - c chan struct{} - once sync.Once -} - -func (en *exitNotifier) close() { - en.once.Do(func() { - close(en.c) - en.client.mapMutex.Lock() - if en == en.client.exitNotifiers[en.id] { - delete(en.client.exitNotifiers, en.id) - } - en.client.mapMutex.Unlock() - }) -} -func (en *exitNotifier) wait() <-chan struct{} { - return en.c -} diff --git a/libcontainerd/client_windows.go b/libcontainerd/client_windows.go deleted file mode 100644 index df9e40ea3c..0000000000 --- a/libcontainerd/client_windows.go +++ /dev/null @@ -1,886 +0,0 @@ -package libcontainerd - -import ( - "encoding/json" - "errors" - "fmt" - "io" - "io/ioutil" - "os" - "path" - "path/filepath" - "regexp" - "strings" - "syscall" - "time" - - "golang.org/x/net/context" - - "github.com/Microsoft/hcsshim" - opengcs "github.com/Microsoft/opengcs/client" - "github.com/docker/docker/pkg/sysinfo" - "github.com/docker/docker/pkg/system" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" -) - -type client struct { - clientCommon - - // Platform specific properties below here (none presently on Windows) -} - -// Win32 error codes that are used for various workarounds -// These really should be ALL_CAPS to match golangs syscall library and standard -// Win32 error conventions, but golint insists on CamelCase. -const ( - CoEClassstring = syscall.Errno(0x800401F3) // Invalid class string - ErrorNoNetwork = syscall.Errno(1222) // The network is not present or not started - ErrorBadPathname = syscall.Errno(161) // The specified path is invalid - ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object -) - -// defaultOwner is a tag passed to HCS to allow it to differentiate between -// container creator management stacks. We hard code "docker" in the case -// of docker. -const defaultOwner = "docker" - -// Create is the entrypoint to create a container from a spec, and if successfully -// created, start it too. Table below shows the fields required for HCS JSON calling parameters, -// where if not populated, is omitted. 
-// +-----------------+--------------------------------------------+---------------------------------------------------+ -// | | Isolation=Process | Isolation=Hyper-V | -// +-----------------+--------------------------------------------+---------------------------------------------------+ -// | VolumePath | \\?\\Volume{GUIDa} | | -// | LayerFolderPath | %root%\windowsfilter\containerID | %root%\windowsfilter\containerID (servicing only) | -// | Layers[] | ID=GUIDb;Path=%root%\windowsfilter\layerID | ID=GUIDb;Path=%root%\windowsfilter\layerID | -// | HvRuntime | | ImagePath=%root%\BaseLayerID\UtilityVM | -// +-----------------+--------------------------------------------+---------------------------------------------------+ -// -// Isolation=Process example: -// -// { -// "SystemType": "Container", -// "Name": "5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776", -// "Owner": "docker", -// "VolumePath": "\\\\\\\\?\\\\Volume{66d1ef4c-7a00-11e6-8948-00155ddbef9d}", -// "IgnoreFlushesDuringBoot": true, -// "LayerFolderPath": "C:\\\\control\\\\windowsfilter\\\\5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776", -// "Layers": [{ -// "ID": "18955d65-d45a-557b-bf1c-49d6dfefc526", -// "Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c" -// }], -// "HostName": "5e0055c814a6", -// "MappedDirectories": [], -// "HvPartition": false, -// "EndpointList": ["eef2649d-bb17-4d53-9937-295a8efe6f2c"], -// "Servicing": false -//} -// -// Isolation=Hyper-V example: -// -//{ -// "SystemType": "Container", -// "Name": "475c2c58933b72687a88a441e7e0ca4bd72d76413c5f9d5031fee83b98f6045d", -// "Owner": "docker", -// "IgnoreFlushesDuringBoot": true, -// "Layers": [{ -// "ID": "18955d65-d45a-557b-bf1c-49d6dfefc526", -// "Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c" -// }], -// "HostName": "475c2c58933b", -// "MappedDirectories": [], -// "HvPartition": true, -// "EndpointList": ["e1bb1e61-d56f-405e-b75d-fd520cefa0cb"], -// "DNSSearchList": "a.com,b.com,c.com", -// "HvRuntime": { -// "ImagePath": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c\\\\UtilityVM" -// }, -// "Servicing": false -//} -func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error { - clnt.lock(containerID) - defer clnt.unlock(containerID) - if b, err := json.Marshal(spec); err == nil { - logrus.Debugln("libcontainerd: client.Create() with spec", string(b)) - } - - // spec.Linux must be nil for Windows containers, but spec.Windows will be filled in regardless of container platform. - // This is a temporary workaround due to LCOW requiring layer folder paths, which are stored under spec.Windows. - // TODO: @darrenstahlmsft fix this once the OCI spec is updated to support layer folder paths for LCOW - if spec.Linux == nil { - return clnt.createWindows(containerID, checkpoint, checkpointDir, spec, attachStdio, options...) - } - return clnt.createLinux(containerID, checkpoint, checkpointDir, spec, attachStdio, options...) 
-} - -func (clnt *client) createWindows(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error { - configuration := &hcsshim.ContainerConfig{ - SystemType: "Container", - Name: containerID, - Owner: defaultOwner, - IgnoreFlushesDuringBoot: spec.Windows.IgnoreFlushesDuringBoot, - HostName: spec.Hostname, - HvPartition: false, - Servicing: spec.Windows.Servicing, - } - - if spec.Windows.Resources != nil { - if spec.Windows.Resources.CPU != nil { - if spec.Windows.Resources.CPU.Count != nil { - // This check is being done here rather than in adaptContainerSettings - // because we don't want to update the HostConfig in case this container - // is moved to a host with more CPUs than this one. - cpuCount := *spec.Windows.Resources.CPU.Count - hostCPUCount := uint64(sysinfo.NumCPU()) - if cpuCount > hostCPUCount { - logrus.Warnf("Changing requested CPUCount of %d to current number of processors, %d", cpuCount, hostCPUCount) - cpuCount = hostCPUCount - } - configuration.ProcessorCount = uint32(cpuCount) - } - if spec.Windows.Resources.CPU.Shares != nil { - configuration.ProcessorWeight = uint64(*spec.Windows.Resources.CPU.Shares) - } - if spec.Windows.Resources.CPU.Maximum != nil { - configuration.ProcessorMaximum = int64(*spec.Windows.Resources.CPU.Maximum) - } - } - if spec.Windows.Resources.Memory != nil { - if spec.Windows.Resources.Memory.Limit != nil { - configuration.MemoryMaximumInMB = int64(*spec.Windows.Resources.Memory.Limit) / 1024 / 1024 - } - } - if spec.Windows.Resources.Storage != nil { - if spec.Windows.Resources.Storage.Bps != nil { - configuration.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps - } - if spec.Windows.Resources.Storage.Iops != nil { - configuration.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops - } - } - } - - if spec.Windows.HyperV != nil { - configuration.HvPartition = true - } - - if spec.Windows.Network != nil { - configuration.EndpointList = spec.Windows.Network.EndpointList - configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery - if spec.Windows.Network.DNSSearchList != nil { - configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",") - } - configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName - } - - if cs, ok := spec.Windows.CredentialSpec.(string); ok { - configuration.Credentials = cs - } - - // We must have least two layers in the spec, the bottom one being a base image, - // the top one being the RW layer. - if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) < 2 { - return fmt.Errorf("OCI spec is invalid - at least two LayerFolders must be supplied to the runtime") - } - - // Strip off the top-most layer as that's passed in separately to HCS - configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1] - layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1] - - if configuration.HvPartition { - // We don't currently support setting the utility VM image explicitly. - // TODO @swernli/jhowardmsft circa RS3/4, this may be re-locatable. - if spec.Windows.HyperV.UtilityVMPath != "" { - return errors.New("runtime does not support an explicit utility VM path for Hyper-V containers") - } - - // Find the upper-most utility VM image. 
- var uvmImagePath string - for _, path := range layerFolders { - fullPath := filepath.Join(path, "UtilityVM") - _, err := os.Stat(fullPath) - if err == nil { - uvmImagePath = fullPath - break - } - if !os.IsNotExist(err) { - return err - } - } - if uvmImagePath == "" { - return errors.New("utility VM image could not be found") - } - configuration.HvRuntime = &hcsshim.HvRuntime{ImagePath: uvmImagePath} - - if spec.Root.Path != "" { - return errors.New("OCI spec is invalid - Root.Path must be omitted for a Hyper-V container") - } - } else { - const volumeGUIDRegex = `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}\\$` - if _, err := regexp.MatchString(volumeGUIDRegex, spec.Root.Path); err != nil { - return fmt.Errorf(`OCI spec is invalid - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, spec.Root.Path) - } - // HCS API requires the trailing backslash to be removed - configuration.VolumePath = spec.Root.Path[:len(spec.Root.Path)-1] - } - - if spec.Root.Readonly { - return errors.New(`OCI spec is invalid - Root.Readonly must not be set on Windows`) - } - - for _, layerPath := range layerFolders { - _, filename := filepath.Split(layerPath) - g, err := hcsshim.NameToGuid(filename) - if err != nil { - return err - } - configuration.Layers = append(configuration.Layers, hcsshim.Layer{ - ID: g.ToString(), - Path: layerPath, - }) - } - - // Add the mounts (volumes, bind mounts etc) to the structure - var mds []hcsshim.MappedDir - var mps []hcsshim.MappedPipe - for _, mount := range spec.Mounts { - const pipePrefix = `\\.\pipe\` - if mount.Type != "" { - return fmt.Errorf("OCI spec is invalid - Mount.Type '%s' must not be set", mount.Type) - } - if strings.HasPrefix(mount.Destination, pipePrefix) { - mp := hcsshim.MappedPipe{ - HostPath: mount.Source, - ContainerPipeName: mount.Destination[len(pipePrefix):], - } - mps = append(mps, mp) - } else { - md := hcsshim.MappedDir{ - HostPath: mount.Source, - ContainerPath: mount.Destination, - ReadOnly: false, - } - for _, o := range mount.Options { - if strings.ToLower(o) == "ro" { - md.ReadOnly = true - } - } - mds = append(mds, md) - } - } - configuration.MappedDirectories = mds - if len(mps) > 0 && system.GetOSVersion().Build < 16210 { // replace with Win10 RS3 build number at RTM - return errors.New("named pipe mounts are not supported on this version of Windows") - } - configuration.MappedPipes = mps - - hcsContainer, err := hcsshim.CreateContainer(containerID, configuration) - if err != nil { - return err - } - - // Construct a container object for calling start on it. - container := &container{ - containerCommon: containerCommon{ - process: process{ - processCommon: processCommon{ - containerID: containerID, - client: clnt, - friendlyName: InitFriendlyName, - }, - }, - processes: make(map[string]*process), - }, - isWindows: true, - ociSpec: spec, - hcsContainer: hcsContainer, - } - - container.options = options - for _, option := range options { - if err := option.Apply(container); err != nil { - logrus.Errorf("libcontainerd: %v", err) - } - } - - // Call start, and if it fails, delete the container from our - // internal structure, start will keep HCS in sync by deleting the - // container there. 
- logrus.Debugf("libcontainerd: createWindows() id=%s, Calling start()", containerID) - if err := container.start(attachStdio); err != nil { - clnt.deleteContainer(containerID) - return err - } - - logrus.Debugf("libcontainerd: createWindows() id=%s completed successfully", containerID) - return nil - -} - -func (clnt *client) createLinux(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error { - logrus.Debugf("libcontainerd: createLinux(): containerId %s ", containerID) - - var lcowOpt *LCOWOption - for _, option := range options { - if lcow, ok := option.(*LCOWOption); ok { - lcowOpt = lcow - } - } - if lcowOpt == nil || lcowOpt.Config == nil { - return fmt.Errorf("lcow option must be supplied to the runtime") - } - - configuration := &hcsshim.ContainerConfig{ - HvPartition: true, - Name: containerID, - SystemType: "container", - ContainerType: "linux", - Owner: defaultOwner, - TerminateOnLastHandleClosed: true, - } - - if lcowOpt.Config.ActualMode == opengcs.ModeActualVhdx { - configuration.HvRuntime = &hcsshim.HvRuntime{ - ImagePath: lcowOpt.Config.Vhdx, - BootSource: "Vhd", - WritableBootSource: false, - } - } else { - configuration.HvRuntime = &hcsshim.HvRuntime{ - ImagePath: lcowOpt.Config.KirdPath, - LinuxKernelFile: lcowOpt.Config.KernelFile, - LinuxInitrdFile: lcowOpt.Config.InitrdFile, - LinuxBootParameters: lcowOpt.Config.BootParameters, - } - } - - if spec.Windows == nil { - return fmt.Errorf("spec.Windows must not be nil for LCOW containers") - } - - // We must have least one layer in the spec - if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) == 0 { - return fmt.Errorf("OCI spec is invalid - at least one LayerFolders must be supplied to the runtime") - } - - // Strip off the top-most layer as that's passed in separately to HCS - configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1] - layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1] - - for _, layerPath := range layerFolders { - _, filename := filepath.Split(layerPath) - g, err := hcsshim.NameToGuid(filename) - if err != nil { - return err - } - configuration.Layers = append(configuration.Layers, hcsshim.Layer{ - ID: g.ToString(), - Path: filepath.Join(layerPath, "layer.vhd"), - }) - } - - if spec.Windows.Network != nil { - configuration.EndpointList = spec.Windows.Network.EndpointList - configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery - if spec.Windows.Network.DNSSearchList != nil { - configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",") - } - configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName - } - - // Add the mounts (volumes, bind mounts etc) to the structure. We have to do - // some translation for both the mapped directories passed into HCS and in - // the spec. - // - // For HCS, we only pass in the mounts from the spec which are type "bind". - // Further, the "ContainerPath" field (which is a little mis-leadingly - // named when it applies to the utility VM rather than the container in the - // utility VM) is moved to under /tmp/gcs//binds, where this is passed - // by the caller through a 'uvmpath' option. - // - // We do similar translation for the mounts in the spec by stripping out - // the uvmpath option, and translating the Source path to the location in the - // utility VM calculated above. 
- // - // From inside the utility VM, you would see a 9p mount such as in the following - // where a host folder has been mapped to /target. The line with /tmp/gcs//binds - // specifically: - // - // / # mount - // rootfs on / type rootfs (rw,size=463736k,nr_inodes=115934) - // proc on /proc type proc (rw,relatime) - // sysfs on /sys type sysfs (rw,relatime) - // udev on /dev type devtmpfs (rw,relatime,size=498100k,nr_inodes=124525,mode=755) - // tmpfs on /run type tmpfs (rw,relatime) - // cgroup on /sys/fs/cgroup type cgroup (rw,relatime,cpuset,cpu,cpuacct,blkio,memory,devices,freezer,net_cls,perf_event,net_prio,hugetlb,pids,rdma) - // mqueue on /dev/mqueue type mqueue (rw,relatime) - // devpts on /dev/pts type devpts (rw,relatime,mode=600,ptmxmode=000) - // /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target on /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target type 9p (rw,sync,dirsync,relatime,trans=fd,rfdno=6,wfdno=6) - // /dev/pmem0 on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0 type ext4 (ro,relatime,block_validity,delalloc,norecovery,barrier,dax,user_xattr,acl) - // /dev/sda on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch type ext4 (rw,relatime,block_validity,delalloc,barrier,user_xattr,acl) - // overlay on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/rootfs type overlay (rw,relatime,lowerdir=/tmp/base/:/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0,upperdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/upper,workdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/work) - // - // /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l - // total 16 - // drwx------ 3 0 0 60 Sep 7 18:54 binds - // -rw-r--r-- 1 0 0 3345 Sep 7 18:54 config.json - // drwxr-xr-x 10 0 0 4096 Sep 6 17:26 layer0 - // drwxr-xr-x 1 0 0 4096 Sep 7 18:54 rootfs - // drwxr-xr-x 5 0 0 4096 Sep 7 18:54 scratch - // - // /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l binds - // total 0 - // drwxrwxrwt 2 0 0 4096 Sep 7 16:51 target - - mds := []hcsshim.MappedDir{} - specMounts := []specs.Mount{} - for _, mount := range spec.Mounts { - specMount := mount - if mount.Type == "bind" { - // Strip out the uvmpath from the options - updatedOptions := []string{} - uvmPath := "" - readonly := false - for _, opt := range mount.Options { - dropOption := false - elements := strings.SplitN(opt, "=", 2) - switch elements[0] { - case "uvmpath": - uvmPath = elements[1] - dropOption = true - case "rw": - case "ro": - readonly = true - case "rbind": - default: - return fmt.Errorf("unsupported option %q", opt) - } - if !dropOption { - updatedOptions = append(updatedOptions, opt) - } - } - mount.Options = updatedOptions - if uvmPath == "" { - return fmt.Errorf("no uvmpath for bind mount %+v", mount) - } - md := hcsshim.MappedDir{ - HostPath: mount.Source, - ContainerPath: path.Join(uvmPath, mount.Destination), - CreateInUtilityVM: true, - ReadOnly: readonly, - } - mds = append(mds, md) - specMount.Source = path.Join(uvmPath, mount.Destination) - } - specMounts = append(specMounts, specMount) - } - configuration.MappedDirectories = mds - - hcsContainer, err := hcsshim.CreateContainer(containerID, configuration) - if err != nil { - return err - } - - spec.Mounts = specMounts - - // Construct a container object for calling 
start on it. - container := &container{ - containerCommon: containerCommon{ - process: process{ - processCommon: processCommon{ - containerID: containerID, - client: clnt, - friendlyName: InitFriendlyName, - }, - }, - processes: make(map[string]*process), - }, - ociSpec: spec, - hcsContainer: hcsContainer, - } - - container.options = options - for _, option := range options { - if err := option.Apply(container); err != nil { - logrus.Errorf("libcontainerd: createLinux() %v", err) - } - } - - // Call start, and if it fails, delete the container from our - // internal structure, start will keep HCS in sync by deleting the - // container there. - logrus.Debugf("libcontainerd: createLinux() id=%s, Calling start()", containerID) - if err := container.start(attachStdio); err != nil { - clnt.deleteContainer(containerID) - return err - } - - logrus.Debugf("libcontainerd: createLinux() id=%s completed successfully", containerID) - return nil -} - -// AddProcess is the handler for adding a process to an already running -// container. It's called through docker exec. It returns the system pid of the -// exec'd process. -func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, procToAdd Process, attachStdio StdioCallback) (int, error) { - clnt.lock(containerID) - defer clnt.unlock(containerID) - container, err := clnt.getContainer(containerID) - if err != nil { - return -1, err - } - - defer container.debugGCS() - - // Note we always tell HCS to - // create stdout as it's required regardless of '-i' or '-t' options, so that - // docker can always grab the output through logs. We also tell HCS to always - // create stdin, even if it's not used - it will be closed shortly. Stderr - // is only created if it we're not -t. - createProcessParms := hcsshim.ProcessConfig{ - CreateStdInPipe: true, - CreateStdOutPipe: true, - CreateStdErrPipe: !procToAdd.Terminal, - } - if procToAdd.Terminal { - createProcessParms.EmulateConsole = true - if procToAdd.ConsoleSize != nil { - createProcessParms.ConsoleSize[0] = uint(procToAdd.ConsoleSize.Height) - createProcessParms.ConsoleSize[1] = uint(procToAdd.ConsoleSize.Width) - } - } - - // Take working directory from the process to add if it is defined, - // otherwise take from the first process. - if procToAdd.Cwd != "" { - createProcessParms.WorkingDirectory = procToAdd.Cwd - } else { - createProcessParms.WorkingDirectory = container.ociSpec.Process.Cwd - } - - // Configure the environment for the process - createProcessParms.Environment = setupEnvironmentVariables(procToAdd.Env) - if container.isWindows { - createProcessParms.CommandLine = strings.Join(procToAdd.Args, " ") - } else { - createProcessParms.CommandArgs = procToAdd.Args - } - createProcessParms.User = procToAdd.User.Username - - logrus.Debugf("libcontainerd: commandLine: %s", createProcessParms.CommandLine) - - // Start the command running in the container. 
- var stdout, stderr io.ReadCloser - var stdin io.WriteCloser - newProcess, err := container.hcsContainer.CreateProcess(&createProcessParms) - if err != nil { - logrus.Errorf("libcontainerd: AddProcess(%s) CreateProcess() failed %s", containerID, err) - return -1, err - } - - pid := newProcess.Pid() - - stdin, stdout, stderr, err = newProcess.Stdio() - if err != nil { - logrus.Errorf("libcontainerd: %s getting std pipes failed %s", containerID, err) - return -1, err - } - - iopipe := &IOPipe{Terminal: procToAdd.Terminal} - iopipe.Stdin = createStdInCloser(stdin, newProcess) - - // Convert io.ReadClosers to io.Readers - if stdout != nil { - iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout}) - } - if stderr != nil { - iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr}) - } - - proc := &process{ - processCommon: processCommon{ - containerID: containerID, - friendlyName: processFriendlyName, - client: clnt, - systemPid: uint32(pid), - }, - hcsProcess: newProcess, - } - - // Add the process to the container's list of processes - container.processes[processFriendlyName] = proc - - // Tell the engine to attach streams back to the client - if err := attachStdio(*iopipe); err != nil { - return -1, err - } - - // Spin up a go routine waiting for exit to handle cleanup - go container.waitExit(proc, false) - - return pid, nil -} - -// Signal handles `docker stop` on Windows. While Linux has support for -// the full range of signals, signals aren't really implemented on Windows. -// We fake supporting regular stop and -9 to force kill. -func (clnt *client) Signal(containerID string, sig int) error { - var ( - cont *container - err error - ) - - // Get the container as we need it to get the container handle. - clnt.lock(containerID) - defer clnt.unlock(containerID) - if cont, err = clnt.getContainer(containerID); err != nil { - return err - } - - cont.manualStopRequested = true - - logrus.Debugf("libcontainerd: Signal() containerID=%s sig=%d pid=%d", containerID, sig, cont.systemPid) - - if syscall.Signal(sig) == syscall.SIGKILL { - // Terminate the compute system - if err := cont.hcsContainer.Terminate(); err != nil { - if !hcsshim.IsPending(err) { - logrus.Errorf("libcontainerd: failed to terminate %s - %q", containerID, err) - } - } - } else { - // Shut down the container - if err := cont.hcsContainer.Shutdown(); err != nil { - if !hcsshim.IsPending(err) && !hcsshim.IsAlreadyStopped(err) { - // ignore errors - logrus.Warnf("libcontainerd: failed to shutdown container %s: %q", containerID, err) - } - } - } - - return nil -} - -// While Linux has support for the full range of signals, signals aren't really implemented on Windows. -// We try to terminate the specified process whatever signal is requested. -func (clnt *client) SignalProcess(containerID string, processFriendlyName string, sig int) error { - clnt.lock(containerID) - defer clnt.unlock(containerID) - cont, err := clnt.getContainer(containerID) - if err != nil { - return err - } - - for _, p := range cont.processes { - if p.friendlyName == processFriendlyName { - return p.hcsProcess.Kill() - } - } - - return fmt.Errorf("SignalProcess could not find process %s in %s", processFriendlyName, containerID) -} - -// Resize handles a CLI event to resize an interactive docker run or docker exec -// window. 
-func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error { - // Get the libcontainerd container object - clnt.lock(containerID) - defer clnt.unlock(containerID) - cont, err := clnt.getContainer(containerID) - if err != nil { - return err - } - - h, w := uint16(height), uint16(width) - - if processFriendlyName == InitFriendlyName { - logrus.Debugln("libcontainerd: resizing systemPID in", containerID, cont.process.systemPid) - return cont.process.hcsProcess.ResizeConsole(w, h) - } - - for _, p := range cont.processes { - if p.friendlyName == processFriendlyName { - logrus.Debugln("libcontainerd: resizing exec'd process", containerID, p.systemPid) - return p.hcsProcess.ResizeConsole(w, h) - } - } - - return fmt.Errorf("Resize could not find containerID %s to resize", containerID) - -} - -// Pause handles pause requests for containers -func (clnt *client) Pause(containerID string) error { - unlockContainer := true - // Get the libcontainerd container object - clnt.lock(containerID) - defer func() { - if unlockContainer { - clnt.unlock(containerID) - } - }() - container, err := clnt.getContainer(containerID) - if err != nil { - return err - } - - if container.ociSpec.Windows.HyperV == nil { - return errors.New("cannot pause Windows Server Containers") - } - - err = container.hcsContainer.Pause() - if err != nil { - return err - } - - // Unlock container before calling back into the daemon - unlockContainer = false - clnt.unlock(containerID) - - return clnt.backend.StateChanged(containerID, StateInfo{ - CommonStateInfo: CommonStateInfo{ - State: StatePause, - }}) -} - -// Resume handles resume requests for containers -func (clnt *client) Resume(containerID string) error { - unlockContainer := true - // Get the libcontainerd container object - clnt.lock(containerID) - defer func() { - if unlockContainer { - clnt.unlock(containerID) - } - }() - container, err := clnt.getContainer(containerID) - if err != nil { - return err - } - - // This should never happen, since Windows Server Containers cannot be paused - - if container.ociSpec.Windows.HyperV == nil { - return errors.New("cannot resume Windows Server Containers") - } - - err = container.hcsContainer.Resume() - if err != nil { - return err - } - - // Unlock container before calling back into the daemon - unlockContainer = false - clnt.unlock(containerID) - - return clnt.backend.StateChanged(containerID, StateInfo{ - CommonStateInfo: CommonStateInfo{ - State: StateResume, - }}) -} - -// Stats handles stats requests for containers -func (clnt *client) Stats(containerID string) (*Stats, error) { - // Get the libcontainerd container object - clnt.lock(containerID) - defer clnt.unlock(containerID) - container, err := clnt.getContainer(containerID) - if err != nil { - return nil, err - } - s, err := container.hcsContainer.Statistics() - if err != nil { - return nil, err - } - st := Stats(s) - return &st, nil -} - -// Restore is the handler for restoring a container -func (clnt *client) Restore(containerID string, _ StdioCallback, unusedOnWindows ...CreateOption) error { - logrus.Debugf("libcontainerd: Restore(%s)", containerID) - - // TODO Windows: On RS1, a re-attach isn't possible. - // However, there is a scenario in which there is an issue. - // Consider a background container. The daemon dies unexpectedly. - // HCS will still have the compute service alive and running. - // For consistence, we call in to shoot it regardless if HCS knows about it - // We explicitly just log a warning if the terminate fails. 
- // Then we tell the backend the container exited. - if hc, err := hcsshim.OpenContainer(containerID); err == nil { - const terminateTimeout = time.Minute * 2 - err := hc.Terminate() - - if hcsshim.IsPending(err) { - err = hc.WaitTimeout(terminateTimeout) - } else if hcsshim.IsAlreadyStopped(err) { - err = nil - } - - if err != nil { - logrus.Warnf("libcontainerd: failed to terminate %s on restore - %q", containerID, err) - return err - } - } - return clnt.backend.StateChanged(containerID, StateInfo{ - CommonStateInfo: CommonStateInfo{ - State: StateExit, - ExitCode: 1 << 31, - }}) -} - -// GetPidsForContainer returns a list of process IDs running in a container. -// Not used on Windows. -func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) { - return nil, errors.New("not implemented on Windows") -} - -// Summary returns a summary of the processes running in a container. -// This is present in Windows to support docker top. In linux, the -// engine shells out to ps to get process information. On Windows, as -// the containers could be Hyper-V containers, they would not be -// visible on the container host. However, libcontainerd does have -// that information. -func (clnt *client) Summary(containerID string) ([]Summary, error) { - - // Get the libcontainerd container object - clnt.lock(containerID) - defer clnt.unlock(containerID) - container, err := clnt.getContainer(containerID) - if err != nil { - return nil, err - } - p, err := container.hcsContainer.ProcessList() - if err != nil { - return nil, err - } - pl := make([]Summary, len(p)) - for i := range p { - pl[i] = Summary(p[i]) - } - return pl, nil -} - -// UpdateResources updates resources for a running container. -func (clnt *client) UpdateResources(containerID string, resources Resources) error { - // Updating resource isn't supported on Windows - // but we should return nil for enabling updating container - return nil -} - -func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error { - return errors.New("Windows: Containers do not support checkpoints") -} - -func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error { - return errors.New("Windows: Containers do not support checkpoints") -} - -func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) { - return nil, errors.New("Windows: Containers do not support checkpoints") -} - -func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) { - return &ServerVersion{}, nil -} diff --git a/libcontainerd/container.go b/libcontainerd/container.go deleted file mode 100644 index b40321389a..0000000000 --- a/libcontainerd/container.go +++ /dev/null @@ -1,13 +0,0 @@ -package libcontainerd - -const ( - // InitFriendlyName is the name given in the lookup map of processes - // for the first process started in a container. 
- InitFriendlyName = "init" - configFilename = "config.json" -) - -type containerCommon struct { - process - processes map[string]*process -} diff --git a/libcontainerd/container_unix.go b/libcontainerd/container_unix.go deleted file mode 100644 index 9a7dbf01cd..0000000000 --- a/libcontainerd/container_unix.go +++ /dev/null @@ -1,246 +0,0 @@ -// +build linux solaris - -package libcontainerd - -import ( - "encoding/json" - "io" - "io/ioutil" - "os" - "path/filepath" - "sync" - "time" - - containerd "github.com/containerd/containerd/api/grpc/types" - "github.com/docker/docker/pkg/ioutils" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" - "github.com/tonistiigi/fifo" - "golang.org/x/net/context" - "golang.org/x/sys/unix" -) - -type container struct { - containerCommon - - // Platform specific fields are below here. - pauseMonitor - oom bool - runtime string - runtimeArgs []string -} - -type runtime struct { - path string - args []string -} - -// WithRuntime sets the runtime to be used for the created container -func WithRuntime(path string, args []string) CreateOption { - return runtime{path, args} -} - -func (rt runtime) Apply(p interface{}) error { - if pr, ok := p.(*container); ok { - pr.runtime = rt.path - pr.runtimeArgs = rt.args - } - return nil -} - -func (ctr *container) clean() error { - if os.Getenv("LIBCONTAINERD_NOCLEAN") == "1" { - return nil - } - if _, err := os.Lstat(ctr.dir); err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - - if err := os.RemoveAll(ctr.dir); err != nil { - return err - } - return nil -} - -// cleanProcess removes the fifos used by an additional process. -// Caller needs to lock container ID before calling this method. -func (ctr *container) cleanProcess(id string) { - if p, ok := ctr.processes[id]; ok { - for _, i := range []int{unix.Stdin, unix.Stdout, unix.Stderr} { - if err := os.Remove(p.fifo(i)); err != nil && !os.IsNotExist(err) { - logrus.Warnf("libcontainerd: failed to remove %v for process %v: %v", p.fifo(i), id, err) - } - } - } - delete(ctr.processes, id) -} - -func (ctr *container) spec() (*specs.Spec, error) { - var spec specs.Spec - dt, err := ioutil.ReadFile(filepath.Join(ctr.dir, configFilename)) - if err != nil { - return nil, err - } - if err := json.Unmarshal(dt, &spec); err != nil { - return nil, err - } - return &spec, nil -} - -func (ctr *container) start(spec *specs.Spec, checkpoint, checkpointDir string, attachStdio StdioCallback) (err error) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - ready := make(chan struct{}) - - fifoCtx, cancel := context.WithCancel(context.Background()) - defer func() { - if err != nil { - cancel() - } - }() - - iopipe, err := ctr.openFifos(fifoCtx, spec.Process.Terminal) - if err != nil { - return err - } - - var stdinOnce sync.Once - - // we need to delay stdin closure after container start or else "stdin close" - // event will be rejected by containerd. 
- // stdin closure happens in attachStdio - stdin := iopipe.Stdin - iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error { - var err error - stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed - err = stdin.Close() - go func() { - select { - case <-ready: - case <-ctx.Done(): - } - select { - case <-ready: - if err := ctr.sendCloseStdin(); err != nil { - logrus.Warnf("failed to close stdin: %+v", err) - } - default: - } - }() - }) - return err - }) - - r := &containerd.CreateContainerRequest{ - Id: ctr.containerID, - BundlePath: ctr.dir, - Stdin: ctr.fifo(unix.Stdin), - Stdout: ctr.fifo(unix.Stdout), - Stderr: ctr.fifo(unix.Stderr), - Checkpoint: checkpoint, - CheckpointDir: checkpointDir, - // check to see if we are running in ramdisk to disable pivot root - NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "", - Runtime: ctr.runtime, - RuntimeArgs: ctr.runtimeArgs, - } - ctr.client.appendContainer(ctr) - - if err := attachStdio(*iopipe); err != nil { - ctr.closeFifos(iopipe) - return err - } - - resp, err := ctr.client.remote.apiClient.CreateContainer(context.Background(), r) - if err != nil { - ctr.closeFifos(iopipe) - return err - } - ctr.systemPid = systemPid(resp.Container) - close(ready) - - return ctr.client.backend.StateChanged(ctr.containerID, StateInfo{ - CommonStateInfo: CommonStateInfo{ - State: StateStart, - Pid: ctr.systemPid, - }}) - -} - -func (ctr *container) newProcess(friendlyName string) *process { - return &process{ - dir: ctr.dir, - processCommon: processCommon{ - containerID: ctr.containerID, - friendlyName: friendlyName, - client: ctr.client, - }, - } -} - -func (ctr *container) handleEvent(e *containerd.Event) error { - ctr.client.lock(ctr.containerID) - defer ctr.client.unlock(ctr.containerID) - switch e.Type { - case StateExit, StatePause, StateResume, StateOOM: - st := StateInfo{ - CommonStateInfo: CommonStateInfo{ - State: e.Type, - ExitCode: e.Status, - }, - OOMKilled: e.Type == StateExit && ctr.oom, - } - if e.Type == StateOOM { - ctr.oom = true - } - if e.Type == StateExit && e.Pid != InitFriendlyName { - st.ProcessID = e.Pid - st.State = StateExitProcess - } - - // Remove process from list if we have exited - switch st.State { - case StateExit: - ctr.clean() - ctr.client.deleteContainer(e.Id) - case StateExitProcess: - ctr.cleanProcess(st.ProcessID) - } - ctr.client.q.append(e.Id, func() { - if err := ctr.client.backend.StateChanged(e.Id, st); err != nil { - logrus.Errorf("libcontainerd: backend.StateChanged(): %v", err) - } - if e.Type == StatePause || e.Type == StateResume { - ctr.pauseMonitor.handle(e.Type) - } - if e.Type == StateExit { - if en := ctr.client.getExitNotifier(e.Id); en != nil { - en.close() - } - } - }) - - default: - logrus.Debugf("libcontainerd: event unhandled: %+v", e) - } - return nil -} - -// discardFifos attempts to fully read the container fifos to unblock processes -// that may be blocked on the writer side. 
-func (ctr *container) discardFifos() { - ctx, _ := context.WithTimeout(context.Background(), 3*time.Second) - for _, i := range []int{unix.Stdout, unix.Stderr} { - f, err := fifo.OpenFifo(ctx, ctr.fifo(i), unix.O_RDONLY|unix.O_NONBLOCK, 0) - if err != nil { - logrus.Warnf("error opening fifo %v for discarding: %+v", f, err) - continue - } - go func() { - io.Copy(ioutil.Discard, f) - }() - } -} diff --git a/libcontainerd/container_windows.go b/libcontainerd/container_windows.go deleted file mode 100644 index 73fc6bd41b..0000000000 --- a/libcontainerd/container_windows.go +++ /dev/null @@ -1,338 +0,0 @@ -package libcontainerd - -import ( - "encoding/json" - "fmt" - "io" - "io/ioutil" - "strings" - "time" - - "github.com/Microsoft/hcsshim" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" - "golang.org/x/sys/windows" -) - -type container struct { - containerCommon - - // Platform specific fields are below here. There are none presently on Windows. - options []CreateOption - - // The ociSpec is required, as client.Create() needs a spec, - // but can be called from the RestartManager context which does not - // otherwise have access to the Spec - ociSpec specs.Spec - - isWindows bool - manualStopRequested bool - hcsContainer hcsshim.Container -} - -func (ctr *container) newProcess(friendlyName string) *process { - return &process{ - processCommon: processCommon{ - containerID: ctr.containerID, - friendlyName: friendlyName, - client: ctr.client, - }, - } -} - -// start starts a created container. -// Caller needs to lock container ID before calling this method. -func (ctr *container) start(attachStdio StdioCallback) error { - var err error - - // Start the container. If this is a servicing container, this call will block - // until the container is done with the servicing execution. - logrus.Debugln("libcontainerd: starting container ", ctr.containerID) - if err = ctr.hcsContainer.Start(); err != nil { - logrus.Errorf("libcontainerd: failed to start container: %s", err) - ctr.debugGCS() // Before terminating! - if err := ctr.terminate(); err != nil { - logrus.Errorf("libcontainerd: failed to cleanup after a failed Start. %s", err) - } else { - logrus.Debugln("libcontainerd: cleaned up after failed Start by calling Terminate") - } - return err - } - - defer ctr.debugGCS() - - // Note we always tell HCS to - // create stdout as it's required regardless of '-i' or '-t' options, so that - // docker can always grab the output through logs. We also tell HCS to always - // create stdin, even if it's not used - it will be closed shortly. Stderr - // is only created if it we're not -t. 
- var ( - emulateConsole bool - createStdErrPipe bool - ) - if ctr.ociSpec.Process != nil { - emulateConsole = ctr.ociSpec.Process.Terminal - createStdErrPipe = !ctr.ociSpec.Process.Terminal && !ctr.ociSpec.Windows.Servicing - } - - createProcessParms := &hcsshim.ProcessConfig{ - EmulateConsole: emulateConsole, - WorkingDirectory: ctr.ociSpec.Process.Cwd, - CreateStdInPipe: !ctr.ociSpec.Windows.Servicing, - CreateStdOutPipe: !ctr.ociSpec.Windows.Servicing, - CreateStdErrPipe: createStdErrPipe, - } - - if ctr.ociSpec.Process != nil && ctr.ociSpec.Process.ConsoleSize != nil { - createProcessParms.ConsoleSize[0] = uint(ctr.ociSpec.Process.ConsoleSize.Height) - createProcessParms.ConsoleSize[1] = uint(ctr.ociSpec.Process.ConsoleSize.Width) - } - - // Configure the environment for the process - createProcessParms.Environment = setupEnvironmentVariables(ctr.ociSpec.Process.Env) - if ctr.isWindows { - createProcessParms.CommandLine = strings.Join(ctr.ociSpec.Process.Args, " ") - } else { - createProcessParms.CommandArgs = ctr.ociSpec.Process.Args - } - createProcessParms.User = ctr.ociSpec.Process.User.Username - - // LCOW requires the raw OCI spec passed through HCS and onwards to GCS for the utility VM. - if !ctr.isWindows { - ociBuf, err := json.Marshal(ctr.ociSpec) - if err != nil { - return err - } - ociRaw := json.RawMessage(ociBuf) - createProcessParms.OCISpecification = &ociRaw - } - - // Start the command running in the container. - newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms) - if err != nil { - logrus.Errorf("libcontainerd: CreateProcess() failed %s", err) - if err := ctr.terminate(); err != nil { - logrus.Errorf("libcontainerd: failed to cleanup after a failed CreateProcess. %s", err) - } else { - logrus.Debugln("libcontainerd: cleaned up after failed CreateProcess by calling Terminate") - } - return err - } - - pid := newProcess.Pid() - - // Save the hcs Process and PID - ctr.process.friendlyName = InitFriendlyName - ctr.process.hcsProcess = newProcess - - // If this is a servicing container, wait on the process synchronously here and - // if it succeeds, wait for it cleanly shutdown and merge into the parent container. - if ctr.ociSpec.Windows.Servicing { - exitCode := ctr.waitProcessExitCode(&ctr.process) - - if exitCode != 0 { - if err := ctr.terminate(); err != nil { - logrus.Warnf("libcontainerd: terminating servicing container %s failed: %s", ctr.containerID, err) - } - return fmt.Errorf("libcontainerd: servicing container %s returned non-zero exit code %d", ctr.containerID, exitCode) - } - - return ctr.hcsContainer.WaitTimeout(time.Minute * 5) - } - - var stdout, stderr io.ReadCloser - var stdin io.WriteCloser - stdin, stdout, stderr, err = newProcess.Stdio() - if err != nil { - logrus.Errorf("libcontainerd: failed to get stdio pipes: %s", err) - if err := ctr.terminate(); err != nil { - logrus.Errorf("libcontainerd: failed to cleanup after a failed Stdio. 
%s", err) - } - return err - } - - iopipe := &IOPipe{Terminal: ctr.ociSpec.Process.Terminal} - - iopipe.Stdin = createStdInCloser(stdin, newProcess) - - // Convert io.ReadClosers to io.Readers - if stdout != nil { - iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout}) - } - if stderr != nil { - iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr}) - } - - // Save the PID - logrus.Debugf("libcontainerd: process started - PID %d", pid) - ctr.systemPid = uint32(pid) - - // Spin up a go routine waiting for exit to handle cleanup - go ctr.waitExit(&ctr.process, true) - - ctr.client.appendContainer(ctr) - - if err := attachStdio(*iopipe); err != nil { - // OK to return the error here, as waitExit will handle tear-down in HCS - return err - } - - // Tell the docker engine that the container has started. - si := StateInfo{ - CommonStateInfo: CommonStateInfo{ - State: StateStart, - Pid: ctr.systemPid, // Not sure this is needed? Double-check monitor.go in daemon BUGBUG @jhowardmsft - }} - logrus.Debugf("libcontainerd: start() completed OK, %+v", si) - return ctr.client.backend.StateChanged(ctr.containerID, si) - -} - -// waitProcessExitCode will wait for the given process to exit and return its error code. -func (ctr *container) waitProcessExitCode(process *process) int { - // Block indefinitely for the process to exit. - err := process.hcsProcess.Wait() - if err != nil { - if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE { - logrus.Warnf("libcontainerd: Wait() failed (container may have been killed): %s", err) - } - // Fall through here, do not return. This ensures we attempt to continue the - // shutdown in HCS and tell the docker engine that the process/container - // has exited to avoid a container being dropped on the floor. - } - - exitCode, err := process.hcsProcess.ExitCode() - if err != nil { - if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE { - logrus.Warnf("libcontainerd: unable to get exit code from container %s", ctr.containerID) - } - // Since we got an error retrieving the exit code, make sure that the code we return - // doesn't incorrectly indicate success. - exitCode = -1 - - // Fall through here, do not return. This ensures we attempt to continue the - // shutdown in HCS and tell the docker engine that the process/container - // has exited to avoid a container being dropped on the floor. - } - - return exitCode -} - -// waitExit runs as a goroutine waiting for the process to exit. It's -// equivalent to (in the linux containerd world) where events come in for -// state change notifications from containerd. 
-func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) error { - logrus.Debugln("libcontainerd: waitExit() on pid", process.systemPid) - - exitCode := ctr.waitProcessExitCode(process) - // Lock the container while removing the process/container from the list - ctr.client.lock(ctr.containerID) - - if !isFirstProcessToStart { - ctr.cleanProcess(process.friendlyName) - } else { - ctr.client.deleteContainer(ctr.containerID) - } - - // Unlock here so other threads are unblocked - ctr.client.unlock(ctr.containerID) - - // Assume the container has exited - si := StateInfo{ - CommonStateInfo: CommonStateInfo{ - State: StateExit, - ExitCode: uint32(exitCode), - Pid: process.systemPid, - ProcessID: process.friendlyName, - }, - UpdatePending: false, - } - - // But it could have been an exec'd process which exited - if !isFirstProcessToStart { - si.State = StateExitProcess - } else { - // Pending updates is only applicable for WCOW - if ctr.isWindows { - updatePending, err := ctr.hcsContainer.HasPendingUpdates() - if err != nil { - logrus.Warnf("libcontainerd: HasPendingUpdates() failed (container may have been killed): %s", err) - } else { - si.UpdatePending = updatePending - } - } - - logrus.Debugf("libcontainerd: shutting down container %s", ctr.containerID) - if err := ctr.shutdown(); err != nil { - logrus.Debugf("libcontainerd: failed to shutdown container %s", ctr.containerID) - } else { - logrus.Debugf("libcontainerd: completed shutting down container %s", ctr.containerID) - } - if err := ctr.hcsContainer.Close(); err != nil { - logrus.Error(err) - } - } - - if err := process.hcsProcess.Close(); err != nil { - logrus.Errorf("libcontainerd: hcsProcess.Close(): %v", err) - } - - // Call into the backend to notify it of the state change. - logrus.Debugf("libcontainerd: waitExit() calling backend.StateChanged %+v", si) - if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil { - logrus.Error(err) - } - - logrus.Debugf("libcontainerd: waitExit() completed OK, %+v", si) - - return nil -} - -// cleanProcess removes process from the map. -// Caller needs to lock container ID before calling this method. -func (ctr *container) cleanProcess(id string) { - delete(ctr.processes, id) -} - -// shutdown shuts down the container in HCS -// Caller needs to lock container ID before calling this method. -func (ctr *container) shutdown() error { - const shutdownTimeout = time.Minute * 5 - err := ctr.hcsContainer.Shutdown() - if hcsshim.IsPending(err) { - // Explicit timeout to avoid a (remote) possibility that shutdown hangs indefinitely. - err = ctr.hcsContainer.WaitTimeout(shutdownTimeout) - } else if hcsshim.IsAlreadyStopped(err) { - err = nil - } - - if err != nil { - logrus.Debugf("libcontainerd: error shutting down container %s %v calling terminate", ctr.containerID, err) - if err := ctr.terminate(); err != nil { - return err - } - return err - } - - return nil -} - -// terminate terminates the container in HCS -// Caller needs to lock container ID before calling this method. 
-func (ctr *container) terminate() error { - const terminateTimeout = time.Minute * 5 - err := ctr.hcsContainer.Terminate() - - if hcsshim.IsPending(err) { - err = ctr.hcsContainer.WaitTimeout(terminateTimeout) - } else if hcsshim.IsAlreadyStopped(err) { - err = nil - } - - if err != nil { - logrus.Debugf("libcontainerd: error terminating container %s %v", ctr.containerID, err) - return err - } - - return nil -} diff --git a/libcontainerd/errors.go b/libcontainerd/errors.go new file mode 100644 index 0000000000..db59ea878c --- /dev/null +++ b/libcontainerd/errors.go @@ -0,0 +1,46 @@ +package libcontainerd + +import "errors" + +type liberr struct { + err error +} + +func (e liberr) Error() string { + return e.err.Error() +} + +func (e liberr) Cause() error { + return e.err +} + +type notFoundErr struct { + liberr +} + +func (notFoundErr) NotFound() {} + +func newNotFoundError(err string) error { return notFoundErr{liberr{errors.New(err)}} } +func wrapNotFoundError(err error) error { return notFoundErr{liberr{err}} } + +type invalidParamErr struct { + liberr +} + +func (invalidParamErr) InvalidParameter() {} + +func newInvalidParameterError(err string) error { return invalidParamErr{liberr{errors.New(err)}} } + +type conflictErr struct { + liberr +} + +func (conflictErr) ConflictErr() {} + +func newConflictError(err string) error { return conflictErr{liberr{errors.New(err)}} } + +type sysErr struct { + liberr +} + +func wrapSystemError(err error) error { return sysErr{liberr{err}} } diff --git a/libcontainerd/io.go b/libcontainerd/io.go new file mode 100644 index 0000000000..2c4af58ce9 --- /dev/null +++ b/libcontainerd/io.go @@ -0,0 +1,36 @@ +package libcontainerd + +import "github.com/containerd/containerd" + +// Config returns the containerd.IOConfig of this pipe set +func (p *IOPipe) Config() containerd.IOConfig { + return p.config +} + +// Cancel aborts ongoing operations if they have not completed yet +func (p *IOPipe) Cancel() { + p.cancel() +} + +// Wait waits for io operations to finish +func (p *IOPipe) Wait() { +} + +// Close closes the underlying pipes +func (p *IOPipe) Close() error { + p.cancel() + + if p.Stdin != nil { + p.Stdin.Close() + } + + if p.Stdout != nil { + p.Stdout.Close() + } + + if p.Stderr != nil { + p.Stderr.Close() + } + + return nil +} diff --git a/libcontainerd/io_unix.go b/libcontainerd/io_unix.go new file mode 100644 index 0000000000..0c08b20136 --- /dev/null +++ b/libcontainerd/io_unix.go @@ -0,0 +1,60 @@ +// +build !windows + +package libcontainerd + +import ( + "context" + "io" + "syscall" + + "github.com/containerd/containerd" + "github.com/containerd/fifo" + "github.com/pkg/errors" +) + +func newIOPipe(fifos *containerd.FIFOSet) (*IOPipe, error) { + var ( + err error + ctx, cancel = context.WithCancel(context.Background()) + f io.ReadWriteCloser + iop = &IOPipe{ + Terminal: fifos.Terminal, + cancel: cancel, + config: containerd.IOConfig{ + Terminal: fifos.Terminal, + Stdin: fifos.In, + Stdout: fifos.Out, + Stderr: fifos.Err, + }, + } + ) + defer func() { + if err != nil { + cancel() + iop.Close() + } + }() + + if fifos.In != "" { + if f, err = fifo.OpenFifo(ctx, fifos.In, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil { + return nil, errors.WithStack(err) + } + iop.Stdin = f + } + + if fifos.Out != "" { + if f, err = fifo.OpenFifo(ctx, fifos.Out, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil { + return nil, errors.WithStack(err) + } + iop.Stdout = f + } + + if fifos.Err != "" { + if f, err = 
fifo.OpenFifo(ctx, fifos.Err, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil { + return nil, errors.WithStack(err) + } + iop.Stderr = f + } + + return iop, nil +} diff --git a/libcontainerd/io_windows.go b/libcontainerd/io_windows.go new file mode 100644 index 0000000000..312bdbd8cf --- /dev/null +++ b/libcontainerd/io_windows.go @@ -0,0 +1,138 @@ +package libcontainerd + +import ( + "context" + "io" + "net" + "sync" + + winio "github.com/Microsoft/go-winio" + "github.com/containerd/containerd" + "github.com/pkg/errors" +) + +type winpipe struct { + sync.Mutex + + ctx context.Context + listener net.Listener + readyCh chan struct{} + readyErr error + + client net.Conn +} + +func newWinpipe(ctx context.Context, pipe string) (*winpipe, error) { + l, err := winio.ListenPipe(pipe, nil) + if err != nil { + return nil, errors.Wrapf(err, "%q pipe creation failed", pipe) + } + wp := &winpipe{ + ctx: ctx, + listener: l, + readyCh: make(chan struct{}), + } + go func() { + go func() { + defer close(wp.readyCh) + defer wp.listener.Close() + c, err := wp.listener.Accept() + if err != nil { + wp.Lock() + if wp.readyErr == nil { + wp.readyErr = err + } + wp.Unlock() + return + } + wp.client = c + }() + + select { + case <-wp.readyCh: + case <-ctx.Done(): + wp.Lock() + if wp.readyErr == nil { + wp.listener.Close() + wp.readyErr = ctx.Err() + } + wp.Unlock() + } + }() + + return wp, nil +} + +func (wp *winpipe) Read(b []byte) (int, error) { + select { + case <-wp.ctx.Done(): + return 0, wp.ctx.Err() + case <-wp.readyCh: + return wp.client.Read(b) + } +} + +func (wp *winpipe) Write(b []byte) (int, error) { + select { + case <-wp.ctx.Done(): + return 0, wp.ctx.Err() + case <-wp.readyCh: + return wp.client.Write(b) + } +} + +func (wp *winpipe) Close() error { + select { + case <-wp.readyCh: + return wp.client.Close() + default: + return nil + } +} + +func newIOPipe(fifos *containerd.FIFOSet) (*IOPipe, error) { + var ( + err error + ctx, cancel = context.WithCancel(context.Background()) + p io.ReadWriteCloser + iop = &IOPipe{ + Terminal: fifos.Terminal, + cancel: cancel, + config: containerd.IOConfig{ + Terminal: fifos.Terminal, + Stdin: fifos.In, + Stdout: fifos.Out, + Stderr: fifos.Err, + }, + } + ) + defer func() { + if err != nil { + cancel() + iop.Close() + } + }() + + if fifos.In != "" { + if p, err = newWinpipe(ctx, fifos.In); err != nil { + return nil, err + } + iop.Stdin = p + } + + if fifos.Out != "" { + if p, err = newWinpipe(ctx, fifos.Out); err != nil { + return nil, err + } + iop.Stdout = p + } + + if fifos.Err != "" { + if p, err = newWinpipe(ctx, fifos.Err); err != nil { + return nil, err + } + iop.Stderr = p + } + + return iop, nil +} diff --git a/libcontainerd/oom_linux.go b/libcontainerd/oom_linux.go deleted file mode 100644 index 70f0daca5f..0000000000 --- a/libcontainerd/oom_linux.go +++ /dev/null @@ -1,31 +0,0 @@ -package libcontainerd - -import ( - "fmt" - "os" - "strconv" - - "github.com/opencontainers/runc/libcontainer/system" - "github.com/sirupsen/logrus" -) - -func setOOMScore(pid, score int) error { - oomScoreAdjPath := fmt.Sprintf("/proc/%d/oom_score_adj", pid) - f, err := os.OpenFile(oomScoreAdjPath, os.O_WRONLY, 0) - if err != nil { - return err - } - stringScore := strconv.Itoa(score) - _, err = f.WriteString(stringScore) - f.Close() - if os.IsPermission(err) { - // Setting oom_score_adj does not work in an - // unprivileged container. Ignore the error, but log - // it if we appear not to be in that situation. 
- if !system.RunningInUserNS() { - logrus.Debugf("Permission denied writing %q to %s", stringScore, oomScoreAdjPath) - } - return nil - } - return err -} diff --git a/libcontainerd/oom_solaris.go b/libcontainerd/oom_solaris.go deleted file mode 100644 index 2ebe5e87cf..0000000000 --- a/libcontainerd/oom_solaris.go +++ /dev/null @@ -1,5 +0,0 @@ -package libcontainerd - -func setOOMScore(pid, score int) error { - return nil -} diff --git a/libcontainerd/pausemonitor_unix.go b/libcontainerd/pausemonitor_unix.go deleted file mode 100644 index 4f3766d95c..0000000000 --- a/libcontainerd/pausemonitor_unix.go +++ /dev/null @@ -1,42 +0,0 @@ -// +build !windows - -package libcontainerd - -import ( - "sync" -) - -// pauseMonitor is helper to get notifications from pause state changes. -type pauseMonitor struct { - sync.Mutex - waiters map[string][]chan struct{} -} - -func (m *pauseMonitor) handle(t string) { - m.Lock() - defer m.Unlock() - if m.waiters == nil { - return - } - q, ok := m.waiters[t] - if !ok { - return - } - if len(q) > 0 { - close(q[0]) - m.waiters[t] = q[1:] - } -} - -func (m *pauseMonitor) append(t string, waiter chan struct{}) { - m.Lock() - defer m.Unlock() - if m.waiters == nil { - m.waiters = make(map[string][]chan struct{}) - } - _, ok := m.waiters[t] - if !ok { - m.waiters[t] = make([]chan struct{}, 0) - } - m.waiters[t] = append(m.waiters[t], waiter) -} diff --git a/libcontainerd/process.go b/libcontainerd/process.go deleted file mode 100644 index 57562c8789..0000000000 --- a/libcontainerd/process.go +++ /dev/null @@ -1,18 +0,0 @@ -package libcontainerd - -// processCommon are the platform common fields as part of the process structure -// which keeps the state for the main container process, as well as any exec -// processes. -type processCommon struct { - client *client - - // containerID is the Container ID - containerID string - - // friendlyName is an identifier for the process (or `InitFriendlyName` - // for the first process) - friendlyName string - - // systemPid is the PID of the main container process - systemPid uint32 -} diff --git a/libcontainerd/process_unix.go b/libcontainerd/process_unix.go deleted file mode 100644 index 3b54e325b5..0000000000 --- a/libcontainerd/process_unix.go +++ /dev/null @@ -1,107 +0,0 @@ -// +build linux solaris - -package libcontainerd - -import ( - "io" - "io/ioutil" - "os" - "path/filepath" - goruntime "runtime" - "strings" - - containerd "github.com/containerd/containerd/api/grpc/types" - "github.com/tonistiigi/fifo" - "golang.org/x/net/context" - "golang.org/x/sys/unix" -) - -var fdNames = map[int]string{ - unix.Stdin: "stdin", - unix.Stdout: "stdout", - unix.Stderr: "stderr", -} - -// process keeps the state for both main container process and exec process. -type process struct { - processCommon - - // Platform specific fields are below here. 
- dir string -} - -func (p *process) openFifos(ctx context.Context, terminal bool) (pipe *IOPipe, err error) { - if err := os.MkdirAll(p.dir, 0700); err != nil { - return nil, err - } - - io := &IOPipe{} - - io.Stdin, err = fifo.OpenFifo(ctx, p.fifo(unix.Stdin), unix.O_WRONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700) - if err != nil { - return nil, err - } - - defer func() { - if err != nil { - io.Stdin.Close() - } - }() - - io.Stdout, err = fifo.OpenFifo(ctx, p.fifo(unix.Stdout), unix.O_RDONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700) - if err != nil { - return nil, err - } - - defer func() { - if err != nil { - io.Stdout.Close() - } - }() - - if goruntime.GOOS == "solaris" || !terminal { - // For Solaris terminal handling is done exclusively by the runtime therefore we make no distinction - // in the processing for terminal and !terminal cases. - io.Stderr, err = fifo.OpenFifo(ctx, p.fifo(unix.Stderr), unix.O_RDONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700) - if err != nil { - return nil, err - } - defer func() { - if err != nil { - io.Stderr.Close() - } - }() - } else { - io.Stderr = ioutil.NopCloser(emptyReader{}) - } - - return io, nil -} - -func (p *process) sendCloseStdin() error { - _, err := p.client.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{ - Id: p.containerID, - Pid: p.friendlyName, - CloseStdin: true, - }) - if err != nil && (strings.Contains(err.Error(), "container not found") || strings.Contains(err.Error(), "process not found")) { - return nil - } - return err -} - -func (p *process) closeFifos(io *IOPipe) { - io.Stdin.Close() - io.Stdout.Close() - io.Stderr.Close() -} - -type emptyReader struct{} - -func (r emptyReader) Read(b []byte) (int, error) { - return 0, io.EOF -} - -func (p *process) fifo(index int) string { - return filepath.Join(p.dir, p.friendlyName+"-"+fdNames[index]) -} diff --git a/libcontainerd/process_windows.go b/libcontainerd/process_windows.go index 854c4dd1f0..76b92a6ceb 100644 --- a/libcontainerd/process_windows.go +++ b/libcontainerd/process_windows.go @@ -8,14 +8,6 @@ import ( "github.com/docker/docker/pkg/ioutils" ) -// process keeps the state for both main container process and exec process. -type process struct { - processCommon - - // Platform specific fields are below here. 
- hcsProcess hcsshim.Process -} - type autoClosingReader struct { io.ReadCloser sync.Once @@ -23,7 +15,7 @@ type autoClosingReader struct { func (r *autoClosingReader) Read(b []byte) (n int, err error) { n, err = r.ReadCloser.Read(b) - if err == io.EOF { + if err != nil { r.Once.Do(func() { r.ReadCloser.Close() }) } return @@ -46,3 +38,7 @@ func createStdInCloser(pipe io.WriteCloser, process hcsshim.Process) io.WriteClo return nil }) } + +func (p *process) Cleanup() error { + return nil +} diff --git a/libcontainerd/queue_unix.go b/libcontainerd/queue.go similarity index 94% rename from libcontainerd/queue_unix.go rename to libcontainerd/queue.go index 66765f75ec..38d74a0a46 100644 --- a/libcontainerd/queue_unix.go +++ b/libcontainerd/queue.go @@ -1,5 +1,3 @@ -// +build linux solaris - package libcontainerd import "sync" diff --git a/libcontainerd/queue_unix_test.go b/libcontainerd/queue_test.go similarity index 95% rename from libcontainerd/queue_unix_test.go rename to libcontainerd/queue_test.go index bb49a5d4c2..902f48aef2 100644 --- a/libcontainerd/queue_unix_test.go +++ b/libcontainerd/queue_test.go @@ -1,5 +1,3 @@ -// +build linux solaris - package libcontainerd import ( diff --git a/libcontainerd/remote.go b/libcontainerd/remote.go deleted file mode 100644 index 9031e3ae7d..0000000000 --- a/libcontainerd/remote.go +++ /dev/null @@ -1,20 +0,0 @@ -package libcontainerd - -// Remote on Linux defines the accesspoint to the containerd grpc API. -// Remote on Windows is largely an unimplemented interface as there is -// no remote containerd. -type Remote interface { - // Client returns a new Client instance connected with given Backend. - Client(Backend) (Client, error) - // Cleanup stops containerd if it was started by libcontainerd. - // Note this is not used on Windows as there is no remote containerd. - Cleanup() - // UpdateOptions allows various remote options to be updated at runtime. - UpdateOptions(...RemoteOption) error -} - -// RemoteOption allows to configure parameters of remotes. -// This is unused on Windows. -type RemoteOption interface { - Apply(Remote) error -} diff --git a/libcontainerd/remote_daemon.go b/libcontainerd/remote_daemon.go new file mode 100644 index 0000000000..e6fd05f08a --- /dev/null +++ b/libcontainerd/remote_daemon.go @@ -0,0 +1,317 @@ +// +build !windows + +package libcontainerd + +import ( + "context" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "sync" + "syscall" + "time" + + "github.com/BurntSushi/toml" + "github.com/containerd/containerd" + "github.com/containerd/containerd/server" + "github.com/docker/docker/pkg/system" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +const ( + maxConnectionRetryCount = 3 + healthCheckTimeout = 3 * time.Second + shutdownTimeout = 15 * time.Second + configFile = "containerd.toml" + binaryName = "docker-containerd" + pidFile = "docker-containerd.pid" +) + +type pluginConfigs struct { + Plugins map[string]interface{} `toml:"plugins"` +} + +type remote struct { + sync.RWMutex + server.Config + + daemonPid int + logger *logrus.Entry + + daemonWaitCh chan struct{} + clients []*client + shutdownContext context.Context + shutdownCancel context.CancelFunc + shutdown bool + + // Options + startDaemon bool + rootDir string + stateDir string + snapshotter string + pluginConfs pluginConfigs +} + +// New creates a fresh instance of libcontainerd remote. 
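+//
+// A minimal usage sketch (illustrative only; the directory paths below are
+// assumptions, not part of this patch):
+//
+//	r, err := New("/var/lib/docker/containerd", "/var/run/docker/containerd",
+//		WithStartDaemon(true))
+//	if err != nil {
+//		return err
+//	}
+//	defer r.Cleanup()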
+func New(rootDir, stateDir string, options ...RemoteOption) (rem Remote, err error) {
+	defer func() {
+		if err != nil {
+			err = errors.Wrap(err, "Failed to connect to containerd")
+		}
+	}()
+
+	r := &remote{
+		rootDir:  rootDir,
+		stateDir: stateDir,
+		Config: server.Config{
+			Root:  filepath.Join(rootDir, "daemon"),
+			State: filepath.Join(stateDir, "daemon"),
+		},
+		pluginConfs: pluginConfigs{make(map[string]interface{})},
+		daemonPid:   -1,
+		logger:      logrus.WithField("module", "libcontainerd"),
+	}
+	r.shutdownContext, r.shutdownCancel = context.WithCancel(context.Background())
+
+	rem = r
+	for _, option := range options {
+		if err = option.Apply(r); err != nil {
+			return
+		}
+	}
+	r.setDefaults()
+
+	if err = system.MkdirAll(stateDir, 0700, ""); err != nil {
+		return
+	}
+
+	if r.startDaemon {
+		os.Remove(r.GRPC.Address)
+		if err = r.startContainerd(); err != nil {
+			return
+		}
+		defer func() {
+			if err != nil {
+				r.Cleanup()
+			}
+		}()
+	}
+
+	// This client is only used to monitor the connection to containerd
+	client, err := containerd.New(r.GRPC.Address)
+	if err != nil {
+		return
+	}
+	if _, err := client.Version(context.Background()); err != nil {
+		system.KillProcess(r.daemonPid)
+		return nil, errors.Wrapf(err, "unable to get containerd version")
+	}
+
+	go r.monitorConnection(client)
+
+	return r, nil
+}
+
+func (r *remote) NewClient(ns string, b Backend) (Client, error) {
+	c := &client{
+		stateDir:   r.stateDir,
+		logger:     r.logger.WithField("namespace", ns),
+		namespace:  ns,
+		backend:    b,
+		containers: make(map[string]*container),
+	}
+
+	rclient, err := containerd.New(r.GRPC.Address, containerd.WithDefaultNamespace(ns))
+	if err != nil {
+		return nil, err
+	}
+	c.remote = rclient
+
+	go c.processEventStream(r.shutdownContext)
+
+	r.Lock()
+	r.clients = append(r.clients, c)
+	r.Unlock()
+	return c, nil
+}
+
+func (r *remote) Cleanup() {
+	if r.daemonPid != -1 {
+		r.shutdownCancel()
+		r.stopDaemon()
+	}
+
+	// cleanup some files
+	os.Remove(filepath.Join(r.stateDir, pidFile))
+
+	r.platformCleanup()
+}
+
+func (r *remote) getContainerdPid() (int, error) {
+	pidFile := filepath.Join(r.stateDir, pidFile)
+	f, err := os.OpenFile(pidFile, os.O_RDWR, 0600)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return -1, nil
+		}
+		return -1, err
+	}
+	defer f.Close()
+
+	b := make([]byte, 8)
+	n, err := f.Read(b)
+	if err != nil && err != io.EOF {
+		return -1, err
+	}
+
+	if n > 0 {
+		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
+		if err != nil {
+			return -1, err
+		}
+		if system.IsProcessAlive(int(pid)) {
+			return int(pid), nil
+		}
+	}
+
+	return -1, nil
+}
+
+func (r *remote) getContainerdConfig() (string, error) {
+	path := filepath.Join(r.stateDir, configFile)
+	f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
+	if err != nil {
+		return "", errors.Wrapf(err, "failed to open containerd config file at %s", path)
+	}
+	defer f.Close()
+
+	enc := toml.NewEncoder(f)
+	if err = enc.Encode(r.Config); err != nil {
+		return "", errors.Wrapf(err, "failed to encode general config")
+	}
+	if err = enc.Encode(r.pluginConfs); err != nil {
+		return "", errors.Wrapf(err, "failed to encode plugin configs")
+	}
+
+	return path, nil
+}
+
+func (r *remote) startContainerd() error {
+	pid, err := r.getContainerdPid()
+	if err != nil {
+		return err
+	}
+
+	if pid != -1 {
+		r.daemonPid = pid
+		logrus.WithField("pid", pid).
+ Infof("libcontainerd: %s is still running", binaryName) + return nil + } + + configFile, err := r.getContainerdConfig() + if err != nil { + return err + } + + args := []string{"--config", configFile} + cmd := exec.Command(binaryName, args...) + // redirect containerd logs to docker logs + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.SysProcAttr = containerdSysProcAttr() + // clear the NOTIFY_SOCKET from the env when starting containerd + cmd.Env = nil + for _, e := range os.Environ() { + if !strings.HasPrefix(e, "NOTIFY_SOCKET") { + cmd.Env = append(cmd.Env, e) + } + } + if err := cmd.Start(); err != nil { + return err + } + + r.daemonWaitCh = make(chan struct{}) + go func() { + // Reap our child when needed + if err := cmd.Wait(); err != nil { + r.logger.WithError(err).Errorf("containerd did not exit successfully") + } + close(r.daemonWaitCh) + }() + + r.daemonPid = cmd.Process.Pid + + err = ioutil.WriteFile(filepath.Join(r.stateDir, pidFile), []byte(fmt.Sprintf("%d", r.daemonPid)), 0660) + if err != nil { + system.KillProcess(r.daemonPid) + return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk") + } + + logrus.WithField("pid", r.daemonPid). + Infof("libcontainerd: started new %s process", binaryName) + + return nil +} + +func (r *remote) monitorConnection(client *containerd.Client) { + var transientFailureCount = 0 + + ticker := time.NewTicker(500 * time.Millisecond) + defer ticker.Stop() + + for { + <-ticker.C + ctx, cancel := context.WithTimeout(r.shutdownContext, healthCheckTimeout) + _, err := client.IsServing(ctx) + cancel() + if err == nil { + transientFailureCount = 0 + continue + } + + select { + case <-r.shutdownContext.Done(): + r.logger.Info("stopping healtcheck following graceful shutdown") + client.Close() + return + default: + } + + r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding") + + if r.daemonPid != -1 { + transientFailureCount++ + if transientFailureCount >= maxConnectionRetryCount || !system.IsProcessAlive(r.daemonPid) { + transientFailureCount = 0 + if system.IsProcessAlive(r.daemonPid) { + r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd") + // Try to get a stack trace + syscall.Kill(r.daemonPid, syscall.SIGUSR1) + <-time.After(100 * time.Millisecond) + system.KillProcess(r.daemonPid) + } + <-r.daemonWaitCh + var err error + client.Close() + os.Remove(r.GRPC.Address) + if err = r.startContainerd(); err != nil { + r.logger.WithError(err).Error("failed restarting containerd") + } else { + newClient, err := containerd.New(r.GRPC.Address) + if err != nil { + r.logger.WithError(err).Error("failed connect to containerd") + } else { + client = newClient + } + } + } + } + } +} diff --git a/libcontainerd/remote_daemon_options.go b/libcontainerd/remote_daemon_options.go new file mode 100644 index 0000000000..b167f64c8b --- /dev/null +++ b/libcontainerd/remote_daemon_options.go @@ -0,0 +1,141 @@ +// +build !windows + +package libcontainerd + +import "fmt" + +// WithRemoteAddr sets the external containerd socket to connect to. 
+func WithRemoteAddr(addr string) RemoteOption {
+	return rpcAddr(addr)
+}
+
+type rpcAddr string
+
+func (a rpcAddr) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.GRPC.Address = string(a)
+		return nil
+	}
+	return fmt.Errorf("WithRemoteAddr option not supported for this remote")
+}
+
+// WithRemoteAddrUser sets the uid and gid to create the RPC address with.
+func WithRemoteAddrUser(uid, gid int) RemoteOption {
+	return rpcUser{uid, gid}
+}
+
+type rpcUser struct {
+	uid int
+	gid int
+}
+
+func (u rpcUser) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.GRPC.Uid = u.uid
+		remote.GRPC.Gid = u.gid
+		return nil
+	}
+	return fmt.Errorf("WithRemoteAddrUser option not supported for this remote")
+}
+
+// WithStartDaemon defines if libcontainerd should also run the containerd daemon.
+func WithStartDaemon(start bool) RemoteOption {
+	return startDaemon(start)
+}
+
+type startDaemon bool
+
+func (s startDaemon) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.startDaemon = bool(s)
+		return nil
+	}
+	return fmt.Errorf("WithStartDaemon option not supported for this remote")
+}
+
+// WithLogLevel defines which log level to start containerd with.
+// This only makes sense if WithStartDaemon() was set to true.
+func WithLogLevel(lvl string) RemoteOption {
+	return logLevel(lvl)
+}
+
+type logLevel string
+
+func (l logLevel) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.Debug.Level = string(l)
+		return nil
+	}
+	return fmt.Errorf("WithLogLevel option not supported for this remote")
+}
+
+// WithDebugAddress defines at which location the debug GRPC connection
+// should be made.
+func WithDebugAddress(addr string) RemoteOption {
+	return debugAddress(addr)
+}
+
+type debugAddress string
+
+func (d debugAddress) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.Debug.Address = string(d)
+		return nil
+	}
+	return fmt.Errorf("WithDebugAddress option not supported for this remote")
+}
+
+// WithMetricsAddress defines at which location the metrics GRPC connection
+// should be made.
+func WithMetricsAddress(addr string) RemoteOption {
+	return metricsAddress(addr)
+}
+
+type metricsAddress string
+
+func (m metricsAddress) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.Metrics.Address = string(m)
+		return nil
+	}
+	return fmt.Errorf("WithMetricsAddress option not supported for this remote")
+}
+
+// WithSnapshotter defines which snapshotter driver should be used.
+func WithSnapshotter(name string) RemoteOption {
+	return snapshotter(name)
+}
+
+type snapshotter string
+
+func (s snapshotter) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.snapshotter = string(s)
+		return nil
+	}
+	return fmt.Errorf("WithSnapshotter option not supported for this remote")
+}
+
+// WithPlugin allows configuring a containerd plugin.
+// Configuration values passed need to be quoted if quotes are needed in
+// the toml format.
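+//
+// For example (a sketch; the plugin name and settings shown are assumptions,
+// not part of this patch):
+//
+//	WithPlugin("linux", map[string]interface{}{
+//		"shim":    "docker-containerd-shim",
+//		"runtime": "docker-runc",
+//	})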
+func WithPlugin(name string, conf interface{}) RemoteOption { + return pluginConf{ + name: name, + conf: conf, + } +} + +type pluginConf struct { + // Name is the name of the plugin + name string + conf interface{} +} + +func (p pluginConf) Apply(r Remote) error { + if remote, ok := r.(*remote); ok { + remote.pluginConfs.Plugins[p.name] = p.conf + return nil + } + return fmt.Errorf("WithPlugin option not supported for this remote") +} diff --git a/libcontainerd/remote_daemon_options_unix.go b/libcontainerd/remote_daemon_options_unix.go new file mode 100644 index 0000000000..e97789c4e5 --- /dev/null +++ b/libcontainerd/remote_daemon_options_unix.go @@ -0,0 +1,36 @@ +// +build linux solaris + +package libcontainerd + +import "fmt" + +// WithOOMScore defines the oom_score_adj to set for the containerd process. +func WithOOMScore(score int) RemoteOption { + return oomScore(score) +} + +type oomScore int + +func (o oomScore) Apply(r Remote) error { + if remote, ok := r.(*remote); ok { + remote.OOMScore = int(o) + return nil + } + return fmt.Errorf("WithOOMScore option not supported for this remote") +} + +// WithSubreaper sets whether containerd should register itself as a +// subreaper +func WithSubreaper(reap bool) RemoteOption { + return subreaper(reap) +} + +type subreaper bool + +func (s subreaper) Apply(r Remote) error { + if remote, ok := r.(*remote); ok { + remote.Subreaper = bool(s) + return nil + } + return fmt.Errorf("WithSubreaper option not supported for this remote") +} diff --git a/libcontainerd/remote_daemon_process.go b/libcontainerd/remote_daemon_process.go new file mode 100644 index 0000000000..a00406e150 --- /dev/null +++ b/libcontainerd/remote_daemon_process.go @@ -0,0 +1,56 @@ +// +build !windows + +package libcontainerd + +import "github.com/pkg/errors" + +// process represents the state for the main container process or an exec. 
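+//
+// Rough lifecycle sketch (illustrative; the field values are assumptions):
+//
+//	p := &process{id: "init", cid: containerID, root: stateDir}
+//	// ... create and start the task, then record its pid exactly once:
+//	_ = p.SetPid(taskPid)
+//	// once the process is gone, release the pipes (and, on unix, the fifos):
+//	_ = p.Cleanup()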
+type process struct { + // id is the logical name of the process + id string + + // cid is the container id to which this process belongs + cid string + + // pid is the identifier of the process + pid uint32 + + // io holds the io reader/writer associated with the process + io *IOPipe + + // root is the state directory for the process + root string +} + +func (p *process) ID() string { + return p.id +} + +func (p *process) Pid() uint32 { + return p.pid +} + +func (p *process) SetPid(pid uint32) error { + if p.pid != 0 { + return errors.Errorf("pid is already set to %d", pid) + } + + p.pid = pid + return nil +} + +func (p *process) IOPipe() *IOPipe { + return p.io +} + +func (p *process) CloseIO() { + if p.io.Stdin != nil { + p.io.Stdin.Close() + } + if p.io.Stdout != nil { + p.io.Stdout.Close() + } + if p.io.Stderr != nil { + p.io.Stderr.Close() + } +} diff --git a/libcontainerd/remote_daemon_process_unix.go b/libcontainerd/remote_daemon_process_unix.go new file mode 100644 index 0000000000..38533df35f --- /dev/null +++ b/libcontainerd/remote_daemon_process_unix.go @@ -0,0 +1,61 @@ +// +build linux solaris + +package libcontainerd + +import ( + "os" + "path/filepath" + + "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +var fdNames = map[int]string{ + unix.Stdin: "stdin", + unix.Stdout: "stdout", + unix.Stderr: "stderr", +} + +func (p *process) pipeName(index int) string { + return filepath.Join(p.root, p.id+"-"+fdNames[index]) +} + +func (p *process) IOPaths() (string, string, string) { + var ( + stdin = p.pipeName(unix.Stdin) + stdout = p.pipeName(unix.Stdout) + stderr = p.pipeName(unix.Stderr) + ) + // TODO: debug why we're having zombies when I don't unset those + if p.io.Stdin == nil { + stdin = "" + } + if p.io.Stderr == nil { + stderr = "" + } + return stdin, stdout, stderr +} + +func (p *process) Cleanup() error { + var retErr error + + // Ensure everything was closed + p.CloseIO() + + for _, i := range [3]string{ + p.pipeName(unix.Stdin), + p.pipeName(unix.Stdout), + p.pipeName(unix.Stderr), + } { + err := os.Remove(i) + if err != nil { + if retErr == nil { + retErr = errors.Wrapf(err, "failed to remove %s", i) + } else { + retErr = errors.Wrapf(retErr, "failed to remove %s", i) + } + } + } + + return retErr +} diff --git a/libcontainerd/remote_daemon_unix.go b/libcontainerd/remote_daemon_unix.go new file mode 100644 index 0000000000..e0c56e83aa --- /dev/null +++ b/libcontainerd/remote_daemon_unix.go @@ -0,0 +1,56 @@ +// +build linux solaris + +package libcontainerd + +import ( + "os" + "path/filepath" + "syscall" + "time" + + "github.com/docker/docker/pkg/system" +) + +const ( + sockFile = "docker-containerd.sock" + debugSockFile = "docker-containerd-debug.sock" +) + +func (r *remote) setDefaults() { + if r.GRPC.Address == "" { + r.GRPC.Address = filepath.Join(r.stateDir, sockFile) + } + if r.Debug.Address == "" { + r.Debug.Address = filepath.Join(r.stateDir, debugSockFile) + } + if r.Debug.Level == "" { + r.Debug.Level = "info" + } + if r.OOMScore == 0 { + r.OOMScore = -999 + } + if r.snapshotter == "" { + r.snapshotter = "overlay" + } +} + +func (r *remote) stopDaemon() { + // Ask the daemon to quit + syscall.Kill(r.daemonPid, syscall.SIGTERM) + // Wait up to 15secs for it to stop + for i := time.Duration(0); i < shutdownTimeout; i += time.Second { + if !system.IsProcessAlive(r.daemonPid) { + break + } + time.Sleep(time.Second) + } + + if system.IsProcessAlive(r.daemonPid) { + r.logger.WithField("pid", r.daemonPid).Warn("daemon didn't stop within 15 secs, killing it") + 
syscall.Kill(r.daemonPid, syscall.SIGKILL)
+	}
+}
+
+func (r *remote) platformCleanup() {
+	os.Remove(filepath.Join(r.stateDir, sockFile))
+}
diff --git a/libcontainerd/remote_daemon_windows.go b/libcontainerd/remote_daemon_windows.go
new file mode 100644
index 0000000000..44b5fc0837
--- /dev/null
+++ b/libcontainerd/remote_daemon_windows.go
@@ -0,0 +1,50 @@
+// +build remote_daemon
+
+package libcontainerd
+
+import (
+	"os"
+)
+
+const (
+	grpcPipeName  = `\\.\pipe\docker-containerd-containerd`
+	debugPipeName = `\\.\pipe\docker-containerd-debug`
+)
+
+func (r *remote) setDefaults() {
+	if r.GRPC.Address == "" {
+		r.GRPC.Address = grpcPipeName
+	}
+	if r.Debug.Address == "" {
+		r.Debug.Address = debugPipeName
+	}
+	if r.Debug.Level == "" {
+		r.Debug.Level = "info"
+	}
+	if r.snapshotter == "" {
+		r.snapshotter = "naive" // TODO(mlaventure): switch to "windows" once implemented
+	}
+}
+
+func (r *remote) stopDaemon() {
+	p, err := os.FindProcess(r.daemonPid)
+	if err != nil {
+		r.logger.WithField("pid", r.daemonPid).Warn("could not find daemon process")
+		return
+	}
+
+	if err = p.Kill(); err != nil {
+		r.logger.WithError(err).WithField("pid", r.daemonPid).Warn("could not kill daemon process")
+		return
+	}
+
+	_, err = p.Wait()
+	if err != nil {
+		r.logger.WithError(err).WithField("pid", r.daemonPid).Warn("failed to wait for daemon process")
+		return
+	}
+}
+
+func (r *remote) platformCleanup() {
+	// Nothing to do
+}
diff --git a/libcontainerd/remote_local.go b/libcontainerd/remote_local.go
new file mode 100644
index 0000000000..ad3be03abe
--- /dev/null
+++ b/libcontainerd/remote_local.go
@@ -0,0 +1,59 @@
+// +build windows
+
+package libcontainerd
+
+import (
+	"sync"
+
+	"github.com/sirupsen/logrus"
+)
+
+type remote struct {
+	sync.RWMutex
+
+	logger  *logrus.Entry
+	clients []*client
+
+	// Options
+	rootDir  string
+	stateDir string
+}
+
+// New creates a fresh instance of libcontainerd remote.
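+// On Windows there is no separate containerd daemon to manage: the returned
+// Remote only records its root/state directories and hands out local
+// clients, and Cleanup is a no-op.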
+func New(rootDir, stateDir string, options ...RemoteOption) (Remote, error) { + return &remote{ + logger: logrus.WithField("module", "libcontainerd"), + rootDir: rootDir, + stateDir: stateDir, + }, nil +} + +type client struct { + sync.Mutex + + rootDir string + stateDir string + backend Backend + logger *logrus.Entry + eventQ queue + containers map[string]*container +} + +func (r *remote) NewClient(ns string, b Backend) (Client, error) { + c := &client{ + rootDir: r.rootDir, + stateDir: r.stateDir, + backend: b, + logger: r.logger.WithField("namespace", ns), + containers: make(map[string]*container), + } + r.Lock() + r.clients = append(r.clients, c) + r.Unlock() + + return c, nil +} + +func (r *remote) Cleanup() { + // Nothing to do +} diff --git a/libcontainerd/remote_unix.go b/libcontainerd/remote_unix.go deleted file mode 100644 index 7bab53e796..0000000000 --- a/libcontainerd/remote_unix.go +++ /dev/null @@ -1,565 +0,0 @@ -// +build linux solaris - -package libcontainerd - -import ( - "fmt" - "io" - "io/ioutil" - "log" - "net" - "os" - "os/exec" - "path/filepath" - goruntime "runtime" - "strconv" - "strings" - "sync" - "time" - - containerd "github.com/containerd/containerd/api/grpc/types" - "github.com/docker/docker/pkg/locker" - "github.com/docker/docker/pkg/system" - "github.com/golang/protobuf/ptypes" - "github.com/golang/protobuf/ptypes/timestamp" - "github.com/sirupsen/logrus" - "golang.org/x/net/context" - "golang.org/x/sys/unix" - "google.golang.org/grpc" - "google.golang.org/grpc/grpclog" - "google.golang.org/grpc/health/grpc_health_v1" - "google.golang.org/grpc/transport" -) - -const ( - maxConnectionRetryCount = 3 - containerdHealthCheckTimeout = 3 * time.Second - containerdShutdownTimeout = 15 * time.Second - containerdBinary = "docker-containerd" - containerdPidFilename = "docker-containerd.pid" - containerdSockFilename = "docker-containerd.sock" - containerdStateDir = "containerd" - eventTimestampFilename = "event.ts" -) - -type remote struct { - sync.RWMutex - apiClient containerd.APIClient - daemonPid int - stateDir string - rpcAddr string - startDaemon bool - closedManually bool - debugLog bool - rpcConn *grpc.ClientConn - clients []*client - eventTsPath string - runtime string - runtimeArgs []string - daemonWaitCh chan struct{} - liveRestore bool - oomScore int - restoreFromTimestamp *timestamp.Timestamp -} - -// New creates a fresh instance of libcontainerd remote. -func New(stateDir string, options ...RemoteOption) (_ Remote, err error) { - defer func() { - if err != nil { - err = fmt.Errorf("Failed to connect to containerd. Please make sure containerd is installed in your PATH or you have specified the correct address. 
Got error: %v", err) - } - }() - r := &remote{ - stateDir: stateDir, - daemonPid: -1, - eventTsPath: filepath.Join(stateDir, eventTimestampFilename), - } - for _, option := range options { - if err := option.Apply(r); err != nil { - return nil, err - } - } - - if err := system.MkdirAll(stateDir, 0700, ""); err != nil { - return nil, err - } - - if r.rpcAddr == "" { - r.rpcAddr = filepath.Join(stateDir, containerdSockFilename) - } - - if r.startDaemon { - if err := r.runContainerdDaemon(); err != nil { - return nil, err - } - } - - // don't output the grpc reconnect logging - grpclog.SetLogger(log.New(ioutil.Discard, "", log.LstdFlags)) - dialOpts := []grpc.DialOption{ - grpc.WithInsecure(), - grpc.WithBackoffMaxDelay(2 * time.Second), - grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) { - return net.DialTimeout("unix", addr, timeout) - }), - } - conn, err := grpc.Dial(r.rpcAddr, dialOpts...) - if err != nil { - return nil, fmt.Errorf("error connecting to containerd: %v", err) - } - - r.rpcConn = conn - r.apiClient = containerd.NewAPIClient(conn) - - // Get the timestamp to restore from - t := r.getLastEventTimestamp() - tsp, err := ptypes.TimestampProto(t) - if err != nil { - logrus.Errorf("libcontainerd: failed to convert timestamp: %q", err) - } - r.restoreFromTimestamp = tsp - - go r.handleConnectionChange() - - if err := r.startEventsMonitor(); err != nil { - return nil, err - } - - return r, nil -} - -func (r *remote) UpdateOptions(options ...RemoteOption) error { - for _, option := range options { - if err := option.Apply(r); err != nil { - return err - } - } - return nil -} - -func (r *remote) handleConnectionChange() { - var transientFailureCount = 0 - - ticker := time.NewTicker(500 * time.Millisecond) - defer ticker.Stop() - healthClient := grpc_health_v1.NewHealthClient(r.rpcConn) - - for { - <-ticker.C - ctx, cancel := context.WithTimeout(context.Background(), containerdHealthCheckTimeout) - _, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{}) - cancel() - if err == nil { - continue - } - - logrus.Debugf("libcontainerd: containerd health check returned error: %v", err) - - if r.daemonPid != -1 { - if r.closedManually { - // Well, we asked for it to stop, just return - return - } - // all other errors are transient - // Reset state to be notified of next failure - transientFailureCount++ - if transientFailureCount >= maxConnectionRetryCount { - transientFailureCount = 0 - if system.IsProcessAlive(r.daemonPid) { - system.KillProcess(r.daemonPid) - } - <-r.daemonWaitCh - if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error - logrus.Errorf("libcontainerd: error restarting containerd: %v", err) - } - continue - } - } - } -} - -func (r *remote) Cleanup() { - if r.daemonPid == -1 { - return - } - r.closedManually = true - r.rpcConn.Close() - // Ask the daemon to quit - unix.Kill(r.daemonPid, unix.SIGTERM) - - // Wait up to 15secs for it to stop - for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second { - if !system.IsProcessAlive(r.daemonPid) { - break - } - time.Sleep(time.Second) - } - - if system.IsProcessAlive(r.daemonPid) { - logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid) - unix.Kill(r.daemonPid, unix.SIGKILL) - } - - // cleanup some files - os.Remove(filepath.Join(r.stateDir, containerdPidFilename)) - os.Remove(filepath.Join(r.stateDir, containerdSockFilename)) -} - -func (r *remote) Client(b Backend) (Client, error) { - c := &client{ - 
clientCommon: clientCommon{ - backend: b, - containers: make(map[string]*container), - locker: locker.New(), - }, - remote: r, - exitNotifiers: make(map[string]*exitNotifier), - liveRestore: r.liveRestore, - } - - r.Lock() - r.clients = append(r.clients, c) - r.Unlock() - return c, nil -} - -func (r *remote) updateEventTimestamp(t time.Time) { - f, err := os.OpenFile(r.eventTsPath, unix.O_CREAT|unix.O_WRONLY|unix.O_TRUNC, 0600) - if err != nil { - logrus.Warnf("libcontainerd: failed to open event timestamp file: %v", err) - return - } - defer f.Close() - - b, err := t.MarshalText() - if err != nil { - logrus.Warnf("libcontainerd: failed to encode timestamp: %v", err) - return - } - - n, err := f.Write(b) - if err != nil || n != len(b) { - logrus.Warnf("libcontainerd: failed to update event timestamp file: %v", err) - f.Truncate(0) - return - } -} - -func (r *remote) getLastEventTimestamp() time.Time { - t := time.Now() - - fi, err := os.Stat(r.eventTsPath) - if os.IsNotExist(err) || fi.Size() == 0 { - return t - } - - f, err := os.Open(r.eventTsPath) - if err != nil { - logrus.Warnf("libcontainerd: Unable to access last event ts: %v", err) - return t - } - defer f.Close() - - b := make([]byte, fi.Size()) - n, err := f.Read(b) - if err != nil || n != len(b) { - logrus.Warnf("libcontainerd: Unable to read last event ts: %v", err) - return t - } - - t.UnmarshalText(b) - - return t -} - -func (r *remote) startEventsMonitor() error { - // First, get past events - t := r.getLastEventTimestamp() - tsp, err := ptypes.TimestampProto(t) - if err != nil { - logrus.Errorf("libcontainerd: failed to convert timestamp: %q", err) - } - er := &containerd.EventsRequest{ - Timestamp: tsp, - } - - var events containerd.API_EventsClient - for { - events, err = r.apiClient.Events(context.Background(), er, grpc.FailFast(false)) - if err == nil { - break - } - logrus.Warnf("libcontainerd: failed to get events from containerd: %q", err) - - if r.closedManually { - // ignore error if grpc remote connection is closed manually - return nil - } - - <-time.After(100 * time.Millisecond) - } - - go r.handleEventStream(events) - return nil -} - -func (r *remote) handleEventStream(events containerd.API_EventsClient) { - for { - e, err := events.Recv() - if err != nil { - if grpc.ErrorDesc(err) == transport.ErrConnClosing.Desc && - r.closedManually { - // ignore error if grpc remote connection is closed manually - return - } - logrus.Errorf("libcontainerd: failed to receive event from containerd: %v", err) - go r.startEventsMonitor() - return - } - - logrus.Debugf("libcontainerd: received containerd event: %#v", e) - - var container *container - var c *client - r.RLock() - for _, c = range r.clients { - container, err = c.getContainer(e.Id) - if err == nil { - break - } - } - r.RUnlock() - if container == nil { - logrus.Warnf("libcontainerd: unknown container %s", e.Id) - continue - } - - if err := container.handleEvent(e); err != nil { - logrus.Errorf("libcontainerd: error processing state change for %s: %v", e.Id, err) - } - - tsp, err := ptypes.Timestamp(e.Timestamp) - if err != nil { - logrus.Errorf("libcontainerd: failed to convert event timestamp: %q", err) - continue - } - - r.updateEventTimestamp(tsp) - } -} - -func (r *remote) runContainerdDaemon() error { - pidFilename := filepath.Join(r.stateDir, containerdPidFilename) - f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600) - if err != nil { - return err - } - defer f.Close() - - // File exist, check if the daemon is alive - b := make([]byte, 8) - n, 
err := f.Read(b) - if err != nil && err != io.EOF { - return err - } - - if n > 0 { - pid, err := strconv.ParseUint(string(b[:n]), 10, 64) - if err != nil { - return err - } - if system.IsProcessAlive(int(pid)) { - logrus.Infof("libcontainerd: previous instance of containerd still alive (%d)", pid) - r.daemonPid = int(pid) - return nil - } - } - - // rewind the file - _, err = f.Seek(0, os.SEEK_SET) - if err != nil { - return err - } - - // Truncate it - err = f.Truncate(0) - if err != nil { - return err - } - - // Start a new instance - args := []string{ - "-l", fmt.Sprintf("unix://%s", r.rpcAddr), - "--metrics-interval=0", - "--start-timeout", "2m", - "--state-dir", filepath.Join(r.stateDir, containerdStateDir), - } - if goruntime.GOOS == "solaris" { - args = append(args, "--shim", "containerd-shim", "--runtime", "runc") - } else { - args = append(args, "--shim", "docker-containerd-shim") - if r.runtime != "" { - args = append(args, "--runtime") - args = append(args, r.runtime) - } - } - if r.debugLog { - args = append(args, "--debug") - } - if len(r.runtimeArgs) > 0 { - for _, v := range r.runtimeArgs { - args = append(args, "--runtime-args") - args = append(args, v) - } - logrus.Debugf("libcontainerd: runContainerdDaemon: runtimeArgs: %s", args) - } - - cmd := exec.Command(containerdBinary, args...) - // redirect containerd logs to docker logs - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - cmd.SysProcAttr = setSysProcAttr(true) - cmd.Env = nil - // clear the NOTIFY_SOCKET from the env when starting containerd - for _, e := range os.Environ() { - if !strings.HasPrefix(e, "NOTIFY_SOCKET") { - cmd.Env = append(cmd.Env, e) - } - } - if err := cmd.Start(); err != nil { - return err - } - - // unless strictly necessary, do not add anything in between here - // as the reaper goroutine below needs to kick in as soon as possible - // and any "return" from code paths added here will defeat the reaper - // process. - - r.daemonWaitCh = make(chan struct{}) - go func() { - cmd.Wait() - close(r.daemonWaitCh) - }() // Reap our child when needed - - logrus.Infof("libcontainerd: new containerd process, pid: %d", cmd.Process.Pid) - if err := setOOMScore(cmd.Process.Pid, r.oomScore); err != nil { - system.KillProcess(cmd.Process.Pid) - return err - } - if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil { - system.KillProcess(cmd.Process.Pid) - return err - } - - r.daemonPid = cmd.Process.Pid - return nil -} - -// WithRemoteAddr sets the external containerd socket to connect to. 
-func WithRemoteAddr(addr string) RemoteOption { - return rpcAddr(addr) -} - -type rpcAddr string - -func (a rpcAddr) Apply(r Remote) error { - if remote, ok := r.(*remote); ok { - remote.rpcAddr = string(a) - return nil - } - return fmt.Errorf("WithRemoteAddr option not supported for this remote") -} - -// WithRuntimePath sets the path of the runtime to be used as the -// default by containerd -func WithRuntimePath(rt string) RemoteOption { - return runtimePath(rt) -} - -type runtimePath string - -func (rt runtimePath) Apply(r Remote) error { - if remote, ok := r.(*remote); ok { - remote.runtime = string(rt) - return nil - } - return fmt.Errorf("WithRuntime option not supported for this remote") -} - -// WithRuntimeArgs sets the list of runtime args passed to containerd -func WithRuntimeArgs(args []string) RemoteOption { - return runtimeArgs(args) -} - -type runtimeArgs []string - -func (rt runtimeArgs) Apply(r Remote) error { - if remote, ok := r.(*remote); ok { - remote.runtimeArgs = rt - return nil - } - return fmt.Errorf("WithRuntimeArgs option not supported for this remote") -} - -// WithStartDaemon defines if libcontainerd should also run containerd daemon. -func WithStartDaemon(start bool) RemoteOption { - return startDaemon(start) -} - -type startDaemon bool - -func (s startDaemon) Apply(r Remote) error { - if remote, ok := r.(*remote); ok { - remote.startDaemon = bool(s) - return nil - } - return fmt.Errorf("WithStartDaemon option not supported for this remote") -} - -// WithDebugLog defines if containerd debug logs will be enabled for daemon. -func WithDebugLog(debug bool) RemoteOption { - return debugLog(debug) -} - -type debugLog bool - -func (d debugLog) Apply(r Remote) error { - if remote, ok := r.(*remote); ok { - remote.debugLog = bool(d) - return nil - } - return fmt.Errorf("WithDebugLog option not supported for this remote") -} - -// WithLiveRestore defines if containers are stopped on shutdown or restored. -func WithLiveRestore(v bool) RemoteOption { - return liveRestore(v) -} - -type liveRestore bool - -func (l liveRestore) Apply(r Remote) error { - if remote, ok := r.(*remote); ok { - remote.liveRestore = bool(l) - for _, c := range remote.clients { - c.liveRestore = bool(l) - } - return nil - } - return fmt.Errorf("WithLiveRestore option not supported for this remote") -} - -// WithOOMScore defines the oom_score_adj to set for the containerd process. -func WithOOMScore(score int) RemoteOption { - return oomScore(score) -} - -type oomScore int - -func (o oomScore) Apply(r Remote) error { - if remote, ok := r.(*remote); ok { - remote.oomScore = int(o) - return nil - } - return fmt.Errorf("WithOOMScore option not supported for this remote") -} diff --git a/libcontainerd/remote_windows.go b/libcontainerd/remote_windows.go deleted file mode 100644 index 74c10447bb..0000000000 --- a/libcontainerd/remote_windows.go +++ /dev/null @@ -1,36 +0,0 @@ -package libcontainerd - -import "github.com/docker/docker/pkg/locker" - -type remote struct { -} - -func (r *remote) Client(b Backend) (Client, error) { - c := &client{ - clientCommon: clientCommon{ - backend: b, - containers: make(map[string]*container), - locker: locker.New(), - }, - } - return c, nil -} - -// Cleanup is a no-op on Windows. It is here to implement the interface. -func (r *remote) Cleanup() { -} - -func (r *remote) UpdateOptions(opts ...RemoteOption) error { - return nil -} - -// New creates a fresh instance of libcontainerd remote. On Windows, -// this is not used as there is no remote containerd process. 
-func New(_ string, _ ...RemoteOption) (Remote, error) { - return &remote{}, nil -} - -// WithLiveRestore is a noop on windows. -func WithLiveRestore(v bool) RemoteOption { - return nil -} diff --git a/libcontainerd/types.go b/libcontainerd/types.go index c7ade6b188..9e05c16bf8 100644 --- a/libcontainerd/types.go +++ b/libcontainerd/types.go @@ -1,64 +1,110 @@ package libcontainerd import ( + "context" "io" + "time" - containerd "github.com/containerd/containerd/api/grpc/types" + "github.com/containerd/containerd" "github.com/opencontainers/runtime-spec/specs-go" - "golang.org/x/net/context" ) -// State constants used in state change reporting. +// EventType represents a possible event from libcontainerd +type EventType string + +// Event constants used when reporting events const ( - StateStart = "start-container" - StatePause = "pause" - StateResume = "resume" - StateExit = "exit" - StateRestore = "restore" - StateExitProcess = "exit-process" - StateOOM = "oom" // fake state + EventUnknown EventType = "unknown" + EventExit EventType = "exit" + EventOOM EventType = "oom" + EventCreate EventType = "create" + EventStart EventType = "start" + EventExecAdded EventType = "exec-added" + EventExecStarted EventType = "exec-started" + EventPaused EventType = "paused" + EventResumed EventType = "resumed" ) -// CommonStateInfo contains the state info common to all platforms. -type CommonStateInfo struct { // FIXME: event? - State string - Pid uint32 - ExitCode uint32 - ProcessID string +// Status represents the current status of a container +type Status string + +// Possible container statuses +const ( + // Running indicates the process is currently executing + StatusRunning Status = "running" + // Created indicates the process has been created within containerd but the + // user-defined process has not started + StatusCreated Status = "created" + // Stopped indicates that the process has run and exited + StatusStopped Status = "stopped" + // Paused indicates that the process is currently paused + StatusPaused Status = "paused" + // Pausing indicates that the process is currently switching from a + // running state into a paused state + StatusPausing Status = "pausing" + // Unknown indicates that we could not determine the status from the runtime + StatusUnknown Status = "unknown" +) + +// Remote on Linux defines the access point to the containerd gRPC API. +// Remote on Windows is largely an unimplemented interface as there is +// no remote containerd. +type Remote interface { + // NewClient returns a new Client instance connected with the given Backend. + NewClient(namespace string, backend Backend) (Client, error) + // Cleanup stops containerd if it was started by libcontainerd. + // Note this is not used on Windows as there is no remote containerd. + Cleanup() +} + +// RemoteOption allows configuring parameters of remotes. +// This is unused on Windows. +type RemoteOption interface { + Apply(Remote) error +} + +// EventInfo contains the details reported with an event +type EventInfo struct { + ContainerID string + ProcessID string + Pid uint32 + ExitCode uint32 + ExitedAt time.Time + OOMKilled bool + // Windows-only field + UpdatePending bool } // Backend defines callbacks that the client of the library needs to implement. type Backend interface { - StateChanged(containerID string, state StateInfo) error + ProcessEvent(containerID string, event EventType, ei EventInfo) error } // Client provides access to containerd features.
type Client interface { - GetServerVersion(ctx context.Context) (*ServerVersion, error) - Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error - Signal(containerID string, sig int) error - SignalProcess(containerID string, processFriendlyName string, sig int) error - AddProcess(ctx context.Context, containerID, processFriendlyName string, process Process, attachStdio StdioCallback) (int, error) - Resize(containerID, processFriendlyName string, width, height int) error - Pause(containerID string) error - Resume(containerID string) error - Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error - Stats(containerID string) (*Stats, error) - GetPidsForContainer(containerID string) ([]int, error) - Summary(containerID string) ([]Summary, error) - UpdateResources(containerID string, resources Resources) error - CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error - DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error - ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) -} + Restore(ctx context.Context, containerID string, attachStdio StdioCallback) (alive bool, pid int, err error) -// CreateOption allows to configure parameters of container creation. -type CreateOption interface { - Apply(interface{}) error + Create(ctx context.Context, containerID string, spec *specs.Spec, runtimeOptions interface{}) error + Start(ctx context.Context, containerID, checkpointDir string, withStdin bool, attachStdio StdioCallback) (pid int, err error) + SignalProcess(ctx context.Context, containerID, processID string, signal int) error + Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) + ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error + CloseStdin(ctx context.Context, containerID, processID string) error + Pause(ctx context.Context, containerID string) error + Resume(ctx context.Context, containerID string) error + Stats(ctx context.Context, containerID string) (*Stats, error) + ListPids(ctx context.Context, containerID string) ([]uint32, error) + Summary(ctx context.Context, containerID string) ([]Summary, error) + DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) + Delete(ctx context.Context, containerID string) error + Status(ctx context.Context, containerID string) (Status, error) + + UpdateResources(ctx context.Context, containerID string, resources *Resources) error + CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error } // StdioCallback is called to connect a container or process stdio. -type StdioCallback func(IOPipe) error +type StdioCallback func(*IOPipe) (containerd.IO, error) // IOPipe contains the stdio streams. 
type IOPipe struct { @@ -66,10 +112,12 @@ type IOPipe struct { Stdout io.ReadCloser Stderr io.ReadCloser Terminal bool // Whether stderr is connected on Windows + + cancel context.CancelFunc + config containerd.IOConfig } // ServerVersion contains version information as retrieved from the // server type ServerVersion struct { - containerd.GetServerVersionResponse } diff --git a/libcontainerd/types_linux.go b/libcontainerd/types_linux.go index f21a85eec9..b63efcb15c 100644 --- a/libcontainerd/types_linux.go +++ b/libcontainerd/types_linux.go @@ -1,49 +1,30 @@ package libcontainerd import ( - containerd "github.com/containerd/containerd/api/grpc/types" - "github.com/opencontainers/runtime-spec/specs-go" + "time" + + "github.com/containerd/cgroups" + specs "github.com/opencontainers/runtime-spec/specs-go" ) -// Process contains information to start a specific application inside the container. -type Process struct { - // Terminal creates an interactive terminal for the container. - Terminal bool `json:"terminal"` - // User specifies user information for the process. - User *specs.User `json:"user"` - // Args specifies the binary and arguments for the application to execute. - Args []string `json:"args"` - // Env populates the process environment for the process. - Env []string `json:"env,omitempty"` - // Cwd is the current working directory for the process and must be - // relative to the container's root. - Cwd *string `json:"cwd"` - // Capabilities are linux capabilities that are kept for the container. - Capabilities []string `json:"capabilities,omitempty"` - // Rlimits specifies rlimit options to apply to the process. - Rlimits []specs.POSIXRlimit `json:"rlimits,omitempty"` - // ApparmorProfile specifies the apparmor profile for the container. - ApparmorProfile *string `json:"apparmorProfile,omitempty"` - // SelinuxLabel specifies the selinux context that the container process is run as. - SelinuxLabel *string `json:"selinuxLabel,omitempty"` -} - -// StateInfo contains description about the new state container has entered. -type StateInfo struct { - CommonStateInfo - - // Platform specific StateInfo - OOMKilled bool -} - -// Stats contains a stats properties from containerd. -type Stats containerd.StatsResponse - -// Summary contains a container summary from containerd +// Summary is not used on Linux type Summary struct{} -// Resources defines updatable container resource values. -type Resources containerd.UpdateResource +// Stats holds metrics properties as returned by containerd +type Stats struct { + Read time.Time + Metrics *cgroups.Metrics +} + +func interfaceToStats(read time.Time, v interface{}) *Stats { + return &Stats{ + Metrics: v.(*cgroups.Metrics), + Read: read, + } +} + +// Resources defines updatable container resource values. TODO: it must match the upcoming containerd API +type Resources specs.LinuxResources // Checkpoints contains the details of a checkpoint -type Checkpoints containerd.ListCheckpointResponse +type Checkpoints struct{} diff --git a/libcontainerd/types_solaris.go b/libcontainerd/types_solaris.go deleted file mode 100644 index 2ab18eb0da..0000000000 --- a/libcontainerd/types_solaris.go +++ /dev/null @@ -1,43 +0,0 @@ -package libcontainerd - -import ( - containerd "github.com/containerd/containerd/api/grpc/types" - "github.com/opencontainers/runtime-spec/specs-go" -) - -// Process contains information to start a specific application inside the container. -type Process struct { - // Terminal creates an interactive terminal for the container.
- Terminal bool `json:"terminal"` - // User specifies user information for the process. - User *specs.User `json:"user"` - // Args specifies the binary and arguments for the application to execute. - Args []string `json:"args"` - // Env populates the process environment for the process. - Env []string `json:"env,omitempty"` - // Cwd is the current working directory for the process and must be - // relative to the container's root. - Cwd *string `json:"cwd"` - // Capabilities are linux capabilities that are kept for the container. - Capabilities []string `json:"capabilities,omitempty"` -} - -// Stats contains a stats properties from containerd. -type Stats struct{} - -// Summary contains a container summary from containerd -type Summary struct{} - -// StateInfo contains description about the new state container has entered. -type StateInfo struct { - CommonStateInfo - - // Platform specific StateInfo - OOMKilled bool -} - -// Resources defines updatable container resource values. -type Resources struct{} - -// Checkpoints contains the details of a checkpoint -type Checkpoints containerd.ListCheckpointResponse diff --git a/libcontainerd/types_windows.go b/libcontainerd/types_windows.go index f271ecd479..aab8079f3b 100644 --- a/libcontainerd/types_windows.go +++ b/libcontainerd/types_windows.go @@ -1,27 +1,27 @@ package libcontainerd import ( + "time" + "github.com/Microsoft/hcsshim" opengcs "github.com/Microsoft/opengcs/client" - "github.com/opencontainers/runtime-spec/specs-go" ) -// Process contains information to start a specific application inside the container. -type Process specs.Process - // Summary contains a ProcessList item from HCS to support `top` type Summary hcsshim.ProcessListItem -// StateInfo contains description about the new state container has entered. -type StateInfo struct { - CommonStateInfo - - // Platform specific StateInfo - UpdatePending bool // Indicates that there are some update operations pending that should be completed by a servicing container. +// Stats contains statistics from HCS +type Stats struct { + Read time.Time + HCSStats *hcsshim.Statistics } -// Stats contains statistics from HCS -type Stats hcsshim.Statistics +func interfaceToStats(read time.Time, v interface{}) *Stats { + return &Stats{ + HCSStats: v.(*hcsshim.Statistics), + Read: read, + } +} // Resources defines updatable container resource values. 
type Resources struct{} diff --git a/libcontainerd/utils_linux.go b/libcontainerd/utils_linux.go index 5372b886cf..0f0adf322d 100644 --- a/libcontainerd/utils_linux.go +++ b/libcontainerd/utils_linux.go @@ -1,63 +1,12 @@ package libcontainerd -import ( - "syscall" +import "syscall" - containerd "github.com/containerd/containerd/api/grpc/types" - "github.com/opencontainers/runtime-spec/specs-go" - "golang.org/x/sys/unix" -) - -func getRootIDs(s specs.Spec) (int, int, error) { - var hasUserns bool - for _, ns := range s.Linux.Namespaces { - if ns.Type == specs.UserNamespace { - hasUserns = true - break - } - } - if !hasUserns { - return 0, 0, nil - } - uid := hostIDFromMap(0, s.Linux.UIDMappings) - gid := hostIDFromMap(0, s.Linux.GIDMappings) - return uid, gid, nil -} - -func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int { - for _, m := range mp { - if id >= m.ContainerID && id <= m.ContainerID+m.Size-1 { - return int(m.HostID + id - m.ContainerID) - } - } - return 0 -} - -func systemPid(ctr *containerd.Container) uint32 { - var pid uint32 - for _, p := range ctr.Processes { - if p.Pid == InitFriendlyName { - pid = p.SystemPid - } - } - return pid -} - -func convertRlimits(sr []specs.POSIXRlimit) (cr []*containerd.Rlimit) { - for _, r := range sr { - cr = append(cr, &containerd.Rlimit{ - Type: r.Type, - Hard: r.Hard, - Soft: r.Soft, - }) - } - return -} - -// setPDeathSig sets the parent death signal to SIGKILL -func setSysProcAttr(sid bool) *syscall.SysProcAttr { +// containerdSysProcAttr returns the SysProcAttr to use when exec'ing +// containerd +func containerdSysProcAttr() *syscall.SysProcAttr { return &syscall.SysProcAttr{ - Setsid: sid, - Pdeathsig: unix.SIGKILL, + Setsid: true, + Pdeathsig: syscall.SIGKILL, } } diff --git a/libcontainerd/utils_solaris.go b/libcontainerd/utils_solaris.go deleted file mode 100644 index 10ae599808..0000000000 --- a/libcontainerd/utils_solaris.go +++ /dev/null @@ -1,27 +0,0 @@ -package libcontainerd - -import ( - "syscall" - - containerd "github.com/containerd/containerd/api/grpc/types" - "github.com/opencontainers/runtime-spec/specs-go" -) - -func getRootIDs(s specs.Spec) (int, int, error) { - return 0, 0, nil -} - -func systemPid(ctr *containerd.Container) uint32 { - var pid uint32 - for _, p := range ctr.Processes { - if p.Pid == InitFriendlyName { - pid = p.SystemPid - } - } - return pid -} - -// setPDeathSig sets the parent death signal to SIGKILL -func setSysProcAttr(sid bool) *syscall.SysProcAttr { - return nil -} diff --git a/libcontainerd/utils_windows.go b/libcontainerd/utils_windows.go index bca9fa2086..1347ff2b95 100644 --- a/libcontainerd/utils_windows.go +++ b/libcontainerd/utils_windows.go @@ -3,6 +3,8 @@ package libcontainerd import ( "strings" + "syscall" + opengcs "github.com/Microsoft/opengcs/client" ) @@ -36,3 +38,9 @@ func (c *container) debugGCS() { } cfg.DebugGCS() } + +// containerdSysProcAttr returns the SysProcAttr to use when exec'ing +// containerd +func containerdSysProcAttr() *syscall.SysProcAttr { + return nil +} diff --git a/oci/defaults.go b/oci/defaults.go index d706fafcc0..bf74ef4eeb 100644 --- a/oci/defaults.go +++ b/oci/defaults.go @@ -69,8 +69,14 @@ func DefaultSolarisSpec() specs.Spec { func DefaultLinuxSpec() specs.Spec { s := specs.Spec{ Version: specs.Version, - Process: &specs.Process{}, - Root: &specs.Root{}, + Process: &specs.Process{ + Capabilities: &specs.LinuxCapabilities{ + Bounding: defaultCapabilities(), + Permitted: defaultCapabilities(), + Inheritable: defaultCapabilities(), + Effective: 
defaultCapabilities(), + }, + }, } s.Mounts = []specs.Mount{ { @@ -116,14 +122,6 @@ func DefaultLinuxSpec() specs.Spec { Options: []string{"nosuid", "noexec", "nodev", "mode=1777"}, }, } - s.Process = &specs.Process{ - Capabilities: &specs.LinuxCapabilities{ - Bounding: defaultCapabilities(), - Permitted: defaultCapabilities(), - Inheritable: defaultCapabilities(), - Effective: defaultCapabilities(), - }, - } s.Linux = &specs.Linux{ MaskedPaths: []string{ diff --git a/pkg/authorization/plugin.go b/pkg/authorization/plugin.go index 939f926744..2797a72d58 100644 --- a/pkg/authorization/plugin.go +++ b/pkg/authorization/plugin.go @@ -48,9 +48,10 @@ func GetPluginGetter() plugingetter.PluginGetter { // authorizationPlugin is an internal adapter to docker plugin system type authorizationPlugin struct { - plugin *plugins.Client - name string - once sync.Once + initErr error + plugin *plugins.Client + name string + once sync.Once } func newAuthorizationPlugin(name string) Plugin { @@ -95,7 +96,6 @@ func (a *authorizationPlugin) AuthZResponse(authReq *Request) (*Response, error) // initPlugin initializes the authorization plugin if needed func (a *authorizationPlugin) initPlugin() error { // Lazy loading of plugins - var err error a.once.Do(func() { if a.plugin == nil { var plugin plugingetter.CompatPlugin @@ -108,11 +108,11 @@ func (a *authorizationPlugin) initPlugin() error { plugin, e = plugins.Get(a.name, AuthZApiImplements) } if e != nil { - err = e + a.initErr = e return } a.plugin = plugin.Client() } }) - return err + return a.initErr } diff --git a/pkg/mount/mount.go b/pkg/mount/mount.go index c9fdfd6942..eced0219fd 100644 --- a/pkg/mount/mount.go +++ b/pkg/mount/mount.go @@ -3,6 +3,8 @@ package mount import ( "sort" "strings" + + "github.com/sirupsen/logrus" ) // GetMounts retrieves a list of mounts for the current running process. @@ -74,12 +76,18 @@ func RecursiveUnmount(target string) error { if !strings.HasPrefix(m.Mountpoint, target) { continue } - if err := Unmount(m.Mountpoint); err != nil && i == len(mounts)-1 { + logrus.Debugf("Trying to unmount %s", m.Mountpoint) + err = Unmount(m.Mountpoint) + if err != nil && i == len(mounts)-1 { if mounted, err := Mounted(m.Mountpoint); err != nil || mounted { return err } // Ignore errors for submounts and continue trying to unmount others // The final unmount should fail if there are any submounts remaining + } else if err != nil { + logrus.Errorf("Failed to unmount %s: %v", m.Mountpoint, err) + } else if err == nil { + logrus.Debugf("Unmounted %s", m.Mountpoint) } } return nil diff --git a/pkg/system/process_windows.go b/pkg/system/process_windows.go new file mode 100644 index 0000000000..5973c46de9 --- /dev/null +++ b/pkg/system/process_windows.go @@ -0,0 +1,18 @@ +package system + +import "os" + +// IsProcessAlive returns true if the process with the given pid is running. +func IsProcessAlive(pid int) bool { + _, err := os.FindProcess(pid) + + return err == nil +} + +// KillProcess force-stops a process.
+func KillProcess(pid int) { + p, err := os.FindProcess(pid) + if err == nil { + p.Kill() + } +} diff --git a/pkg/system/rm.go b/pkg/system/rm.go index 101b569a56..c453adcdb9 100644 --- a/pkg/system/rm.go +++ b/pkg/system/rm.go @@ -26,7 +26,7 @@ func EnsureRemoveAll(dir string) error { // track retries exitOnErr := make(map[string]int) - maxRetry := 5 + maxRetry := 50 // Attempt to unmount anything beneath this dir first mount.RecursiveUnmount(dir) diff --git a/plugin/executor/containerd/containerd.go b/plugin/executor/containerd/containerd.go index 74cf530cf1..d93b8b75ec 100644 --- a/plugin/executor/containerd/containerd.go +++ b/plugin/executor/containerd/containerd.go @@ -1,22 +1,35 @@ package containerd import ( + "context" "io" + "path/filepath" + "sync" + "github.com/containerd/containerd" + "github.com/containerd/containerd/linux/runcopts" + "github.com/docker/docker/api/errdefs" "github.com/docker/docker/libcontainerd" "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) +// PluginNamespace is the name used for the plugins namespace +var PluginNamespace = "moby-plugins" + // ExitHandler represents an object that is called when the exit event is received from containerd type ExitHandler interface { HandleExitEvent(id string) error } // New creates a new containerd plugin executor -func New(remote libcontainerd.Remote, exitHandler ExitHandler) (*Executor, error) { - e := &Executor{exitHandler: exitHandler} - client, err := remote.Client(e) +func New(rootDir string, remote libcontainerd.Remote, exitHandler ExitHandler) (*Executor, error) { + e := &Executor{ + rootDir: rootDir, + exitHandler: exitHandler, + } + client, err := remote.NewClient(PluginNamespace, e) if err != nil { return nil, errors.Wrap(err, "error creating containerd exec client") } @@ -26,52 +39,108 @@ func New(remote libcontainerd.Remote, exitHandler ExitHandler) (*Executor, error // Executor is the containerd client implementation of a plugin executor type Executor struct { + rootDir string client libcontainerd.Client exitHandler ExitHandler } // Create creates a new container func (e *Executor) Create(id string, spec specs.Spec, stdout, stderr io.WriteCloser) error { - return e.client.Create(id, "", "", spec, attachStreamsFunc(stdout, stderr)) + opts := runcopts.RuncOptions{ + RuntimeRoot: filepath.Join(e.rootDir, "runtime-root"), + } + ctx := context.Background() + err := e.client.Create(ctx, id, &spec, &opts) + if err != nil { + return err + } + + _, err = e.client.Start(ctx, id, "", false, attachStreamsFunc(stdout, stderr)) + return err } // Restore restores a container func (e *Executor) Restore(id string, stdout, stderr io.WriteCloser) error { - return e.client.Restore(id, attachStreamsFunc(stdout, stderr)) -} + alive, _, err := e.client.Restore(context.Background(), id, attachStreamsFunc(stdout, stderr)) + if err != nil && !errdefs.IsNotFound(err) { + return err + } + if !alive { + _, _, err = e.client.DeleteTask(context.Background(), id) + if err != nil && !errdefs.IsNotFound(err) { + logrus.WithError(err).Errorf("failed to delete container plugin %s task from containerd", id) + return err + } -// IsRunning returns if the container with the given id is running -func (e *Executor) IsRunning(id string) (bool, error) { - pids, err := e.client.GetPidsForContainer(id) - return len(pids) > 0, err -} - -// Signal sends the specified signal to the container -func (e *Executor) Signal(id string, signal int) error { - return e.client.Signal(id, signal) -} - -// 
StateChanged handles state changes from containerd -// All events are ignored except the exit event, which is sent of to the stored handler -func (e *Executor) StateChanged(id string, event libcontainerd.StateInfo) error { - switch event.State { - case libcontainerd.StateExit: - return e.exitHandler.HandleExitEvent(id) + err = e.client.Delete(context.Background(), id) + if err != nil && !errdefs.IsNotFound(err) { + logrus.WithError(err).Errorf("failed to delete container plugin %s from containerd", id) + return err + } } return nil } -func attachStreamsFunc(stdout, stderr io.WriteCloser) func(libcontainerd.IOPipe) error { - return func(iop libcontainerd.IOPipe) error { - iop.Stdin.Close() +// IsRunning returns whether the container with the given id is running +func (e *Executor) IsRunning(id string) (bool, error) { + status, err := e.client.Status(context.Background(), id) + return status == libcontainerd.StatusRunning, err +} + +// Signal sends the specified signal to the container +func (e *Executor) Signal(id string, signal int) error { + return e.client.SignalProcess(context.Background(), id, libcontainerd.InitProcessName, signal) +} + +// ProcessEvent handles events from containerd +// All events are ignored except the exit event, which is sent off to the stored handler +func (e *Executor) ProcessEvent(id string, et libcontainerd.EventType, ei libcontainerd.EventInfo) error { + switch et { + case libcontainerd.EventExit: + // delete task and container + if _, _, err := e.client.DeleteTask(context.Background(), id); err != nil { + logrus.WithError(err).Errorf("failed to delete container plugin %s task from containerd", id) + } + + if err := e.client.Delete(context.Background(), id); err != nil { + logrus.WithError(err).Errorf("failed to delete container plugin %s from containerd", id) + } + return e.exitHandler.HandleExitEvent(ei.ContainerID) + } + return nil +} + +type cio struct { + containerd.IO + + wg sync.WaitGroup +} + +func (c *cio) Wait() { + c.wg.Wait() + c.IO.Wait() +} + +func attachStreamsFunc(stdout, stderr io.WriteCloser) libcontainerd.StdioCallback { + return func(iop *libcontainerd.IOPipe) (containerd.IO, error) { + if iop.Stdin != nil { + iop.Stdin.Close() + // closing stdin shouldn't be needed here; it should never be open + panic("plugin stdin shouldn't have been created!") + } + + cio := &cio{IO: iop} + cio.wg.Add(2) go func() { io.Copy(stdout, iop.Stdout) stdout.Close() + cio.wg.Done() }() go func() { io.Copy(stderr, iop.Stderr) stderr.Close() + cio.wg.Done() }() - return nil + return cio, nil } } diff --git a/plugin/manager_linux.go b/plugin/manager_linux.go index beefc3dfba..eff21e1d05 100644 --- a/plugin/manager_linux.go +++ b/plugin/manager_linux.go @@ -23,7 +23,7 @@ import ( "golang.org/x/sys/unix" ) -func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) error { +func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) (err error) { p.Rootfs = filepath.Join(pm.config.Root, p.PluginObj.ID, "rootfs") if p.IsEnabled() && !force { return errors.Wrap(enabledError(p.Name()), "plugin already enabled") @@ -44,15 +44,15 @@ func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) error { if p.PropagatedMount != "" { propRoot = filepath.Join(filepath.Dir(p.Rootfs), "propagated-mount") - if err := os.MkdirAll(propRoot, 0755); err != nil { + if err = os.MkdirAll(propRoot, 0755); err != nil { logrus.Errorf("failed to create PropagatedMount directory at %s: %v", propRoot, err) } - if err := mount.MakeRShared(propRoot); err != nil { + if err = 
mount.MakeRShared(propRoot); err != nil { return errors.Wrap(err, "error setting up propagated mount dir") } - if err := mount.Mount(propRoot, p.PropagatedMount, "none", "rbind"); err != nil { + if err = mount.Mount(propRoot, p.PropagatedMount, "none", "rbind"); err != nil { return errors.Wrap(err, "error creating mount for propagated mount") } } @@ -72,7 +72,6 @@ func (pm *Manager) enable(p *v2.Plugin, c *controller, force bool) error { logrus.Warnf("Could not unmount %s: %v", propRoot, err) } } - return errors.WithStack(err) } return pm.pluginPostStart(p, c) @@ -159,6 +158,12 @@ func shutdownPlugin(p *v2.Plugin, c *controller, executor Executor) { if err := executor.Signal(pluginID, int(unix.SIGKILL)); err != nil { logrus.Errorf("Sending SIGKILL to plugin failed with error: %v", err) } + select { + case <-c.exitChan: + logrus.Debug("plugin process exited after SIGKILL") + case <-time.After(time.Second * 10): + logrus.Debug("timed out waiting for plugin to exit after SIGKILL") + } } } }
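
-- 8< -- Editorial notes: the sketches below are illustrative only and are not part of the patch. -- 8< --

The containerd 1.0 Client interface in libcontainerd/types.go above splits what used to be a single Create call into explicit create and start steps, and adds DeleteTask/Delete for cleanup after exit. A minimal sketch of the new lifecycle, assuming a valid OCI spec and attach callback are already in hand (the runOnce helper name is invented for illustration, and error handling is abbreviated):

    package example

    import (
    	"context"

    	"github.com/docker/docker/libcontainerd"
    	specs "github.com/opencontainers/runtime-spec/specs-go"
    )

    // runOnce walks a container through the new create/start/delete cycle.
    func runOnce(c libcontainerd.Client, id string, spec *specs.Spec, attach libcontainerd.StdioCallback) error {
    	ctx := context.Background()
    	// Create registers the container; nothing is running yet.
    	if err := c.Create(ctx, id, spec, nil); err != nil { // nil = no runtime options
    		return err
    	}
    	// Start launches the task ("" = no checkpoint dir, false = no stdin).
    	if _, err := c.Start(ctx, id, "", false, attach); err != nil {
    		return err
    	}
    	// ... the Backend is notified of EventExit when the task ends ...
    	// DeleteTask reaps the exited task, then Delete removes the container.
    	if _, _, err := c.DeleteTask(ctx, id); err != nil {
    		return err
    	}
    	return c.Delete(ctx, id)
    }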
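Event delivery changed shape as well: instead of one StateChanged callback keyed on state strings, a Backend now receives typed events through ProcessEvent. A hypothetical Backend that only reacts to exit and OOM events could look like the following (demoBackend and its log lines are illustrative, not from this patch):

    package example

    import (
    	"github.com/docker/docker/libcontainerd"
    	"github.com/sirupsen/logrus"
    )

    // demoBackend ignores every event except exits and OOM kills.
    type demoBackend struct{}

    func (demoBackend) ProcessEvent(containerID string, event libcontainerd.EventType, ei libcontainerd.EventInfo) error {
    	switch event {
    	case libcontainerd.EventExit:
    		// ei.ProcessID is libcontainerd.InitProcessName when the init
    		// process (rather than an exec) is the one that exited.
    		logrus.Debugf("container %s: process %s exited with code %d at %s",
    			containerID, ei.ProcessID, ei.ExitCode, ei.ExitedAt)
    	case libcontainerd.EventOOM:
    		logrus.Debugf("container %s was OOM killed", containerID)
    	}
    	return nil // unhandled events are simply ignored
    }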
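The pkg/authorization/plugin.go hunk fixes a classic sync.Once pitfall: the init error was captured in a local variable, so only the caller that actually ran the Do closure could observe a failure; every later caller got nil back alongside a nil plugin client. Persisting the error on the struct makes the failure sticky. The corrected pattern, reduced to its essentials (all names invented):

    package example

    import (
    	"errors"
    	"sync"
    )

    type handle struct{}

    type lazyResource struct {
    	once    sync.Once
    	initErr error // stored on the struct so every caller observes it
    	h       *handle
    }

    func (l *lazyResource) get() (*handle, error) {
    	l.once.Do(func() {
    		h, err := expensiveInit()
    		if err != nil {
    			l.initErr = err // a local variable here would be lost after the first call
    			return
    		}
    		l.h = h
    	})
    	return l.h, l.initErr
    }

    func expensiveInit() (*handle, error) {
    	return nil, errors.New("init failed")
    }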
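The cio wrapper in plugin/executor/containerd/containerd.go exists because containerd.IO.Wait only covers containerd's side of the pipes; the executor also needs its own io.Copy goroutines to have drained the plugin's stdout and stderr before Wait returns. Stripped of the containerd types, the underlying shape is a WaitGroup over the two copiers (drain is a made-up name):

    package example

    import (
    	"io"
    	"sync"
    )

    // drain fans two readers out to their writers and returns a function
    // that blocks until both copies have finished and the writers are closed.
    func drain(stdout, stderr io.WriteCloser, outR, errR io.Reader) (wait func()) {
    	var wg sync.WaitGroup
    	wg.Add(2)
    	go func() { defer wg.Done(); io.Copy(stdout, outR); stdout.Close() }()
    	go func() { defer wg.Done(); io.Copy(stderr, errR); stderr.Close() }()
    	return wg.Wait
    }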
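Finally, the shutdownPlugin change in plugin/manager_linux.go adds a bounded wait after SIGKILL so shutdown cannot hang forever on a task that never reports its exit. The select-on-channel-or-deadline idiom it relies on, in isolation (channel and timeout are illustrative):

    package example

    import (
    	"log"
    	"time"
    )

    // awaitExit returns true if exited closes before the timeout elapses,
    // mirroring the post-SIGKILL wait added to shutdownPlugin.
    func awaitExit(exited <-chan struct{}, timeout time.Duration) bool {
    	select {
    	case <-exited:
    		return true
    	case <-time.After(timeout):
    		log.Print("timed out waiting for exit")
    		return false
    	}
    }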