
Update libcontainerd to use containerd 1.0

Signed-off-by: Kenfe-Mickael Laventure <mickael.laventure@gmail.com>
Kenfe-Mickael Laventure 7 years ago
parent
commit
ddae20c032
100 changed files with 4373 additions and 3745 deletions
  1. api/server/router/container/exec.go (+1 -1)
  2. builder/dockerfile/containerbackend.go (+1 -1)
  3. cmd/dockerd/config.go (+2 -0)
  4. cmd/dockerd/config_unix.go (+0 -2)
  5. cmd/dockerd/config_windows.go (+1 -0)
  6. cmd/dockerd/daemon.go (+16 -1)
  7. cmd/dockerd/daemon_linux.go (+1 -1)
  8. cmd/dockerd/daemon_solaris.go (+2 -14)
  9. cmd/dockerd/daemon_unix.go (+33 -25)
  10. cmd/dockerd/daemon_windows.go (+4 -11)
  11. container/container.go (+34 -3)
  12. container/container_unix.go (+0 -9)
  13. container/container_windows.go (+0 -6)
  14. container/state.go (+12 -5)
  15. container/state_unix.go (+0 -10)
  16. container/state_windows.go (+0 -7)
  17. container/stream/streams.go (+3 -3)
  18. daemon/checkpoint.go (+13 -4)
  19. daemon/config/config.go (+5 -0)
  20. daemon/config/config_common_unix.go (+0 -2)
  21. daemon/daemon.go (+92 -33)
  22. daemon/daemon_unix.go (+162 -40)
  23. daemon/daemon_windows.go (+48 -40)
  24. daemon/delete.go (+1 -0)
  25. daemon/errors.go (+5 -0)
  26. daemon/exec.go (+29 -11)
  27. daemon/exec/exec.go (+53 -13)
  28. daemon/exec_linux.go (+10 -4)
  29. daemon/exec_solaris.go (+2 -2)
  30. daemon/exec_windows.go (+2 -2)
  31. daemon/info_unix.go (+19 -10)
  32. daemon/kill.go (+3 -2)
  33. daemon/logger/plugin_unix.go (+1 -1)
  34. daemon/monitor.go (+114 -87)
  35. daemon/monitor_linux.go (+1 -9)
  36. daemon/monitor_solaris.go (+1 -8)
  37. daemon/monitor_windows.go (+31 -19)
  38. daemon/oci_linux.go (+5 -4)
  39. daemon/pause.go (+13 -2)
  40. daemon/reload.go (+0 -4)
  41. daemon/resize.go (+3 -2)
  42. daemon/start.go (+48 -9)
  43. daemon/start_unix.go (+35 -10)
  44. daemon/start_windows.go (+4 -9)
  45. daemon/top_unix.go (+8 -7)
  46. daemon/top_unix_test.go (+5 -5)
  47. daemon/top_windows.go (+11 -1)
  48. daemon/unpause.go (+12 -1)
  49. daemon/update.go (+2 -1)
  50. daemon/update_linux.go (+32 -15)
  51. daemon/update_windows.go (+3 -3)
  52. hack/make/.go-autogen (+3 -3)
  53. integration-cli/daemon/daemon.go (+3 -1)
  54. integration-cli/docker_api_stats_test.go (+1 -1)
  55. integration-cli/docker_cli_attach_test.go (+4 -1)
  56. integration-cli/docker_cli_build_unix_test.go (+5 -0)
  57. integration-cli/docker_cli_daemon_test.go (+8 -6)
  58. integration-cli/docker_cli_events_test.go (+11 -5)
  59. integration-cli/docker_cli_logs_test.go (+1 -0)
  60. integration-cli/docker_cli_network_unix_test.go (+1 -0)
  61. integration-cli/docker_cli_run_test.go (+1 -0)
  62. integration-cli/docker_deprecated_api_v124_test.go (+2 -0)
  63. integration-cli/events_utils_test.go (+1 -1)
  64. integration/service/create_test.go (+10 -2)
  65. libcontainerd/client.go (+0 -46)
  66. libcontainerd/client_daemon.go (+802 -0)
  67. libcontainerd/client_daemon_linux.go (+96 -0)
  68. libcontainerd/client_daemon_windows.go (+53 -0)
  69. libcontainerd/client_linux.go (+0 -616)
  70. libcontainerd/client_local_windows.go (+1340 -0)
  71. libcontainerd/client_solaris.go (+0 -104)
  72. libcontainerd/client_unix.go (+0 -141)
  73. libcontainerd/client_windows.go (+0 -886)
  74. libcontainerd/container.go (+0 -13)
  75. libcontainerd/container_unix.go (+0 -246)
  76. libcontainerd/container_windows.go (+0 -338)
  77. libcontainerd/errors.go (+46 -0)
  78. libcontainerd/io.go (+36 -0)
  79. libcontainerd/io_unix.go (+60 -0)
  80. libcontainerd/io_windows.go (+138 -0)
  81. libcontainerd/oom_linux.go (+0 -31)
  82. libcontainerd/oom_solaris.go (+0 -5)
  83. libcontainerd/pausemonitor_unix.go (+0 -42)
  84. libcontainerd/process.go (+0 -18)
  85. libcontainerd/process_unix.go (+0 -107)
  86. libcontainerd/process_windows.go (+5 -9)
  87. libcontainerd/queue.go (+0 -2)
  88. libcontainerd/queue_test.go (+0 -2)
  89. libcontainerd/remote.go (+0 -20)
  90. libcontainerd/remote_daemon.go (+317 -0)
  91. libcontainerd/remote_daemon_options.go (+141 -0)
  92. libcontainerd/remote_daemon_options_unix.go (+36 -0)
  93. libcontainerd/remote_daemon_process.go (+56 -0)
  94. libcontainerd/remote_daemon_process_unix.go (+61 -0)
  95. libcontainerd/remote_daemon_unix.go (+56 -0)
  96. libcontainerd/remote_daemon_windows.go (+50 -0)
  97. libcontainerd/remote_local.go (+59 -0)
  98. libcontainerd/remote_unix.go (+0 -565)
  99. libcontainerd/remote_windows.go (+0 -36)
  100. libcontainerd/types.go (+87 -39)

+ 1 - 1
api/server/router/container/exec.go

@@ -126,7 +126,7 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res
 			return err
 		}
 		stdout.Write([]byte(err.Error() + "\r\n"))
-		logrus.Errorf("Error running exec in container: %v", err)
+		logrus.Errorf("Error running exec %s in container: %v", execName, err)
 	}
 	return nil
 }

+ 1 - 1
builder/dockerfile/containerbackend.go

@@ -102,7 +102,7 @@ func (c *containerManager) Run(ctx context.Context, cID string, stdout, stderr i
 
 func logCancellationError(cancelErrCh chan error, msg string) {
 	if cancelErr := <-cancelErrCh; cancelErr != nil {
-		logrus.Debugf("Build cancelled (%v): ", cancelErr, msg)
+		logrus.Debugf("Build cancelled (%v): %s", cancelErr, msg)
 	}
 }
 

+ 2 - 0
cmd/dockerd/config.go

@@ -27,6 +27,8 @@ func installCommonConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
 	flags.Var(opts.NewNamedListOptsRef("exec-opts", &conf.ExecOptions, nil), "exec-opt", "Runtime execution options")
 	flags.StringVarP(&conf.Pidfile, "pidfile", "p", defaultPidFile, "Path to use for daemon PID file")
 	flags.StringVarP(&conf.Root, "graph", "g", defaultDataRoot, "Root of the Docker runtime")
+	flags.StringVar(&conf.ExecRoot, "exec-root", defaultExecRoot, "Root directory for execution state files")
+	flags.StringVar(&conf.ContainerdAddr, "containerd", "", "containerd grpc address")
 
 	// "--graph" is "soft-deprecated" in favor of "data-root". This flag was added
 	// before Docker 1.0, so won't be removed, only hidden, to discourage its usage.

+ 0 - 2
cmd/dockerd/config_unix.go

@@ -29,13 +29,11 @@ func installConfigFlags(conf *config.Config, flags *pflag.FlagSet) {
 	flags.BoolVar(&conf.BridgeConfig.EnableIPForward, "ip-forward", true, "Enable net.ipv4.ip_forward")
 	flags.BoolVar(&conf.BridgeConfig.EnableIPMasq, "ip-masq", true, "Enable IP masquerading")
 	flags.BoolVar(&conf.BridgeConfig.EnableIPv6, "ipv6", false, "Enable IPv6 networking")
-	flags.StringVar(&conf.ExecRoot, "exec-root", defaultExecRoot, "Root directory for execution state files")
 	flags.StringVar(&conf.BridgeConfig.FixedCIDRv6, "fixed-cidr-v6", "", "IPv6 subnet for fixed IPs")
 	flags.BoolVar(&conf.BridgeConfig.EnableUserlandProxy, "userland-proxy", true, "Use userland proxy for loopback traffic")
 	flags.StringVar(&conf.BridgeConfig.UserlandProxyPath, "userland-proxy-path", "", "Path to the userland proxy binary")
 	flags.StringVar(&conf.CgroupParent, "cgroup-parent", "", "Set parent cgroup for all containers")
 	flags.StringVar(&conf.RemappedRoot, "userns-remap", "", "User/Group setting for user namespaces")
-	flags.StringVar(&conf.ContainerdAddr, "containerd", "", "Path to containerd socket")
 	flags.BoolVar(&conf.LiveRestoreEnabled, "live-restore", false, "Enable live restore of docker when containers are still running")
 	flags.IntVar(&conf.OOMScoreAdjust, "oom-score-adjust", -500, "Set the oom_score_adj for the daemon")
 	flags.BoolVar(&conf.Init, "init", false, "Run an init in the container to forward signals and reap processes")

+ 1 - 0
cmd/dockerd/config_windows.go

@@ -11,6 +11,7 @@ import (
 var (
 	defaultPidFile  string
 	defaultDataRoot = filepath.Join(os.Getenv("programdata"), "docker")
+	defaultExecRoot = filepath.Join(os.Getenv("programdata"), "docker", "exec-root")
 )
 
 // installConfigFlags adds flags to the pflag.FlagSet to configure the daemon

+ 16 - 1
cmd/dockerd/daemon.go

@@ -204,7 +204,11 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
 		return err
 	}
 
-	containerdRemote, err := libcontainerd.New(cli.getLibcontainerdRoot(), cli.getPlatformRemoteOptions()...)
+	rOpts, err := cli.getRemoteOptions()
+	if err != nil {
+		return fmt.Errorf("Failed to generate containerd options: %s", err)
+	}
+	containerdRemote, err := libcontainerd.New(filepath.Join(cli.Config.Root, "containerd"), filepath.Join(cli.Config.ExecRoot, "containerd"), rOpts...)
 	if err != nil {
 		return err
 	}
@@ -560,6 +564,17 @@ func (cli *DaemonCli) initMiddlewares(s *apiserver.Server, cfg *apiserver.Config
 	return nil
 }
 
+func (cli *DaemonCli) getRemoteOptions() ([]libcontainerd.RemoteOption, error) {
+	opts := []libcontainerd.RemoteOption{}
+
+	pOpts, err := cli.getPlatformRemoteOptions()
+	if err != nil {
+		return nil, err
+	}
+	opts = append(opts, pOpts...)
+	return opts, nil
+}
+
 // validates that the plugins requested with the --authorization-plugin flag are valid AuthzDriver
 // plugins present on the host and available to the daemon
 func validateAuthzPlugins(requestedPlugins []string, pg plugingetter.PluginGetter) error {

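For orientation, here is a minimal sketch of how the new two-root libcontainerd.New signature from the hunk above could be called on its own. The two containerd subdirectories and the WithStartDaemon option are taken from the diff; the standalone main function, the example paths, and the logging are purely illustrative.

package main

import (
	"log"
	"path/filepath"

	"github.com/docker/docker/libcontainerd"
)

func main() {
	// Illustrative paths; the daemon derives these from --data-root and --exec-root.
	root, execRoot := "/var/lib/docker", "/var/run/docker"

	// libcontainerd.New now takes a persistent state root and a runtime (exec)
	// root, mirroring the call added in DaemonCli.start above.
	remote, err := libcontainerd.New(
		filepath.Join(root, "containerd"),
		filepath.Join(execRoot, "containerd"),
		libcontainerd.WithStartDaemon(true),
	)
	if err != nil {
		log.Fatalf("failed to create containerd remote: %v", err)
	}
	log.Printf("containerd remote created: %T", remote)
}
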
+ 1 - 1
cmd/dockerd/daemon_linux.go

@@ -11,5 +11,5 @@ func preNotifySystem() {
 // notifySystem sends a message to the host when the server is ready to be used
 func notifySystem() {
 	// Tell the init daemon we are accepting requests
-	go systemdDaemon.SdNotify("READY=1")
+	go systemdDaemon.SdNotify(false, "READY=1")
 }

+ 2 - 14
cmd/dockerd/daemon_solaris.go

@@ -41,20 +41,8 @@ func preNotifySystem() {
 func notifySystem() {
 }
 
-func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
-	opts := []libcontainerd.RemoteOption{}
-	if cli.Config.ContainerdAddr != "" {
-		opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr))
-	} else {
-		opts = append(opts, libcontainerd.WithStartDaemon(true))
-	}
-	return opts
-}
-
-// getLibcontainerdRoot gets the root directory for libcontainerd/containerd to
-// store their state.
-func (cli *DaemonCli) getLibcontainerdRoot() string {
-	return filepath.Join(cli.Config.ExecRoot, "libcontainerd")
+func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
+	return nil, nil
 }
 
 // getSwarmRunRoot gets the root directory for swarm to store runtime state

+ 33 - 25
cmd/dockerd/daemon_unix.go

@@ -10,9 +10,11 @@ import (
 	"path/filepath"
 	"strconv"
 
+	"github.com/containerd/containerd/linux"
 	"github.com/docker/docker/cmd/dockerd/hack"
 	"github.com/docker/docker/daemon"
 	"github.com/docker/docker/libcontainerd"
+	"github.com/docker/docker/pkg/parsers/kernel"
 	"github.com/docker/libnetwork/portallocator"
 	"golang.org/x/sys/unix"
 )
@@ -35,42 +37,48 @@ func getDaemonConfDir(_ string) string {
 	return "/etc/docker"
 }
 
-// setupConfigReloadTrap configures the USR2 signal to reload the configuration.
-func (cli *DaemonCli) setupConfigReloadTrap() {
-	c := make(chan os.Signal, 1)
-	signal.Notify(c, unix.SIGHUP)
-	go func() {
-		for range c {
-			cli.reloadConfig()
-		}
-	}()
-}
+func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
+	// On older kernels, putting the containerd-shim in its own mount
+	// namespace will effectively prevent operations such as unlink, rename
+	// and remove on mountpoints that were present at the time the shim
+	// namespace was created. This would lead to the infamous EBUSY when
+	// trying to remove shm mounts.
+	var noNewNS bool
+	if !kernel.CheckKernelVersion(3, 18, 0) {
+		noNewNS = true
+	}
 
-func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
 	opts := []libcontainerd.RemoteOption{
-		libcontainerd.WithDebugLog(cli.Config.Debug),
 		libcontainerd.WithOOMScore(cli.Config.OOMScoreAdjust),
+		libcontainerd.WithPlugin("linux", &linux.Config{
+			Shim:          daemon.DefaultShimBinary,
+			Runtime:       daemon.DefaultRuntimeBinary,
+			RuntimeRoot:   filepath.Join(cli.Config.Root, "runc"),
+			ShimDebug:     cli.Config.Debug,
+			ShimNoMountNS: noNewNS,
+		}),
+	}
+	if cli.Config.Debug {
+		opts = append(opts, libcontainerd.WithLogLevel("debug"))
 	}
 	if cli.Config.ContainerdAddr != "" {
 		opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr))
 	} else {
 		opts = append(opts, libcontainerd.WithStartDaemon(true))
 	}
-	if daemon.UsingSystemd(cli.Config) {
-		args := []string{"--systemd-cgroup=true"}
-		opts = append(opts, libcontainerd.WithRuntimeArgs(args))
-	}
-	if cli.Config.LiveRestoreEnabled {
-		opts = append(opts, libcontainerd.WithLiveRestore(true))
-	}
-	opts = append(opts, libcontainerd.WithRuntimePath(daemon.DefaultRuntimeBinary))
-	return opts
+
+	return opts, nil
 }
 
-// getLibcontainerdRoot gets the root directory for libcontainerd/containerd to
-// store their state.
-func (cli *DaemonCli) getLibcontainerdRoot() string {
-	return filepath.Join(cli.Config.ExecRoot, "libcontainerd")
+// setupConfigReloadTrap configures the USR2 signal to reload the configuration.
+func (cli *DaemonCli) setupConfigReloadTrap() {
+	c := make(chan os.Signal, 1)
+	signal.Notify(c, unix.SIGHUP)
+	go func() {
+		for range c {
+			cli.reloadConfig()
+		}
+	}()
 }
 
 // getSwarmRunRoot gets the root directory for swarm to store runtime state

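A rough, self-contained sketch of the kernel gating above: the shim only gets its own mount namespace on kernels 3.18 and newer, otherwise shm mounts could later fail to unmount with EBUSY. The linux.Config fields and the kernel.CheckKernelVersion call mirror the hunk; the hard-coded binary names and data-root path are the daemon defaults and are only illustrative here.

package main

import (
	"fmt"
	"path/filepath"

	"github.com/containerd/containerd/linux"
	"github.com/docker/docker/pkg/parsers/kernel"
)

// shimRuntimeConfig mirrors the linux.Config built in getPlatformRemoteOptions:
// on kernels older than 3.18 the shim must stay in the daemon's mount namespace.
func shimRuntimeConfig(root string, debug bool) *linux.Config {
	return &linux.Config{
		Shim:          "docker-containerd-shim", // daemon.DefaultShimBinary
		Runtime:       "docker-runc",            // daemon.DefaultRuntimeBinary
		RuntimeRoot:   filepath.Join(root, "runc"),
		ShimDebug:     debug,
		ShimNoMountNS: !kernel.CheckKernelVersion(3, 18, 0),
	}
}

func main() {
	fmt.Printf("%+v\n", shimRuntimeConfig("/var/lib/docker", false))
}
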
+ 4 - 11
cmd/dockerd/daemon_windows.go

@@ -48,6 +48,10 @@ func notifyShutdown(err error) {
 	}
 }
 
+func (cli *DaemonCli) getPlatformRemoteOptions() ([]libcontainerd.RemoteOption, error) {
+	return nil, nil
+}
+
 // setupConfigReloadTrap configures a Win32 event to reload the configuration.
 func (cli *DaemonCli) setupConfigReloadTrap() {
 	go func() {
@@ -65,17 +69,6 @@ func (cli *DaemonCli) setupConfigReloadTrap() {
 	}()
 }
 
-func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
-	return nil
-}
-
-// getLibcontainerdRoot gets the root directory for libcontainerd to store its
-// state. The Windows libcontainerd implementation does not need to write a spec
-// or state to disk, so this is a no-op.
-func (cli *DaemonCli) getLibcontainerdRoot() string {
-	return ""
-}
-
 // getSwarmRunRoot gets the root directory for swarm to store runtime state
 // For example, the control socket
 func (cli *DaemonCli) getSwarmRunRoot() string {

+ 34 - 3
container/container.go

@@ -15,6 +15,7 @@ import (
 	"syscall"
 	"time"
 
+	"github.com/containerd/containerd"
 	containertypes "github.com/docker/docker/api/types/container"
 	mounttypes "github.com/docker/docker/api/types/mount"
 	networktypes "github.com/docker/docker/api/types/network"
@@ -61,6 +62,18 @@ var (
 	errInvalidNetwork  = errors.New("invalid network settings while building port map info")
 )
 
+// ExitStatus provides exit reasons for a container.
+type ExitStatus struct {
+	// The exit code with which the container exited.
+	ExitCode int
+
+	// Whether the container encountered an OOM.
+	OOMKilled bool
+
+	// Time at which the container died
+	ExitedAt time.Time
+}
+
 // Container holds the structure defining a container object.
 type Container struct {
 	StreamConfig *stream.Config
@@ -996,10 +1009,10 @@ func (container *Container) CloseStreams() error {
 }
 
 // InitializeStdio is called by libcontainerd to connect the stdio.
-func (container *Container) InitializeStdio(iop libcontainerd.IOPipe) error {
+func (container *Container) InitializeStdio(iop *libcontainerd.IOPipe) (containerd.IO, error) {
 	if err := container.startLogging(); err != nil {
 		container.Reset(false)
-		return err
+		return nil, err
 	}
 
 	container.StreamConfig.CopyToPipe(iop)
@@ -1012,7 +1025,7 @@ func (container *Container) InitializeStdio(iop libcontainerd.IOPipe) error {
 		}
 	}
 
-	return nil
+	return &cio{IO: iop, sc: container.StreamConfig}, nil
 }
 
 // SecretMountPath returns the path of the secret mount for the container
@@ -1069,3 +1082,21 @@ func (container *Container) CreateDaemonEnvironment(tty bool, linkedEnv []string
 	env = ReplaceOrAppendEnvValues(env, container.Config.Env)
 	return env
 }
+
+type cio struct {
+	containerd.IO
+
+	sc *stream.Config
+}
+
+func (i *cio) Close() error {
+	i.IO.Close()
+
+	return i.sc.CloseStreams()
+}
+
+func (i *cio) Wait() {
+	i.sc.Wait()
+
+	i.IO.Wait()
+}

+ 0 - 9
container/container_unix.go

@@ -24,15 +24,6 @@ const (
 	containerSecretMountPath = "/run/secrets"
 )
 
-// ExitStatus provides exit reasons for a container.
-type ExitStatus struct {
-	// The exit code with which the container exited.
-	ExitCode int
-
-	// Whether the container encountered an OOM.
-	OOMKilled bool
-}
-
 // TrySetNetworkMount attempts to set the network mounts given a provided destination and
 // the path to use for it; return true if the given destination was a network mount file
 func (container *Container) TrySetNetworkMount(destination string, path string) bool {

+ 0 - 6
container/container_windows.go

@@ -18,12 +18,6 @@ const (
 	containerInternalConfigsDirPath  = `C:\ProgramData\Docker\internal\configs`
 )
 
-// ExitStatus provides exit reasons for a container.
-type ExitStatus struct {
-	// The exit code with which the container exited.
-	ExitCode int
-}
-
 // UnmountIpcMount unmounts Ipc related mounts.
 // This is a NOOP on windows.
 func (container *Container) UnmountIpcMount(unmount func(pth string) error) error {

+ 12 - 5
container/state.go

@@ -276,6 +276,7 @@ func (s *State) SetExitCode(ec int) {
 // SetRunning sets the state of the container to "running".
 func (s *State) SetRunning(pid int, initial bool) {
 	s.ErrorMsg = ""
+	s.Paused = false
 	s.Running = true
 	s.Restarting = false
 	if initial {
@@ -294,9 +295,14 @@ func (s *State) SetStopped(exitStatus *ExitStatus) {
 	s.Paused = false
 	s.Restarting = false
 	s.Pid = 0
-	s.FinishedAt = time.Now().UTC()
-	s.setFromExitStatus(exitStatus)
-	close(s.waitStop) // Fire waiters for stop
+	if exitStatus.ExitedAt.IsZero() {
+		s.FinishedAt = time.Now().UTC()
+	} else {
+		s.FinishedAt = exitStatus.ExitedAt
+	}
+	s.ExitCodeValue = exitStatus.ExitCode
+	s.OOMKilled = exitStatus.OOMKilled
+	close(s.waitStop) // fire waiters for stop
 	s.waitStop = make(chan struct{})
 }
 
@@ -310,8 +316,9 @@ func (s *State) SetRestarting(exitStatus *ExitStatus) {
 	s.Paused = false
 	s.Pid = 0
 	s.FinishedAt = time.Now().UTC()
-	s.setFromExitStatus(exitStatus)
-	close(s.waitStop) // Fire waiters for stop
+	s.ExitCodeValue = exitStatus.ExitCode
+	s.OOMKilled = exitStatus.OOMKilled
+	close(s.waitStop) // fire waiters for stop
 	s.waitStop = make(chan struct{})
 }
 

+ 0 - 10
container/state_unix.go

@@ -1,10 +0,0 @@
-// +build linux freebsd
-
-package container
-
-// setFromExitStatus is a platform specific helper function to set the state
-// based on the ExitStatus structure.
-func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
-	s.ExitCodeValue = exitStatus.ExitCode
-	s.OOMKilled = exitStatus.OOMKilled
-}

+ 0 - 7
container/state_windows.go

@@ -1,7 +0,0 @@
-package container
-
-// setFromExitStatus is a platform specific helper function to set the state
-// based on the ExitStatus structure.
-func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
-	s.ExitCodeValue = exitStatus.ExitCode
-}

+ 3 - 3
container/stream/streams.go

@@ -114,12 +114,12 @@ func (c *Config) CloseStreams() error {
 }
 
 // CopyToPipe connects streamconfig with a libcontainerd.IOPipe
-func (c *Config) CopyToPipe(iop libcontainerd.IOPipe) {
+func (c *Config) CopyToPipe(iop *libcontainerd.IOPipe) {
 	copyFunc := func(w io.Writer, r io.ReadCloser) {
 		c.Add(1)
 		go func() {
 			if _, err := pools.Copy(w, r); err != nil {
-				logrus.Errorf("stream copy error: %+v", err)
+				logrus.Errorf("stream copy error: %v", err)
 			}
 			r.Close()
 			c.Done()
@@ -138,7 +138,7 @@ func (c *Config) CopyToPipe(iop libcontainerd.IOPipe) {
 			go func() {
 				pools.Copy(iop.Stdin, stdin)
 				if err := iop.Stdin.Close(); err != nil {
-					logrus.Warnf("failed to close stdin: %+v", err)
+					logrus.Warnf("failed to close stdin: %v", err)
 				}
 			}()
 		}

+ 13 - 4
daemon/checkpoint.go

@@ -1,6 +1,7 @@
 package daemon
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
@@ -17,7 +18,7 @@ var (
 )
 
 // getCheckpointDir verifies checkpoint directory for create,remove, list options and checks if checkpoint already exists
-func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID string, ctrCheckpointDir string, create bool) (string, error) {
+func getCheckpointDir(checkDir, checkpointID, ctrName, ctrID, ctrCheckpointDir string, create bool) (string, error) {
 	var checkpointDir string
 	var err2 error
 	if checkDir != "" {
@@ -32,7 +33,10 @@ func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID strin
 		case err == nil && stat.IsDir():
 			err2 = fmt.Errorf("checkpoint with name %s already exists for container %s", checkpointID, ctrName)
 		case err != nil && os.IsNotExist(err):
-			err2 = nil
+			err2 = os.MkdirAll(checkpointAbsDir, 0700)
+			if os.IsExist(err2) {
+				err2 = nil
+			}
 		case err != nil:
 			err2 = err
 		case err == nil:
@@ -48,7 +52,7 @@ func getCheckpointDir(checkDir, checkpointID string, ctrName string, ctrID strin
 			err2 = fmt.Errorf("%s exists and is not a directory", checkpointAbsDir)
 		}
 	}
-	return checkpointDir, err2
+	return checkpointAbsDir, err2
 }
 
 // CheckpointCreate checkpoints the process running in a container with CRIU
@@ -62,6 +66,10 @@ func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreat
 		return fmt.Errorf("Container %s not running", name)
 	}
 
+	if container.Config.Tty {
+		return fmt.Errorf("checkpoint not supported on containers with tty")
+	}
+
 	if !validCheckpointNamePattern.MatchString(config.CheckpointID) {
 		return fmt.Errorf("Invalid checkpoint ID (%s), only %s are allowed", config.CheckpointID, validCheckpointNameChars)
 	}
@@ -71,8 +79,9 @@ func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreat
 		return fmt.Errorf("cannot checkpoint container %s: %s", name, err)
 	}
 
-	err = daemon.containerd.CreateCheckpoint(container.ID, config.CheckpointID, checkpointDir, config.Exit)
+	err = daemon.containerd.CreateCheckpoint(context.Background(), container.ID, checkpointDir, config.Exit)
 	if err != nil {
+		os.RemoveAll(checkpointDir)
 		return fmt.Errorf("Cannot checkpoint container %s: %s", name, err)
 	}
 

+ 5 - 0
daemon/config/config.go

@@ -101,6 +101,7 @@ type CommonConfig struct {
 	RawLogs              bool                      `json:"raw-logs,omitempty"`
 	RootDeprecated       string                    `json:"graph,omitempty"`
 	Root                 string                    `json:"data-root,omitempty"`
+	ExecRoot             string                    `json:"exec-root,omitempty"`
 	SocketGroup          string                    `json:"group,omitempty"`
 	CorsHeaders          string                    `json:"api-cors-header,omitempty"`
 
@@ -172,6 +173,10 @@ type CommonConfig struct {
 	NodeGenericResources string `json:"node-generic-resources,omitempty"`
 	// NetworkControlPlaneMTU allows to specify the control plane MTU, this will allow to optimize the network use in some components
 	NetworkControlPlaneMTU int `json:"network-control-plane-mtu,omitempty"`
+
+	// ContainerdAddr is the address used to connect to containerd if we're
+	// not starting it ourselves
+	ContainerdAddr string `json:"containerd,omitempty"`
 }
 
 // IsValueSet returns true if a configuration value

+ 0 - 2
daemon/config/config_common_unix.go

@@ -11,8 +11,6 @@ import (
 // CommonUnixConfig defines configuration of a docker daemon that is
 // common across Unix platforms.
 type CommonUnixConfig struct {
-	ExecRoot          string                   `json:"exec-root,omitempty"`
-	ContainerdAddr    string                   `json:"containerd,omitempty"`
 	Runtimes          map[string]types.Runtime `json:"runtimes,omitempty"`
 	DefaultRuntime    string                   `json:"default-runtime,omitempty"`
 	DefaultInitBinary string                   `json:"default-init,omitempty"`

+ 92 - 33
daemon/daemon.go

@@ -18,7 +18,7 @@ import (
 	"sync"
 	"time"
 
-	containerd "github.com/containerd/containerd/api/grpc/types"
+	"github.com/docker/docker/api/errdefs"
 	"github.com/docker/docker/api/types"
 	containertypes "github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/swarm"
@@ -62,11 +62,10 @@ import (
 	"github.com/pkg/errors"
 )
 
-var (
-	// DefaultRuntimeBinary is the default runtime to be used by
-	// containerd if none is specified
-	DefaultRuntimeBinary = "docker-runc"
+// MainNamespace is the name of the namespace used for users' containers
+const MainNamespace = "moby"
 
+var (
 	errSystemNotSupported = errors.New("the Docker daemon is not supported on this platform")
 )
 
@@ -170,7 +169,7 @@ func (daemon *Daemon) restore() error {
 				continue
 			}
 			container.RWLayer = rwlayer
-			logrus.Debugf("Loaded container %v", container.ID)
+			logrus.Debugf("Loaded container %v, isRunning: %v", container.ID, container.IsRunning())
 
 			containers[container.ID] = container
 		} else {
@@ -209,8 +208,10 @@ func (daemon *Daemon) restore() error {
 		}
 	}
 
-	var wg sync.WaitGroup
-	var mapLock sync.Mutex
+	var (
+		wg      sync.WaitGroup
+		mapLock sync.Mutex
+	)
 	for _, c := range containers {
 		wg.Add(1)
 		go func(c *container.Container) {
@@ -221,11 +222,74 @@ func (daemon *Daemon) restore() error {
 			}
 
 			daemon.setStateCounter(c)
+
+			logrus.WithFields(logrus.Fields{
+				"container": c.ID,
+				"running":   c.IsRunning(),
+				"paused":    c.IsPaused(),
+			}).Debug("restoring container")
+
+			var (
+				err      error
+				alive    bool
+				ec       uint32
+				exitedAt time.Time
+			)
+
+			alive, _, err = daemon.containerd.Restore(context.Background(), c.ID, c.InitializeStdio)
+			if err != nil && !errdefs.IsNotFound(err) {
+				logrus.Errorf("Failed to restore container %s with containerd: %s", c.ID, err)
+				return
+			}
+			if !alive {
+				ec, exitedAt, err = daemon.containerd.DeleteTask(context.Background(), c.ID)
+				if err != nil && !errdefs.IsNotFound(err) {
+					logrus.WithError(err).Errorf("Failed to delete container %s from containerd", c.ID)
+					return
+				}
+			}
+
 			if c.IsRunning() || c.IsPaused() {
 				c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking
-				if err := daemon.containerd.Restore(c.ID, c.InitializeStdio); err != nil {
-					logrus.Errorf("Failed to restore %s with containerd: %s", c.ID, err)
-					return
+
+				if c.IsPaused() && alive {
+					s, err := daemon.containerd.Status(context.Background(), c.ID)
+					if err != nil {
+						logrus.WithError(err).WithField("container", c.ID).
+							Errorf("Failed to get container status")
+					} else {
+						logrus.WithField("container", c.ID).WithField("state", s).
+							Info("restored container paused")
+						switch s {
+						case libcontainerd.StatusPaused, libcontainerd.StatusPausing:
+							// nothing to do
+						case libcontainerd.StatusStopped:
+							alive = false
+						case libcontainerd.StatusUnknown:
+							logrus.WithField("container", c.ID).
+								Error("Unknown status for container during restore")
+						default:
+							// running
+							c.Lock()
+							c.Paused = false
+							daemon.setStateCounter(c)
+							if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+								logrus.WithError(err).WithField("container", c.ID).
+									Error("Failed to update stopped container state")
+							}
+							c.Unlock()
+						}
+					}
+				}
+
+				if !alive {
+					c.Lock()
+					c.SetStopped(&container.ExitStatus{ExitCode: int(ec), ExitedAt: exitedAt})
+					daemon.Cleanup(c)
+					if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+						logrus.Errorf("Failed to update stopped container %s state: %v", c.ID, err)
+					}
+					c.Unlock()
 				}
 
 				// we call Mount and then Unmount to get BaseFs of the container
@@ -253,11 +317,9 @@ func (daemon *Daemon) restore() error {
 					activeSandboxes[c.NetworkSettings.SandboxID] = options
 					mapLock.Unlock()
 				}
+			} else {
+				// get list of containers we need to restart
 
-			}
-			// fixme: only if not running
-			// get list of containers we need to restart
-			if !c.IsRunning() && !c.IsPaused() {
 				// Do not autostart containers which
 				// has endpoints in a swarm scope
 				// network yet since the cluster is
@@ -289,7 +351,7 @@ func (daemon *Daemon) restore() error {
 				c.RemovalInProgress = false
 				c.Dead = true
 				if err := c.CheckpointTo(daemon.containersReplica); err != nil {
-					logrus.Errorf("Failed to update container %s state: %v", c.ID, err)
+					logrus.Errorf("Failed to update RemovalInProgress container %s state: %v", c.ID, err)
 				}
 			}
 			c.Unlock()
@@ -559,6 +621,7 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
 
 	d := &Daemon{
 		configStore: config,
+		PluginStore: pluginStore,
 		startupDone: make(chan struct{}),
 	}
 	// Ensure the daemon is properly shutdown if there is a failure during
@@ -606,6 +669,16 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
 		return nil, err
 	}
 
+	// Create the directory where we'll store the runtime scripts (i.e. in
+	// order to support runtimeArgs)
+	daemonRuntimes := filepath.Join(config.Root, "runtimes")
+	if err := system.MkdirAll(daemonRuntimes, 0700, ""); err != nil && !os.IsExist(err) {
+		return nil, err
+	}
+	if err := d.loadRuntimes(); err != nil {
+		return nil, err
+	}
+
 	if runtime.GOOS == "windows" {
 		if err := system.MkdirAll(filepath.Join(config.Root, "credentialspecs"), 0, ""); err != nil && !os.IsExist(err) {
 			return nil, err
@@ -635,7 +708,6 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
 	}
 
 	d.RegistryService = registryService
-	d.PluginStore = pluginStore
 	logger.RegisterPluginGetter(d.PluginStore)
 
 	metricsSockPath, err := d.listenMetricsSock()
@@ -645,7 +717,7 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
 	registerMetricsPluginCallback(d.PluginStore, metricsSockPath)
 
 	createPluginExec := func(m *plugin.Manager) (plugin.Executor, error) {
-		return pluginexec.New(containerdRemote, m)
+		return pluginexec.New(getPluginExecRoot(config.Root), containerdRemote, m)
 	}
 
 	// Plugin system initialization should happen before restore. Do not change order.
@@ -802,13 +874,13 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
 	d.idMappings = idMappings
 	d.seccompEnabled = sysInfo.Seccomp
 	d.apparmorEnabled = sysInfo.AppArmor
+	d.containerdRemote = containerdRemote
 
 	d.linkIndex = newLinkIndex()
-	d.containerdRemote = containerdRemote
 
 	go d.execCommandGC()
 
-	d.containerd, err = containerdRemote.Client(d)
+	d.containerd, err = containerdRemote.NewClient(MainNamespace, d)
 	if err != nil {
 		return nil, err
 	}
@@ -1171,19 +1243,6 @@ func (daemon *Daemon) networkOptions(dconfig *config.Config, pg plugingetter.Plu
 	return options, nil
 }
 
-func copyBlkioEntry(entries []*containerd.BlkioStatsEntry) []types.BlkioStatEntry {
-	out := make([]types.BlkioStatEntry, len(entries))
-	for i, re := range entries {
-		out[i] = types.BlkioStatEntry{
-			Major: re.Major,
-			Minor: re.Minor,
-			Op:    re.Op,
-			Value: re.Value,
-		}
-	}
-	return out
-}
-
 // GetCluster returns the cluster
 func (daemon *Daemon) GetCluster() Cluster {
 	return daemon.cluster

+ 162 - 40
daemon/daemon_unix.go

@@ -5,6 +5,7 @@ package daemon
 import (
 	"bufio"
 	"bytes"
+	"context"
 	"fmt"
 	"io/ioutil"
 	"net"
@@ -16,6 +17,7 @@ import (
 	"strings"
 	"time"
 
+	containerd_cgroups "github.com/containerd/cgroups"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/blkiodev"
 	pblkiodev "github.com/docker/docker/api/types/blkiodev"
@@ -26,6 +28,7 @@ import (
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/pkg/containerfs"
 	"github.com/docker/docker/pkg/idtools"
+	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/parsers"
 	"github.com/docker/docker/pkg/parsers/kernel"
 	"github.com/docker/docker/pkg/sysinfo"
@@ -38,7 +41,6 @@ import (
 	"github.com/docker/libnetwork/netutils"
 	"github.com/docker/libnetwork/options"
 	lntypes "github.com/docker/libnetwork/types"
-	"github.com/golang/protobuf/ptypes"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	rsystem "github.com/opencontainers/runc/libcontainer/system"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -50,6 +52,14 @@ import (
 )
 
 const (
+	// DefaultShimBinary is the default shim to be used by containerd if none
+	// is specified
+	DefaultShimBinary = "docker-containerd-shim"
+
+	// DefaultRuntimeBinary is the default runtime to be used by
+	// containerd if none is specified
+	DefaultRuntimeBinary = "docker-runc"
+
 	// See https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/tree/kernel/sched/sched.h?id=8cd9234c64c584432f6992fe944ca9e46ca8ea76#n269
 	linuxMinCPUShares = 2
 	linuxMaxCPUShares = 262144
@@ -63,6 +73,10 @@ const (
 	// constant for cgroup drivers
 	cgroupFsDriver      = "cgroupfs"
 	cgroupSystemdDriver = "systemd"
+
+	// DefaultRuntimeName is the default runtime to be used by
+	// containerd if none is specified
+	DefaultRuntimeName = "docker-runc"
 )
 
 type containerGetter interface {
@@ -623,6 +637,54 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
 	return warnings, nil
 }
 
+func (daemon *Daemon) loadRuntimes() error {
+	return daemon.initRuntimes(daemon.configStore.Runtimes)
+}
+
+func (daemon *Daemon) initRuntimes(runtimes map[string]types.Runtime) (err error) {
+	runtimeDir := filepath.Join(daemon.configStore.Root, "runtimes")
+	// Remove old temp directory if any
+	os.RemoveAll(runtimeDir + "-old")
+	tmpDir, err := ioutils.TempDir(daemon.configStore.Root, "gen-runtimes")
+	if err != nil {
+		return errors.Wrapf(err, "failed to get temp dir to generate runtime scripts")
+	}
+	defer func() {
+		if err != nil {
+			if err1 := os.RemoveAll(tmpDir); err1 != nil {
+				logrus.WithError(err1).WithField("dir", tmpDir).
+					Warnf("failed to remove tmp dir")
+			}
+			return
+		}
+
+		if err = os.Rename(runtimeDir, runtimeDir+"-old"); err != nil {
+			return
+		}
+		if err = os.Rename(tmpDir, runtimeDir); err != nil {
+			err = errors.Wrapf(err, "failed to setup runtimes dir, new containers may not start")
+			return
+		}
+		if err = os.RemoveAll(runtimeDir + "-old"); err != nil {
+			logrus.WithError(err).WithField("dir", tmpDir).
+				Warnf("failed to remove old runtimes dir")
+		}
+	}()
+
+	for name, rt := range runtimes {
+		if len(rt.Args) == 0 {
+			continue
+		}
+
+		script := filepath.Join(tmpDir, name)
+		content := fmt.Sprintf("#!/bin/sh\n%s %s $@\n", rt.Path, strings.Join(rt.Args, " "))
+		if err := ioutil.WriteFile(script, []byte(content), 0700); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
 // reloadPlatform updates configuration with platform specific options
 // and updates the passed attributes
 func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]string) error {
@@ -631,9 +693,12 @@ func (daemon *Daemon) reloadPlatform(conf *config.Config, attributes map[string]
 	}
 
 	if conf.IsValueSet("runtimes") {
-		daemon.configStore.Runtimes = conf.Runtimes
 		// Always set the default one
-		daemon.configStore.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
+		conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
+		if err := daemon.initRuntimes(conf.Runtimes); err != nil {
+			return err
+		}
+		daemon.configStore.Runtimes = conf.Runtimes
 	}
 
 	if conf.DefaultRuntime != "" {
@@ -692,7 +757,7 @@ func verifyDaemonSettings(conf *config.Config) error {
 	if conf.Runtimes == nil {
 		conf.Runtimes = make(map[string]types.Runtime)
 	}
-	conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeBinary}
+	conf.Runtimes[config.StockRuntimeName] = types.Runtime{Path: DefaultRuntimeName}
 
 	return nil
 }
@@ -1214,11 +1279,24 @@ func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container
 	return daemon.Unmount(container)
 }
 
+func copyBlkioEntry(entries []*containerd_cgroups.BlkIOEntry) []types.BlkioStatEntry {
+	out := make([]types.BlkioStatEntry, len(entries))
+	for i, re := range entries {
+		out[i] = types.BlkioStatEntry{
+			Major: re.Major,
+			Minor: re.Minor,
+			Op:    re.Op,
+			Value: re.Value,
+		}
+	}
+	return out
+}
+
 func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
 	if !c.IsRunning() {
 		return nil, errNotRunning(c.ID)
 	}
-	stats, err := daemon.containerd.Stats(c.ID)
+	cs, err := daemon.containerd.Stats(context.Background(), c.ID)
 	if err != nil {
 		if strings.Contains(err.Error(), "container not found") {
 			return nil, containerNotFound(c.ID)
@@ -1226,54 +1304,98 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
 		return nil, err
 	}
 	s := &types.StatsJSON{}
-	cgs := stats.CgroupStats
-	if cgs != nil {
+	s.Read = cs.Read
+	stats := cs.Metrics
+	if stats.Blkio != nil {
 		s.BlkioStats = types.BlkioStats{
-			IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
-			IoServicedRecursive:     copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
-			IoQueuedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
-			IoServiceTimeRecursive:  copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
-			IoWaitTimeRecursive:     copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
-			IoMergedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
-			IoTimeRecursive:         copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
-			SectorsRecursive:        copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
-		}
-		cpu := cgs.CpuStats
+			IoServiceBytesRecursive: copyBlkioEntry(stats.Blkio.IoServiceBytesRecursive),
+			IoServicedRecursive:     copyBlkioEntry(stats.Blkio.IoServicedRecursive),
+			IoQueuedRecursive:       copyBlkioEntry(stats.Blkio.IoQueuedRecursive),
+			IoServiceTimeRecursive:  copyBlkioEntry(stats.Blkio.IoServiceTimeRecursive),
+			IoWaitTimeRecursive:     copyBlkioEntry(stats.Blkio.IoWaitTimeRecursive),
+			IoMergedRecursive:       copyBlkioEntry(stats.Blkio.IoMergedRecursive),
+			IoTimeRecursive:         copyBlkioEntry(stats.Blkio.IoTimeRecursive),
+			SectorsRecursive:        copyBlkioEntry(stats.Blkio.SectorsRecursive),
+		}
+	}
+	if stats.CPU != nil {
 		s.CPUStats = types.CPUStats{
 			CPUUsage: types.CPUUsage{
-				TotalUsage:        cpu.CpuUsage.TotalUsage,
-				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
-				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
-				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
+				TotalUsage:        stats.CPU.Usage.Total,
+				PercpuUsage:       stats.CPU.Usage.PerCPU,
+				UsageInKernelmode: stats.CPU.Usage.Kernel,
+				UsageInUsermode:   stats.CPU.Usage.User,
 			},
 			ThrottlingData: types.ThrottlingData{
-				Periods:          cpu.ThrottlingData.Periods,
-				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
-				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
+				Periods:          stats.CPU.Throttling.Periods,
+				ThrottledPeriods: stats.CPU.Throttling.ThrottledPeriods,
+				ThrottledTime:    stats.CPU.Throttling.ThrottledTime,
 			},
 		}
-		mem := cgs.MemoryStats.Usage
-		s.MemoryStats = types.MemoryStats{
-			Usage:    mem.Usage,
-			MaxUsage: mem.MaxUsage,
-			Stats:    cgs.MemoryStats.Stats,
-			Failcnt:  mem.Failcnt,
-			Limit:    mem.Limit,
+	}
+
+	if stats.Memory != nil {
+		raw := make(map[string]uint64)
+		raw["cache"] = stats.Memory.Cache
+		raw["rss"] = stats.Memory.RSS
+		raw["rss_huge"] = stats.Memory.RSSHuge
+		raw["mapped_file"] = stats.Memory.MappedFile
+		raw["dirty"] = stats.Memory.Dirty
+		raw["writeback"] = stats.Memory.Writeback
+		raw["pgpgin"] = stats.Memory.PgPgIn
+		raw["pgpgout"] = stats.Memory.PgPgOut
+		raw["pgfault"] = stats.Memory.PgFault
+		raw["pgmajfault"] = stats.Memory.PgMajFault
+		raw["inactive_anon"] = stats.Memory.InactiveAnon
+		raw["active_anon"] = stats.Memory.ActiveAnon
+		raw["inactive_file"] = stats.Memory.InactiveFile
+		raw["active_file"] = stats.Memory.ActiveFile
+		raw["unevictable"] = stats.Memory.Unevictable
+		raw["hierarchical_memory_limit"] = stats.Memory.HierarchicalMemoryLimit
+		raw["hierarchical_memsw_limit"] = stats.Memory.HierarchicalSwapLimit
+		raw["total_cache"] = stats.Memory.TotalCache
+		raw["total_rss"] = stats.Memory.TotalRSS
+		raw["total_rss_huge"] = stats.Memory.TotalRSSHuge
+		raw["total_mapped_file"] = stats.Memory.TotalMappedFile
+		raw["total_dirty"] = stats.Memory.TotalDirty
+		raw["total_writeback"] = stats.Memory.TotalWriteback
+		raw["total_pgpgin"] = stats.Memory.TotalPgPgIn
+		raw["total_pgpgout"] = stats.Memory.TotalPgPgOut
+		raw["total_pgfault"] = stats.Memory.TotalPgFault
+		raw["total_pgmajfault"] = stats.Memory.TotalPgMajFault
+		raw["total_inactive_anon"] = stats.Memory.TotalInactiveAnon
+		raw["total_active_anon"] = stats.Memory.TotalActiveAnon
+		raw["total_inactive_file"] = stats.Memory.TotalInactiveFile
+		raw["total_active_file"] = stats.Memory.TotalActiveFile
+		raw["total_unevictable"] = stats.Memory.TotalUnevictable
+
+		if stats.Memory.Usage != nil {
+			s.MemoryStats = types.MemoryStats{
+				Stats:    raw,
+				Usage:    stats.Memory.Usage.Usage,
+				MaxUsage: stats.Memory.Usage.Max,
+				Limit:    stats.Memory.Usage.Limit,
+				Failcnt:  stats.Memory.Usage.Failcnt,
+			}
+		} else {
+			s.MemoryStats = types.MemoryStats{
+				Stats: raw,
+			}
 		}
+
 		// if the container does not set memory limit, use the machineMemory
-		if mem.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
+		if s.MemoryStats.Limit > daemon.machineMemory && daemon.machineMemory > 0 {
 			s.MemoryStats.Limit = daemon.machineMemory
 		}
-		if cgs.PidsStats != nil {
-			s.PidsStats = types.PidsStats{
-				Current: cgs.PidsStats.Current,
-			}
-		}
 	}
-	s.Read, err = ptypes.Timestamp(stats.Timestamp)
-	if err != nil {
-		return nil, err
+
+	if stats.Pids != nil {
+		s.PidsStats = types.PidsStats{
+			Current: stats.Pids.Current,
+			Limit:   stats.Pids.Limit,
+		}
 	}
+
 	return s, nil
 }
 

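The initRuntimes hunk above supports per-runtime arguments by writing a small shell wrapper for each configured runtime that declares them. A sketch of that generation in isolation, using a hypothetical runtime entry (the path and arguments are made up; the script template is the one from the hunk):

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical runtime entry, e.g. configured via --add-runtime or daemon.json.
	path := "/usr/local/bin/my-runc"
	args := []string{"--debug"}

	// Same template as initRuntimes: the wrapper forwards all arguments ($@)
	// to the configured binary with its fixed arguments prepended.
	script := fmt.Sprintf("#!/bin/sh\n%s %s $@\n", path, strings.Join(args, " "))
	fmt.Print(script)
	// Output:
	// #!/bin/sh
	// /usr/local/bin/my-runc --debug $@
}
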
+ 48 - 40
daemon/daemon_windows.go

@@ -1,6 +1,7 @@
 package daemon
 
 import (
+	"context"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -532,7 +533,7 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
 	}
 
 	// Obtain the stats from HCS via libcontainerd
-	stats, err := daemon.containerd.Stats(c.ID)
+	stats, err := daemon.containerd.Stats(context.Background(), c.ID)
 	if err != nil {
 		if strings.Contains(err.Error(), "container not found") {
 			return nil, containerNotFound(c.ID)
@@ -542,49 +543,48 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
 
 	// Start with an empty structure
 	s := &types.StatsJSON{}
+	s.Stats.Read = stats.Read
+	s.Stats.NumProcs = platform.NumProcs()
 
-	// Populate the CPU/processor statistics
-	s.CPUStats = types.CPUStats{
-		CPUUsage: types.CPUUsage{
-			TotalUsage:        stats.Processor.TotalRuntime100ns,
-			UsageInKernelmode: stats.Processor.RuntimeKernel100ns,
-			UsageInUsermode:   stats.Processor.RuntimeKernel100ns,
-		},
-	}
-
-	// Populate the memory statistics
-	s.MemoryStats = types.MemoryStats{
-		Commit:            stats.Memory.UsageCommitBytes,
-		CommitPeak:        stats.Memory.UsageCommitPeakBytes,
-		PrivateWorkingSet: stats.Memory.UsagePrivateWorkingSetBytes,
-	}
-
-	// Populate the storage statistics
-	s.StorageStats = types.StorageStats{
-		ReadCountNormalized:  stats.Storage.ReadCountNormalized,
-		ReadSizeBytes:        stats.Storage.ReadSizeBytes,
-		WriteCountNormalized: stats.Storage.WriteCountNormalized,
-		WriteSizeBytes:       stats.Storage.WriteSizeBytes,
-	}
-
-	// Populate the network statistics
-	s.Networks = make(map[string]types.NetworkStats)
-
-	for _, nstats := range stats.Network {
-		s.Networks[nstats.EndpointId] = types.NetworkStats{
-			RxBytes:   nstats.BytesReceived,
-			RxPackets: nstats.PacketsReceived,
-			RxDropped: nstats.DroppedPacketsIncoming,
-			TxBytes:   nstats.BytesSent,
-			TxPackets: nstats.PacketsSent,
-			TxDropped: nstats.DroppedPacketsOutgoing,
+	if stats.HCSStats != nil {
+		hcss := stats.HCSStats
+		// Populate the CPU/processor statistics
+		s.CPUStats = types.CPUStats{
+			CPUUsage: types.CPUUsage{
+				TotalUsage:        hcss.Processor.TotalRuntime100ns,
+				UsageInKernelmode: hcss.Processor.RuntimeKernel100ns,
+				UsageInUsermode:   hcss.Processor.RuntimeKernel100ns,
+			},
 		}
-	}
 
-	// Set the timestamp
-	s.Stats.Read = stats.Timestamp
-	s.Stats.NumProcs = platform.NumProcs()
+		// Populate the memory statistics
+		s.MemoryStats = types.MemoryStats{
+			Commit:            hcss.Memory.UsageCommitBytes,
+			CommitPeak:        hcss.Memory.UsageCommitPeakBytes,
+			PrivateWorkingSet: hcss.Memory.UsagePrivateWorkingSetBytes,
+		}
 
+		// Populate the storage statistics
+		s.StorageStats = types.StorageStats{
+			ReadCountNormalized:  hcss.Storage.ReadCountNormalized,
+			ReadSizeBytes:        hcss.Storage.ReadSizeBytes,
+			WriteCountNormalized: hcss.Storage.WriteCountNormalized,
+			WriteSizeBytes:       hcss.Storage.WriteSizeBytes,
+		}
+
+		// Populate the network statistics
+		s.Networks = make(map[string]types.NetworkStats)
+		for _, nstats := range hcss.Network {
+			s.Networks[nstats.EndpointId] = types.NetworkStats{
+				RxBytes:   nstats.BytesReceived,
+				RxPackets: nstats.PacketsReceived,
+				RxDropped: nstats.DroppedPacketsIncoming,
+				TxBytes:   nstats.BytesSent,
+				TxPackets: nstats.PacketsSent,
+				TxDropped: nstats.DroppedPacketsOutgoing,
+			}
+		}
+	}
 	return s, nil
 }
 
@@ -664,3 +664,11 @@ func getRealPath(path string) (string, error) {
 	}
 	return fileutils.ReadSymlinkedDirectory(path)
 }
+
+func (daemon *Daemon) loadRuntimes() error {
+	return nil
+}
+
+func (daemon *Daemon) initRuntimes(_ map[string]types.Runtime) error {
+	return nil
+}

+ 1 - 0
daemon/delete.go

@@ -141,6 +141,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
 	}
 	container.SetRemoved()
 	stateCtr.del(container.ID)
+
 	daemon.LogContainerEvent(container, "destroy")
 	return nil
 }

+ 5 - 0
daemon/errors.go

@@ -64,6 +64,11 @@ func errExecPaused(id string) error {
 	return stateConflictError{cause}
 }
 
+func errNotPaused(id string) error {
+	cause := errors.Errorf("Container %s is already paused", id)
+	return stateConflictError{cause}
+}
+
 type nameConflictError struct {
 	id   string
 	name string

+ 29 - 11
daemon/exec.go

@@ -13,10 +13,10 @@ import (
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/container/stream"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/pkg/pools"
 	"github.com/docker/docker/pkg/signal"
 	"github.com/docker/docker/pkg/term"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
@@ -31,6 +31,14 @@ func (d *Daemon) registerExecCommand(container *container.Container, config *exe
 	d.execCommands.Add(config.ID, config)
 }
 
+func (d *Daemon) registerExecPidUnlocked(container *container.Container, config *exec.Config) {
+	logrus.Debugf("registering pid %v for exec %v", config.Pid, config.ID)
+	// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
+	container.ExecCommands.SetPidUnlocked(config.ID, config.Pid)
+	// Storing execs in daemon for easy access via Engine API.
+	d.execCommands.SetPidUnlocked(config.ID, config.Pid)
+}
+
 // ExecExists looks up the exec instance and returns a bool if it exists or not.
 // It will also return the error produced by `getConfig`
 func (d *Daemon) ExecExists(name string) (bool, error) {
@@ -70,8 +78,8 @@ func (d *Daemon) getExecConfig(name string) (*exec.Config, error) {
 }
 
 func (d *Daemon) unregisterExecCommand(container *container.Container, execConfig *exec.Config) {
-	container.ExecCommands.Delete(execConfig.ID)
-	d.execCommands.Delete(execConfig.ID)
+	container.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
+	d.execCommands.Delete(execConfig.ID, execConfig.Pid)
 }
 
 func (d *Daemon) getActiveContainer(name string) (*container.Container, error) {
@@ -181,7 +189,7 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
 				logrus.Errorf("failed to cleanup exec %s streams: %s", c.ID, err)
 			}
 			ec.Unlock()
-			c.ExecCommands.Delete(ec.ID)
+			c.ExecCommands.Delete(ec.ID, ec.Pid)
 		}
 	}()
 
@@ -207,13 +215,17 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
 		ec.StreamConfig.NewNopInputPipe()
 	}
 
-	p := libcontainerd.Process{
+	p := &specs.Process{
 		Args:     append([]string{ec.Entrypoint}, ec.Args...),
 		Env:      ec.Env,
 		Terminal: ec.Tty,
+		Cwd:      c.Config.WorkingDir,
+	}
+	if p.Cwd == "" {
+		p.Cwd = "/"
 	}
 
-	if err := execSetPlatformOpt(c, ec, &p); err != nil {
+	if err := d.execSetPlatformOpt(c, ec, p); err != nil {
 		return err
 	}
 
@@ -231,22 +243,28 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
 	ec.StreamConfig.AttachStreams(&attachConfig)
 	attachErr := ec.StreamConfig.CopyStreams(ctx, &attachConfig)
 
-	systemPid, err := d.containerd.AddProcess(ctx, c.ID, name, p, ec.InitializeStdio)
+	// Synchronize with libcontainerd event loop
+	ec.Lock()
+	c.ExecCommands.Lock()
+	systemPid, err := d.containerd.Exec(ctx, c.ID, ec.ID, p, cStdin != nil, ec.InitializeStdio)
 	if err != nil {
+		c.ExecCommands.Unlock()
+		ec.Unlock()
 		return translateContainerdStartErr(ec.Entrypoint, ec.SetExitCode, err)
 	}
-	ec.Lock()
 	ec.Pid = systemPid
+	d.registerExecPidUnlocked(c, ec)
+	c.ExecCommands.Unlock()
 	ec.Unlock()
 
 	select {
 	case <-ctx.Done():
 		logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID)
-		d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["TERM"]))
+		d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["TERM"]))
 		select {
 		case <-time.After(termProcessTimeout * time.Second):
 			logrus.Infof("Container %v, process %v failed to exit within %d seconds of signal TERM - using the force", c.ID, name, termProcessTimeout)
-			d.containerd.SignalProcess(c.ID, name, int(signal.SignalMap["KILL"]))
+			d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["KILL"]))
 		case <-attachErr:
 			// TERM signal worked
 		}
@@ -273,7 +291,7 @@ func (d *Daemon) execCommandGC() {
 		for id, config := range d.execCommands.Commands() {
 			if config.CanRemove {
 				cleaned++
-				d.execCommands.Delete(id)
+				d.execCommands.Delete(id, config.Pid)
 			} else {
 				if _, exists := liveExecCommands[id]; !exists {
 					config.CanRemove = true

+ 53 - 13
daemon/exec/exec.go

@@ -4,6 +4,7 @@ import (
 	"runtime"
 	"sync"
 
+	"github.com/containerd/containerd"
 	"github.com/docker/docker/container/stream"
 	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/pkg/stringid"
@@ -42,8 +43,26 @@ func NewConfig() *Config {
 	}
 }
 
+type cio struct {
+	containerd.IO
+
+	sc *stream.Config
+}
+
+func (i *cio) Close() error {
+	i.IO.Close()
+
+	return i.sc.CloseStreams()
+}
+
+func (i *cio) Wait() {
+	i.sc.Wait()
+
+	i.IO.Wait()
+}
+
 // InitializeStdio is called by libcontainerd to connect the stdio.
-func (c *Config) InitializeStdio(iop libcontainerd.IOPipe) error {
+func (c *Config) InitializeStdio(iop *libcontainerd.IOPipe) (containerd.IO, error) {
 	c.StreamConfig.CopyToPipe(iop)
 
 	if c.StreamConfig.Stdin() == nil && !c.Tty && runtime.GOOS == "windows" {
@@ -54,7 +73,7 @@ func (c *Config) InitializeStdio(iop libcontainerd.IOPipe) error {
 		}
 	}
 
-	return nil
+	return &cio{IO: iop, sc: c.StreamConfig}, nil
 }
 
 // CloseStreams closes the stdio streams for the exec
@@ -69,45 +88,66 @@ func (c *Config) SetExitCode(code int) {
 
 // Store keeps track of the exec configurations.
 type Store struct {
-	commands map[string]*Config
+	byID  map[string]*Config
+	byPid map[int]*Config
 	sync.RWMutex
 }
 
 // NewStore initializes a new exec store.
 func NewStore() *Store {
-	return &Store{commands: make(map[string]*Config)}
+	return &Store{
+		byID:  make(map[string]*Config),
+		byPid: make(map[int]*Config),
+	}
 }
 
 // Commands returns the exec configurations in the store.
 func (e *Store) Commands() map[string]*Config {
 	e.RLock()
-	commands := make(map[string]*Config, len(e.commands))
-	for id, config := range e.commands {
-		commands[id] = config
+	byID := make(map[string]*Config, len(e.byID))
+	for id, config := range e.byID {
+		byID[id] = config
 	}
 	e.RUnlock()
-	return commands
+	return byID
 }
 
 // Add adds a new exec configuration to the store.
 func (e *Store) Add(id string, Config *Config) {
 	e.Lock()
-	e.commands[id] = Config
+	e.byID[id] = Config
 	e.Unlock()
 }
 
+// SetPidUnlocked adds an association between a Pid and a config; it is not
+// synchronized with other operations.
+func (e *Store) SetPidUnlocked(id string, pid int) {
+	if config, ok := e.byID[id]; ok {
+		e.byPid[pid] = config
+	}
+}
+
 // Get returns an exec configuration by its id.
 func (e *Store) Get(id string) *Config {
 	e.RLock()
-	res := e.commands[id]
+	res := e.byID[id]
+	e.RUnlock()
+	return res
+}
+
+// ByPid returns an exec configuration by its pid.
+func (e *Store) ByPid(pid int) *Config {
+	e.RLock()
+	res := e.byPid[pid]
 	e.RUnlock()
 	return res
 }
 
 // Delete removes an exec configuration from the store.
-func (e *Store) Delete(id string) {
+func (e *Store) Delete(id string, pid int) {
 	e.Lock()
-	delete(e.commands, id)
+	delete(e.byPid, pid)
+	delete(e.byID, id)
 	e.Unlock()
 }
 
@@ -115,7 +155,7 @@ func (e *Store) Delete(id string) {
 func (e *Store) List() []string {
 	var IDs []string
 	e.RLock()
-	for id := range e.commands {
+	for id := range e.byID {
 		IDs = append(IDs, id)
 	}
 	e.RUnlock()

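A small illustrative sketch of the reworked exec Store, which is now indexed both by exec ID and by PID so that containerd exit events (which only carry a pid) can be mapped back to an exec. The PID value is made up, and the explicit Lock/Unlock around SetPidUnlocked stands in for the locking done in daemon/exec.go.

package main

import (
	"fmt"

	"github.com/docker/docker/daemon/exec"
)

func main() {
	store := exec.NewStore()
	cfg := exec.NewConfig()
	store.Add(cfg.ID, cfg)

	// Once containerd reports the process PID (value is illustrative), the
	// daemon records it so exit events carrying only a pid can be resolved.
	cfg.Pid = 4242
	store.Lock()
	store.SetPidUnlocked(cfg.ID, cfg.Pid)
	store.Unlock()

	if byPid := store.ByPid(4242); byPid != nil {
		fmt.Println("resolved exec", byPid.ID, "from pid 4242")
	}

	store.Delete(cfg.ID, cfg.Pid)
}
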
+ 10 - 4
daemon/exec_linux.go

@@ -4,25 +4,30 @@ import (
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/daemon/caps"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/libcontainerd"
 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runtime-spec/specs-go"
 )
 
-func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
+func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config, p *specs.Process) error {
 	if len(ec.User) > 0 {
 		uid, gid, additionalGids, err := getUser(c, ec.User)
 		if err != nil {
 			return err
 		}
-		p.User = &specs.User{
+		p.User = specs.User{
 			UID:            uid,
 			GID:            gid,
 			AdditionalGids: additionalGids,
 		}
 	}
 	if ec.Privileged {
-		p.Capabilities = caps.GetAllCapabilities()
+		if p.Capabilities == nil {
+			p.Capabilities = &specs.LinuxCapabilities{}
+		}
+		p.Capabilities.Bounding = caps.GetAllCapabilities()
+		p.Capabilities.Permitted = p.Capabilities.Bounding
+		p.Capabilities.Inheritable = p.Capabilities.Bounding
+		p.Capabilities.Effective = p.Capabilities.Bounding
 	}
 	if apparmor.IsEnabled() {
 		var appArmorProfile string
@@ -46,5 +51,6 @@ func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainer
 			}
 		}
 	}
+	daemon.setRlimits(&specs.Spec{Process: p}, c)
 	return nil
 }

+ 2 - 2
daemon/exec_solaris.go

@@ -3,9 +3,9 @@ package daemon
 import (
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/libcontainerd"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
 )
 
-func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
+func (daemon *Daemon) execSetPlatformOpt(_ *container.Container, _ *exec.Config, _ *specs.Process) error {
 	return nil
 }

+ 2 - 2
daemon/exec_windows.go

@@ -3,10 +3,10 @@ package daemon
 import (
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/libcontainerd"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
 )
 
-func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
+func (daemon *Daemon) execSetPlatformOpt(c *container.Container, ec *exec.Config, p *specs.Process) error {
 	// Process arguments need to be escaped before sending to OCI.
 	if c.OS == "windows" {
 		p.Args = escapeArgs(p.Args)

+ 19 - 10
daemon/info_unix.go

@@ -3,7 +3,6 @@
 package daemon
 
 import (
-	"context"
 	"os/exec"
 	"strings"
 
@@ -28,16 +27,8 @@ func (daemon *Daemon) FillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo)
 	v.DefaultRuntime = daemon.configStore.GetDefaultRuntimeName()
 	v.InitBinary = daemon.configStore.GetInitPath()
 
-	v.ContainerdCommit.Expected = dockerversion.ContainerdCommitID
-	if sv, err := daemon.containerd.GetServerVersion(context.Background()); err == nil {
-		v.ContainerdCommit.ID = sv.Revision
-	} else {
-		logrus.Warnf("failed to retrieve containerd version: %v", err)
-		v.ContainerdCommit.ID = "N/A"
-	}
-
 	v.RuncCommit.Expected = dockerversion.RuncCommitID
-	defaultRuntimeBinary := daemon.configStore.GetRuntime(daemon.configStore.GetDefaultRuntimeName()).Path
+	defaultRuntimeBinary := daemon.configStore.GetRuntime(v.DefaultRuntime).Path
 	if rv, err := exec.Command(defaultRuntimeBinary, "--version").Output(); err == nil {
 		parts := strings.Split(strings.TrimSpace(string(rv)), "\n")
 		if len(parts) == 3 {
@@ -56,6 +47,24 @@ func (daemon *Daemon) FillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo)
 		v.RuncCommit.ID = "N/A"
 	}
 
+	v.ContainerdCommit.Expected = dockerversion.ContainerdCommitID
+	if rv, err := exec.Command("docker-containerd", "--version").Output(); err == nil {
+		parts := strings.Split(strings.TrimSpace(string(rv)), " ")
+		if len(parts) == 3 {
+			v.ContainerdCommit.ID = parts[2]
+		}
+		switch {
+		case v.ContainerdCommit.ID == "":
+			logrus.Warnf("failed to retrieve docker-containerd version: unknown format: %s", string(rv))
+			v.ContainerdCommit.ID = "N/A"
+		case strings.HasSuffix(v.ContainerdCommit.ID, "-g"+v.ContainerdCommit.ID[len(v.ContainerdCommit.ID)-7:]):
+			v.ContainerdCommit.ID = v.ContainerdCommit.Expected
+		}
+	} else {
+		logrus.Warnf("failed to retrieve docker-containerd version: %v", err)
+		v.ContainerdCommit.ID = "N/A"
+	}
+
 	defaultInitBinary := daemon.configStore.GetInitPath()
 	if rv, err := exec.Command(defaultInitBinary, "--version").Output(); err == nil {
 		ver, err := parseInitVersion(string(rv))

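Note: the containerd commit is now read by shelling out to the docker-containerd binary instead of querying the containerd daemon over its API. A standalone sketch of the same parsing, assuming (as the code above does) that the --version output is three space-separated fields:

package main

import (
	"fmt"
	"os/exec"
	"strings"
)

// containerdCommit runs the given containerd binary with --version and
// returns the third whitespace-separated field, mirroring the parsing in
// FillPlatformInfo above; the exact output layout is an assumption here.
func containerdCommit(binary string) (string, error) {
	out, err := exec.Command(binary, "--version").Output()
	if err != nil {
		return "", err
	}
	parts := strings.Split(strings.TrimSpace(string(out)), " ")
	if len(parts) != 3 {
		return "", fmt.Errorf("unexpected --version output: %q", string(out))
	}
	return parts[2], nil
}

func main() {
	if commit, err := containerdCommit("docker-containerd"); err == nil {
		fmt.Println("containerd commit:", commit)
	}
}
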
+ 3 - 2
daemon/kill.go

@@ -9,6 +9,7 @@ import (
 	"time"
 
 	containerpkg "github.com/docker/docker/container"
+	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/pkg/signal"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
@@ -108,7 +109,7 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, sig int)
 
 	if unpause {
 		// above kill signal will be sent once resume is finished
-		if err := daemon.containerd.Resume(container.ID); err != nil {
+		if err := daemon.containerd.Resume(context.Background(), container.ID); err != nil {
 			logrus.Warnf("Cannot unpause container %s: %s", container.ID, err)
 		}
 	}
@@ -177,5 +178,5 @@ func (daemon *Daemon) killPossiblyDeadProcess(container *containerpkg.Container,
 }
 
 func (daemon *Daemon) kill(c *containerpkg.Container, sig int) error {
-	return daemon.containerd.Signal(c.ID, sig)
+	return daemon.containerd.SignalProcess(context.Background(), c.ID, libcontainerd.InitProcessName, sig)
 }

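Note: signals now go through SignalProcess with an explicit process name; the container's init process is addressed by the exported InitProcessName constant rather than being implicit. A hedged usage sketch, assuming the new client interface is exported as libcontainerd.Client:

package sketch

import (
	"context"

	"github.com/docker/docker/libcontainerd"
)

// stopInit signals a container's init process the way daemon.kill does
// above; only SignalProcess is used here.
func stopInit(ctx context.Context, client libcontainerd.Client, containerID string, sig int) error {
	return client.SignalProcess(ctx, containerID, libcontainerd.InitProcessName, sig)
}
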
+ 1 - 1
daemon/logger/plugin_unix.go

@@ -6,8 +6,8 @@ import (
 	"context"
 	"io"
 
+	"github.com/containerd/fifo"
 	"github.com/pkg/errors"
-	"github.com/tonistiigi/fifo"
 	"golang.org/x/sys/unix"
 )
 

+ 114 - 87
daemon/monitor.go

@@ -1,6 +1,7 @@
 package daemon
 
 import (
+	"context"
 	"errors"
 	"fmt"
 	"runtime"
@@ -25,15 +26,15 @@ func (daemon *Daemon) setStateCounter(c *container.Container) {
 	}
 }
 
-// StateChanged updates daemon state changes from containerd
-func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
-	c := daemon.containers.Get(id)
-	if c == nil {
+// ProcessEvent is called by libcontainerd whenever an event occurs
+func (daemon *Daemon) ProcessEvent(id string, e libcontainerd.EventType, ei libcontainerd.EventInfo) error {
+	c, err := daemon.GetContainer(id)
+	if c == nil || err != nil {
 		return fmt.Errorf("no such container: %s", id)
 	}
 
-	switch e.State {
-	case libcontainerd.StateOOM:
+	switch e {
+	case libcontainerd.EventOOM:
 		// StateOOM is Linux specific and should never be hit on Windows
 		if runtime.GOOS == "windows" {
 			return errors.New("received StateOOM from libcontainerd on Windows. This should never happen")
@@ -43,63 +44,72 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
 			return err
 		}
 		daemon.LogContainerEvent(c, "oom")
-	case libcontainerd.StateExit:
+	case libcontainerd.EventExit:
+		if int(ei.Pid) == c.Pid {
+			_, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
+			if err != nil {
+				logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
+			}
 
-		c.Lock()
-		c.StreamConfig.Wait()
-		c.Reset(false)
-
-		// If daemon is being shutdown, don't let the container restart
-		restart, wait, err := c.RestartManager().ShouldRestart(e.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
-		if err == nil && restart {
-			c.RestartCount++
-			c.SetRestarting(platformConstructExitStatus(e))
-		} else {
-			c.SetStopped(platformConstructExitStatus(e))
-			defer daemon.autoRemove(c)
-		}
+			c.Lock()
+			c.StreamConfig.Wait()
+			c.Reset(false)
 
-		// cancel healthcheck here, they will be automatically
-		// restarted if/when the container is started again
-		daemon.stopHealthchecks(c)
-		attributes := map[string]string{
-			"exitCode": strconv.Itoa(int(e.ExitCode)),
-		}
-		daemon.LogContainerEventWithAttributes(c, "die", attributes)
-		daemon.Cleanup(c)
-
-		if err == nil && restart {
-			go func() {
-				err := <-wait
-				if err == nil {
-					// daemon.netController is initialized when daemon is restoring containers.
-					// But containerStart will use daemon.netController segment.
-					// So to avoid panic at startup process, here must wait util daemon restore done.
-					daemon.waitForStartupDone()
-					if err = daemon.containerStart(c, "", "", false); err != nil {
-						logrus.Debugf("failed to restart container: %+v", err)
+			exitStatus := container.ExitStatus{
+				ExitCode:  int(ei.ExitCode),
+				ExitedAt:  ei.ExitedAt,
+				OOMKilled: ei.OOMKilled,
+			}
+			restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
+			if err == nil && restart {
+				c.RestartCount++
+				c.SetRestarting(&exitStatus)
+			} else {
+				c.SetStopped(&exitStatus)
+				defer daemon.autoRemove(c)
+			}
+
+			// cancel healthcheck here, they will be automatically
+			// restarted if/when the container is started again
+			daemon.stopHealthchecks(c)
+			attributes := map[string]string{
+				"exitCode": strconv.Itoa(int(ei.ExitCode)),
+			}
+			daemon.LogContainerEventWithAttributes(c, "die", attributes)
+			daemon.Cleanup(c)
+
+			if err == nil && restart {
+				go func() {
+					err := <-wait
+					if err == nil {
+						// daemon.netController is initialized when daemon is restoring containers.
+						// But containerStart will use daemon.netController segment.
+					// So to avoid a panic at startup, we must wait here until the daemon restore is done.
+						daemon.waitForStartupDone()
+						if err = daemon.containerStart(c, "", "", false); err != nil {
+							logrus.Debugf("failed to restart container: %+v", err)
+						}
 					}
-				}
-				if err != nil {
-					c.SetStopped(platformConstructExitStatus(e))
-					defer daemon.autoRemove(c)
-					if err != restartmanager.ErrRestartCanceled {
-						logrus.Errorf("restartmanger wait error: %+v", err)
+					if err != nil {
+						c.SetStopped(&exitStatus)
+						defer daemon.autoRemove(c)
+						if err != restartmanager.ErrRestartCanceled {
+						logrus.Errorf("restartmanager wait error: %+v", err)
+						}
 					}
-				}
-			}()
-		}
-
-		daemon.setStateCounter(c)
+				}()
+			}
 
-		defer c.Unlock()
-		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
-			return err
+			daemon.setStateCounter(c)
+			defer c.Unlock()
+			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+				return err
+			}
+			return daemon.postRunProcessing(c, ei)
 		}
-		return daemon.postRunProcessing(c, e)
-	case libcontainerd.StateExitProcess:
-		if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil {
-			ec := int(e.ExitCode)
+
+		if execConfig := c.ExecCommands.ByPid(int(ei.Pid)); execConfig != nil {
+			ec := int(ei.ExitCode)
 			execConfig.Lock()
 			defer execConfig.Unlock()
 			execConfig.ExitCode = &ec
@@ -111,42 +121,59 @@ func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
 
 			// remove the exec command from the container's store only and not the
 			// daemon's store so that the exec command can be inspected.
-			c.ExecCommands.Delete(execConfig.ID)
+			c.ExecCommands.Delete(execConfig.ID, execConfig.Pid)
 		} else {
-			logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e)
+			logrus.WithFields(logrus.Fields{
+				"container": c.ID,
+				"exec-pid":  ei.Pid,
+			}).Warnf("Ignoring Exit Event, no such exec command found")
 		}
-	case libcontainerd.StateStart, libcontainerd.StateRestore:
-		// Container is already locked in this case
-		c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
-		c.HasBeenManuallyStopped = false
-		c.HasBeenStartedBefore = true
-		daemon.setStateCounter(c)
-
-		daemon.initHealthMonitor(c)
-		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
-			c.Reset(false)
-			return err
+	case libcontainerd.EventStart:
+		c.Lock()
+		defer c.Unlock()
+
+		// This is here to handle start not generated by docker
+		if !c.Running {
+			c.SetRunning(int(ei.Pid), false)
+			c.HasBeenManuallyStopped = false
+			c.HasBeenStartedBefore = true
+			daemon.setStateCounter(c)
+
+			daemon.initHealthMonitor(c)
+
+			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+				return err
+			}
+			daemon.LogContainerEvent(c, "start")
 		}
 
-		daemon.LogContainerEvent(c, "start")
-	case libcontainerd.StatePause:
-		// Container is already locked in this case
-		c.Paused = true
-		daemon.setStateCounter(c)
-		daemon.updateHealthMonitor(c)
-		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
-			return err
+	case libcontainerd.EventPaused:
+		c.Lock()
+		defer c.Unlock()
+
+		if !c.Paused {
+			c.Paused = true
+			daemon.setStateCounter(c)
+			daemon.updateHealthMonitor(c)
+			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+				return err
+			}
+			daemon.LogContainerEvent(c, "pause")
 		}
-		daemon.LogContainerEvent(c, "pause")
-	case libcontainerd.StateResume:
-		// Container is already locked in this case
-		c.Paused = false
-		daemon.setStateCounter(c)
-		daemon.updateHealthMonitor(c)
-		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
-			return err
+	case libcontainerd.EventResumed:
+		c.Lock()
+		defer c.Unlock()
+
+		if c.Paused {
+			c.Paused = false
+			daemon.setStateCounter(c)
+			daemon.updateHealthMonitor(c)
+
+			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
+				return err
+			}
+			daemon.LogContainerEvent(c, "unpause")
 		}
-		daemon.LogContainerEvent(c, "unpause")
 	}
 	return nil
 }

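Note: ProcessEvent replaces StateChanged; there is now a single exit event, and init exits are told apart from exec exits by comparing the event's pid with the container's pid instead of by a dedicated StateExitProcess state. A reduced sketch of that dispatch shape (the handler bodies are placeholders, not daemon code):

package sketch

import (
	"github.com/docker/docker/container"
	"github.com/docker/docker/libcontainerd"
)

// handleEvent shows the shape of the new dispatch: one event type, with
// the payload describing which process it concerns.
func handleEvent(c *container.Container, e libcontainerd.EventType, ei libcontainerd.EventInfo) {
	switch e {
	case libcontainerd.EventExit:
		if int(ei.Pid) == c.Pid {
			// the init process exited: stop/restart bookkeeping happens here
		} else {
			// an exec exited: it is looked up via c.ExecCommands.ByPid(int(ei.Pid))
		}
	case libcontainerd.EventStart, libcontainerd.EventPaused, libcontainerd.EventResumed, libcontainerd.EventOOM:
		// state counters, health monitoring and container events are updated here
	}
}
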
+ 1 - 9
daemon/monitor_linux.go

@@ -5,15 +5,7 @@ import (
 	"github.com/docker/docker/libcontainerd"
 )
 
-// platformConstructExitStatus returns a platform specific exit status structure
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
-	return &container.ExitStatus{
-		ExitCode:  int(e.ExitCode),
-		OOMKilled: e.OOMKilled,
-	}
-}
-
 // postRunProcessing performs any processing needed on the container after it has stopped.
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
+func (daemon *Daemon) postRunProcessing(_ *container.Container, _ libcontainerd.EventInfo) error {
 	return nil
 }

+ 1 - 8
daemon/monitor_solaris.go

@@ -5,14 +5,7 @@ import (
 	"github.com/docker/docker/libcontainerd"
 )
 
-// platformConstructExitStatus returns a platform specific exit status structure
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
-	return &container.ExitStatus{
-		ExitCode: int(e.ExitCode),
-	}
-}
-
 // postRunProcessing performs any processing needed on the container after it has stopped.
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
+func (daemon *Daemon) postRunProcessing(_ *container.Container, _ libcontainerd.EventInfo) error {
 	return nil
 }

+ 31 - 19
daemon/monitor_windows.go

@@ -1,40 +1,52 @@
 package daemon
 
 import (
-	"fmt"
+	"context"
 
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/libcontainerd"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
 )
 
-// platformConstructExitStatus returns a platform specific exit status structure
-func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
-	return &container.ExitStatus{
-		ExitCode: int(e.ExitCode),
-	}
-}
-
-// postRunProcessing perfoms any processing needed on the container after it has stopped.
-func (daemon *Daemon) postRunProcessing(container *container.Container, e libcontainerd.StateInfo) error {
-	if e.ExitCode == 0 && e.UpdatePending {
-		spec, err := daemon.createSpec(container)
+// postRunProcessing starts a servicing container if required
+func (daemon *Daemon) postRunProcessing(c *container.Container, ei libcontainerd.EventInfo) error {
+	if ei.ExitCode == 0 && ei.UpdatePending {
+		spec, err := daemon.createSpec(c)
 		if err != nil {
 			return err
 		}
-
 		// Turn on servicing
 		spec.Windows.Servicing = true
 
-		copts, err := daemon.getLibcontainerdCreateOptions(container)
+		copts, err := daemon.getLibcontainerdCreateOptions(c)
 		if err != nil {
 			return err
 		}
 
-		// Create a new servicing container, which will start, complete the update, and merge back the
-		// results if it succeeded, all as part of the below function call.
-		if err := daemon.containerd.Create((container.ID + "_servicing"), "", "", *spec, container.InitializeStdio, copts...); err != nil {
-			container.SetExitCode(-1)
-			return fmt.Errorf("Post-run update servicing failed: %s", err)
+		// Create a new servicing container, which will start, complete the
+		// update, and merge back the results if it succeeded, all as part of
+		// the below function call.
+		ctx := context.Background()
+		svcID := c.ID + "_servicing"
+		logger := logrus.WithField("container", svcID)
+		if err := daemon.containerd.Create(ctx, svcID, spec, copts); err != nil {
+			c.SetExitCode(-1)
+			return errors.Wrap(err, "post-run update servicing failed")
+		}
+		_, err = daemon.containerd.Start(ctx, svcID, "", false, nil)
+		if err != nil {
+			logger.WithError(err).Warn("failed to run servicing container")
+			if err := daemon.containerd.Delete(ctx, svcID); err != nil {
+				logger.WithError(err).Warn("failed to delete servicing container")
+			}
+		} else {
+			if _, _, err := daemon.containerd.DeleteTask(ctx, svcID); err != nil {
+				logger.WithError(err).Warn("failed to delete servicing container task")
+			}
+			if err := daemon.containerd.Delete(ctx, svcID); err != nil {
+				logger.WithError(err).Warn("failed to delete servicing container")
+			}
 		}
 	}
 	return nil

+ 5 - 4
daemon/oci_linux.go

@@ -156,7 +156,7 @@ func setDevices(s *specs.Spec, c *container.Container) error {
 	return nil
 }
 
-func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
+func (daemon *Daemon) setRlimits(s *specs.Spec, c *container.Container) error {
 	var rlimits []specs.POSIXRlimit
 
 	// We want to leave the original HostConfig alone so make a copy here
@@ -755,6 +755,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
 	if err := setResources(&s, c.HostConfig.Resources); err != nil {
 		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
 	}
+	s.Process.OOMScoreAdj = &c.HostConfig.OomScoreAdj
 	s.Linux.Sysctl = c.HostConfig.Sysctls
 
 	p := s.Linux.CgroupsPath
@@ -763,11 +764,11 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
 		if err != nil {
 			return nil, err
 		}
-		p, _ = cgroups.GetOwnCgroup("cpu")
+		_, err = cgroups.GetOwnCgroup("cpu")
 		if err != nil {
 			return nil, err
 		}
-		p = filepath.Join(initPath, p)
+		p = filepath.Join(initPath, s.Linux.CgroupsPath)
 	}
 
 	// Clean path to guard against things like ../../../BAD
@@ -782,7 +783,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
 	if err := setDevices(&s, c); err != nil {
 		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
 	}
-	if err := setRlimits(daemon, &s, c); err != nil {
+	if err := daemon.setRlimits(&s, c); err != nil {
 		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
 	}
 	if err := setUser(&s, c); err != nil {

+ 13 - 2
daemon/pause.go

@@ -1,9 +1,11 @@
 package daemon
 
 import (
+	"context"
 	"fmt"
 
 	"github.com/docker/docker/container"
+	"github.com/sirupsen/logrus"
 )
 
 // ContainerPause pauses a container
@@ -33,7 +35,7 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
 
 	// We cannot Pause the container which is already paused
 	if container.Paused {
-		return fmt.Errorf("Container %s is already paused", container.ID)
+		return errNotPaused(container.ID)
 	}
 
 	// We cannot Pause the container which is restarting
@@ -41,9 +43,18 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
 		return errContainerIsRestarting(container.ID)
 	}
 
-	if err := daemon.containerd.Pause(container.ID); err != nil {
+	if err := daemon.containerd.Pause(context.Background(), container.ID); err != nil {
 		return fmt.Errorf("Cannot pause container %s: %s", container.ID, err)
 	}
 
+	container.Paused = true
+	daemon.setStateCounter(container)
+	daemon.updateHealthMonitor(container)
+	daemon.LogContainerEvent(container, "pause")
+
+	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+		logrus.WithError(err).Warn("could not save container to disk")
+	}
+
 	return nil
 }

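Note: every libcontainerd call now takes a context. The daemon passes context.Background() throughout, but the new signatures allow a caller to bound the round-trip; a hedged illustration, again assuming the client interface is exported as libcontainerd.Client:

package sketch

import (
	"context"
	"time"

	"github.com/docker/docker/libcontainerd"
)

// pauseWithTimeout bounds the pause round-trip to containerd, which the
// context-aware signature makes possible; the daemon itself currently
// passes context.Background().
func pauseWithTimeout(client libcontainerd.Client, id string, d time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), d)
	defer cancel()
	return client.Pause(ctx, id)
}
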
+ 0 - 4
daemon/reload.go

@@ -6,7 +6,6 @@ import (
 
 	"github.com/docker/docker/daemon/config"
 	"github.com/docker/docker/daemon/discovery"
-	"github.com/docker/docker/libcontainerd"
 	"github.com/sirupsen/logrus"
 )
 
@@ -303,9 +302,6 @@ func (daemon *Daemon) reloadLiveRestore(conf *config.Config, attributes map[stri
 	// update corresponding configuration
 	if conf.IsValueSet("live-restore") {
 		daemon.configStore.LiveRestoreEnabled = conf.LiveRestoreEnabled
-		if err := daemon.containerdRemote.UpdateOptions(libcontainerd.WithLiveRestore(conf.LiveRestoreEnabled)); err != nil {
-			return err
-		}
 	}
 
 	// prepare reload event attributes with updatable configurations

+ 3 - 2
daemon/resize.go

@@ -1,6 +1,7 @@
 package daemon
 
 import (
+	"context"
 	"fmt"
 
 	"github.com/docker/docker/libcontainerd"
@@ -18,7 +19,7 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error {
 		return errNotRunning(container.ID)
 	}
 
-	if err = daemon.containerd.Resize(container.ID, libcontainerd.InitFriendlyName, width, height); err == nil {
+	if err = daemon.containerd.ResizeTerminal(context.Background(), container.ID, libcontainerd.InitProcessName, width, height); err == nil {
 		attributes := map[string]string{
 			"height": fmt.Sprintf("%d", height),
 			"width":  fmt.Sprintf("%d", width),
@@ -36,5 +37,5 @@ func (daemon *Daemon) ContainerExecResize(name string, height, width int) error
 	if err != nil {
 		return err
 	}
-	return daemon.containerd.Resize(ec.ContainerID, ec.ID, width, height)
+	return daemon.containerd.ResizeTerminal(context.Background(), ec.ContainerID, ec.ID, width, height)
 }

+ 48 - 9
daemon/start.go

@@ -1,6 +1,7 @@
 package daemon
 
 import (
+	"context"
 	"runtime"
 	"time"
 
@@ -113,6 +114,11 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
 		return stateConflictError{errors.New("container is marked for removal and cannot be started")}
 	}
 
+	if checkpointDir != "" {
+		// TODO(mlaventure): how would we support that?
+		return notAllowedError{errors.New("custom checkpointdir is not supported")}
+	}
+
 	// if we encounter an error during start we need to ensure that any other
 	// setup has been cleaned up properly
 	defer func() {
@@ -152,28 +158,56 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
 		return systemError{err}
 	}
 
-	createOptions, err := daemon.getLibcontainerdCreateOptions(container)
-	if err != nil {
-		return err
-	}
-
 	if resetRestartManager {
 		container.ResetRestartManager(true)
 	}
 
-	if checkpointDir == "" {
-		checkpointDir = container.CheckpointDir()
+	if err := daemon.saveApparmorConfig(container); err != nil {
+		return err
 	}
 
-	if daemon.saveApparmorConfig(container); err != nil {
+	if checkpoint != "" {
+		checkpointDir, err = getCheckpointDir(checkpointDir, checkpoint, container.Name, container.ID, container.CheckpointDir(), false)
+		if err != nil {
+			return err
+		}
+	}
+
+	createOptions, err := daemon.getLibcontainerdCreateOptions(container)
+	if err != nil {
 		return err
 	}
 
-	if err := daemon.containerd.Create(container.ID, checkpoint, checkpointDir, *spec, container.InitializeStdio, createOptions...); err != nil {
+	err = daemon.containerd.Create(context.Background(), container.ID, spec, createOptions)
+	if err != nil {
+		return translateContainerdStartErr(container.Path, container.SetExitCode, err)
+	}
+
+	// TODO(mlaventure): we need to specify checkpoint options here
+	pid, err := daemon.containerd.Start(context.Background(), container.ID, checkpointDir,
+		container.StreamConfig.Stdin() != nil || container.Config.Tty,
+		container.InitializeStdio)
+	if err != nil {
+		if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
+			logrus.WithError(err).WithField("container", container.ID).
+				Error("failed to delete failed start container")
+		}
 		return translateContainerdStartErr(container.Path, container.SetExitCode, err)
+	}
+
+	container.SetRunning(pid, true)
+	container.HasBeenManuallyStopped = false
+	container.HasBeenStartedBefore = true
+	daemon.setStateCounter(container)
+
+	daemon.initHealthMonitor(container)
 
+	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+		logrus.WithError(err).WithField("container", container.ID).
+			Errorf("failed to store container")
 	}
 
+	daemon.LogContainerEvent(container, "start")
 	containerActions.WithValues("start").UpdateSince(start)
 
 	return nil
@@ -209,5 +243,10 @@ func (daemon *Daemon) Cleanup(container *container.Container) {
 			logrus.Warnf("%s cleanup: Failed to umount volumes: %v", container.ID, err)
 		}
 	}
+
 	container.CancelAttachContext()
+
+	if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
+		logrus.Errorf("%s cleanup: failed to delete container from containerd: %v", container.ID, err)
+	}
 }

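Note: starting a container is now two libcontainerd calls where there used to be one: Create registers the container and its spec with containerd, and Start creates and starts the task, with an explicit rollback if the start fails; the running-state bookkeeping that used to live in the StateStart event handler is done inline. A reduced sketch of that flow (the helper name and stdio handling are illustrative only, and the client is assumed to satisfy libcontainerd.Client):

package sketch

import (
	"context"

	"github.com/docker/docker/libcontainerd"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

// createAndStart mirrors the two-step flow in containerStart: register the
// container with containerd, then start its task, rolling the registration
// back if the start fails. Stdio wiring is omitted (nil callback).
func createAndStart(ctx context.Context, client libcontainerd.Client, id string, spec *specs.Spec, opts interface{}) (int, error) {
	if err := client.Create(ctx, id, spec, opts); err != nil {
		return -1, err
	}
	pid, err := client.Start(ctx, id, "", false, nil)
	if err != nil {
		// the daemon only logs this rollback failure; the start error is what gets returned
		_ = client.Delete(ctx, id)
		return -1, err
	}
	return pid, nil
}
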
+ 35 - 10
daemon/start_unix.go

@@ -3,29 +3,54 @@
 package daemon
 
 import (
+	"fmt"
+	"os/exec"
+	"path/filepath"
+
+	"github.com/containerd/containerd/linux/runcopts"
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/libcontainerd"
 	"github.com/pkg/errors"
 )
 
-// getLibcontainerdCreateOptions callers must hold a lock on the container
-func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) {
-	createOptions := []libcontainerd.CreateOption{}
+func (daemon *Daemon) getRuntimeScript(container *container.Container) (string, error) {
+	name := container.HostConfig.Runtime
+	rt := daemon.configStore.GetRuntime(name)
+	if rt == nil {
+		return "", validationError{errors.Errorf("no such runtime '%s'", name)}
+	}
 
+	if len(rt.Args) > 0 {
+		// First check that the target exists, as using it in a script won't
+		// give us the right error
+		if _, err := exec.LookPath(rt.Path); err != nil {
+			return "", translateContainerdStartErr(container.Path, container.SetExitCode, err)
+		}
+		return filepath.Join(daemon.configStore.Root, "runtimes", name), nil
+	}
+	return rt.Path, nil
+}
+
+// getLibcontainerdCreateOptions callers must hold a lock on the container
+func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (interface{}, error) {
 	// Ensure a runtime has been assigned to this container
 	if container.HostConfig.Runtime == "" {
 		container.HostConfig.Runtime = daemon.configStore.GetDefaultRuntimeName()
 		container.CheckpointTo(daemon.containersReplica)
 	}
 
-	rt := daemon.configStore.GetRuntime(container.HostConfig.Runtime)
-	if rt == nil {
-		return nil, validationError{errors.Errorf("no such runtime '%s'", container.HostConfig.Runtime)}
+	path, err := daemon.getRuntimeScript(container)
+	if err != nil {
+		return nil, err
 	}
+	opts := &runcopts.RuncOptions{
+		Runtime: path,
+		RuntimeRoot: filepath.Join(daemon.configStore.ExecRoot,
+			fmt.Sprintf("runtime-%s", container.HostConfig.Runtime)),
+	}
+
 	if UsingSystemd(daemon.configStore) {
-		rt.Args = append(rt.Args, "--systemd-cgroup=true")
+		opts.SystemdCgroup = true
 	}
-	createOptions = append(createOptions, libcontainerd.WithRuntime(rt.Path, rt.Args))
 
-	return createOptions, nil
+	return opts, nil
 }

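Note: runtime selection is no longer expressed through libcontainerd create options; the daemon hands containerd a runcopts.RuncOptions message carrying the runtime binary (or wrapper script, when the runtime has extra arguments), a per-runtime root under the exec root, and the systemd-cgroup flag. A sketch of building those options outside the daemon; execRoot and runtimeName are example inputs, not daemon defaults:

package sketch

import (
	"path/filepath"

	"github.com/containerd/containerd/linux/runcopts"
)

// runcOptions builds the per-container runtime options passed to containerd.
func runcOptions(runtimePath, runtimeName, execRoot string, systemdCgroup bool) *runcopts.RuncOptions {
	return &runcopts.RuncOptions{
		Runtime:       runtimePath,
		RuntimeRoot:   filepath.Join(execRoot, "runtime-"+runtimeName),
		SystemdCgroup: systemdCgroup,
	}
}
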
+ 4 - 9
daemon/start_windows.go

@@ -3,12 +3,9 @@ package daemon
 import (
 	"github.com/Microsoft/opengcs/client"
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/libcontainerd"
 )
 
-func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) ([]libcontainerd.CreateOption, error) {
-	createOptions := []libcontainerd.CreateOption{}
-
+func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (interface{}, error) {
 	// LCOW options.
 	if container.OS == "linux" {
 		config := &client.Config{}
@@ -33,11 +30,9 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
 		if err := config.Validate(); err != nil {
 			return nil, err
 		}
-		lcowOpts := &libcontainerd.LCOWOption{
-			Config: config,
-		}
-		createOptions = append(createOptions, lcowOpts)
+
+		return config, nil
 	}
 
-	return createOptions, nil
+	return nil, nil
 }

+ 8 - 7
daemon/top_unix.go

@@ -3,6 +3,7 @@
 package daemon
 
 import (
+	"context"
 	"fmt"
 	"os/exec"
 	"regexp"
@@ -50,16 +51,16 @@ func appendProcess2ProcList(procList *container.ContainerTopOKBody, fields []str
 	procList.Processes = append(procList.Processes, process)
 }
 
-func hasPid(pids []int, pid int) bool {
-	for _, i := range pids {
-		if i == pid {
+func hasPid(procs []uint32, pid int) bool {
+	for _, p := range procs {
+		if int(p) == pid {
 			return true
 		}
 	}
 	return false
 }
 
-func parsePSOutput(output []byte, pids []int) (*container.ContainerTopOKBody, error) {
+func parsePSOutput(output []byte, procs []uint32) (*container.ContainerTopOKBody, error) {
 	procList := &container.ContainerTopOKBody{}
 
 	lines := strings.Split(string(output), "\n")
@@ -101,7 +102,7 @@ func parsePSOutput(output []byte, pids []int) (*container.ContainerTopOKBody, er
 			return nil, fmt.Errorf("Unexpected pid '%s': %s", fields[pidIndex], err)
 		}
 
-		if hasPid(pids, p) {
+		if hasPid(procs, p) {
 			preContainedPidFlag = true
 			appendProcess2ProcList(procList, fields)
 			continue
@@ -138,7 +139,7 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*container.Conta
 		return nil, errContainerIsRestarting(container.ID)
 	}
 
-	pids, err := daemon.containerd.GetPidsForContainer(container.ID)
+	procs, err := daemon.containerd.ListPids(context.Background(), container.ID)
 	if err != nil {
 		return nil, err
 	}
@@ -147,7 +148,7 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*container.Conta
 	if err != nil {
 		return nil, fmt.Errorf("Error running ps: %v", err)
 	}
-	procList, err := parsePSOutput(output, pids)
+	procList, err := parsePSOutput(output, procs)
 	if err != nil {
 		return nil, err
 	}

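Note: ListPids returns the pids as []uint32 where GetPidsForContainer returned []int, so the ps-output parser compares through a conversion. A tiny standalone example of the helper:

package sketch

// hasPid mirrors the helper in daemon/top_unix.go: containerd now reports
// pids as uint32, so the comparison converts before matching.
func hasPid(procs []uint32, pid int) bool {
	for _, p := range procs {
		if int(p) == pid {
			return true
		}
	}
	return false
}

// example: procs as they would come back from ListPids
var _ = hasPid([]uint32{42, 43}, 42) // true
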
+ 5 - 5
daemon/top_unix_test.go

@@ -36,7 +36,7 @@ func TestContainerTopValidatePSArgs(t *testing.T) {
 func TestContainerTopParsePSOutput(t *testing.T) {
 	tests := []struct {
 		output      []byte
-		pids        []int
+		pids        []uint32
 		errExpected bool
 	}{
 		{[]byte(`  PID COMMAND
@@ -44,26 +44,26 @@ func TestContainerTopParsePSOutput(t *testing.T) {
    43 bar
 		- -
   100 baz
-`), []int{42, 43}, false},
+`), []uint32{42, 43}, false},
 		{[]byte(`  UID COMMAND
    42 foo
    43 bar
 		- -
   100 baz
-`), []int{42, 43}, true},
+`), []uint32{42, 43}, true},
 		// unicode space (U+2003, 0xe2 0x80 0x83)
 		{[]byte(` PID COMMAND
    42 foo
    43 bar
 		- -
   100 baz
-`), []int{42, 43}, true},
+`), []uint32{42, 43}, true},
 		// the first space is U+2003, the second one is ascii.
 		{[]byte(` PID COMMAND
    42 foo
    43 bar
   100 baz
-`), []int{42, 43}, true},
+`), []uint32{42, 43}, true},
 	}
 
 	for _, f := range tests {

+ 11 - 1
daemon/top_windows.go

@@ -1,6 +1,7 @@
 package daemon
 
 import (
+	"context"
 	"errors"
 	"fmt"
 	"time"
@@ -34,7 +35,15 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*containertypes.
 		return nil, err
 	}
 
-	s, err := daemon.containerd.Summary(container.ID)
+	if !container.IsRunning() {
+		return nil, errNotRunning(container.ID)
+	}
+
+	if container.IsRestarting() {
+		return nil, errContainerIsRestarting(container.ID)
+	}
+
+	s, err := daemon.containerd.Summary(context.Background(), container.ID)
 	if err != nil {
 		return nil, err
 	}
@@ -49,5 +58,6 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*containertypes.
 			fmt.Sprintf("%02d:%02d:%02d.%03d", int(d.Hours()), int(d.Minutes())%60, int(d.Seconds())%60, int(d.Nanoseconds()/1000000)%1000),
 			units.HumanSize(float64(j.MemoryWorkingSetPrivateBytes))})
 	}
+
 	return procList, nil
 }

+ 12 - 1
daemon/unpause.go

@@ -1,9 +1,11 @@
 package daemon
 
 import (
+	"context"
 	"fmt"
 
 	"github.com/docker/docker/container"
+	"github.com/sirupsen/logrus"
 )
 
 // ContainerUnpause unpauses a container
@@ -30,9 +32,18 @@ func (daemon *Daemon) containerUnpause(container *container.Container) error {
 		return fmt.Errorf("Container %s is not paused", container.ID)
 	}
 
-	if err := daemon.containerd.Resume(container.ID); err != nil {
+	if err := daemon.containerd.Resume(context.Background(), container.ID); err != nil {
 		return fmt.Errorf("Cannot unpause container %s: %s", container.ID, err)
 	}
 
+	container.Paused = false
+	daemon.setStateCounter(container)
+	daemon.updateHealthMonitor(container)
+	daemon.LogContainerEvent(container, "unpause")
+
+	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
+		logrus.WithError(err).Warnf("could not save container to disk")
+	}
+
 	return nil
 }

+ 2 - 1
daemon/update.go

@@ -1,6 +1,7 @@
 package daemon
 
 import (
+	"context"
 	"fmt"
 
 	"github.com/docker/docker/api/types/container"
@@ -76,7 +77,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
 	// If container is running (including paused), we need to update configs
 	// to the real world.
 	if container.IsRunning() && !container.IsRestarting() {
-		if err := daemon.containerd.UpdateResources(container.ID, toContainerdResources(hostConfig.Resources)); err != nil {
+		if err := daemon.containerd.UpdateResources(context.Background(), container.ID, toContainerdResources(hostConfig.Resources)); err != nil {
 			restoreConfig = true
 			// TODO: it would be nice if containerd responded with better errors here so we can classify this better.
 			return errCannotUpdate(container.ID, systemError{err})

+ 32 - 15
daemon/update_linux.go

@@ -7,26 +7,43 @@ import (
 
 	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/libcontainerd"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
 )
 
-func toContainerdResources(resources container.Resources) libcontainerd.Resources {
+func toContainerdResources(resources container.Resources) *libcontainerd.Resources {
 	var r libcontainerd.Resources
-	r.BlkioWeight = uint64(resources.BlkioWeight)
-	r.CpuShares = uint64(resources.CPUShares)
+
+	r.BlockIO = &specs.LinuxBlockIO{
+		Weight: &resources.BlkioWeight,
+	}
+
+	shares := uint64(resources.CPUShares)
+	r.CPU = &specs.LinuxCPU{
+		Shares: &shares,
+		Cpus:   resources.CpusetCpus,
+		Mems:   resources.CpusetMems,
+	}
+
+	var (
+		period uint64
+		quota  int64
+	)
 	if resources.NanoCPUs != 0 {
-		r.CpuPeriod = uint64(100 * time.Millisecond / time.Microsecond)
-		r.CpuQuota = uint64(resources.NanoCPUs) * r.CpuPeriod / 1e9
-	} else {
-		r.CpuPeriod = uint64(resources.CPUPeriod)
-		r.CpuQuota = uint64(resources.CPUQuota)
+		period = uint64(100 * time.Millisecond / time.Microsecond)
+		quota = resources.NanoCPUs * int64(period) / 1e9
 	}
-	r.CpusetCpus = resources.CpusetCpus
-	r.CpusetMems = resources.CpusetMems
-	r.MemoryLimit = uint64(resources.Memory)
+	r.CPU.Period = &period
+	r.CPU.Quota = &quota
+
+	r.Memory = &specs.LinuxMemory{
+		Limit:       &resources.Memory,
+		Reservation: &resources.MemoryReservation,
+		Kernel:      &resources.KernelMemory,
+	}
+
 	if resources.MemorySwap > 0 {
-		r.MemorySwap = uint64(resources.MemorySwap)
+		r.Memory.Swap = &resources.MemorySwap
 	}
-	r.MemoryReservation = uint64(resources.MemoryReservation)
-	r.KernelMemoryLimit = uint64(resources.KernelMemory)
-	return r
+
+	return &r
 }

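Note: resource updates are now expressed directly as an OCI runtime-spec LinuxResources (aliased as libcontainerd.Resources) instead of the flat libcontainerd struct, which is why the numeric fields become pointers. A minimal sketch of building such an update by hand, with an arbitrary example value:

package sketch

import specs "github.com/opencontainers/runtime-spec/specs-go"

// memoryUpdate builds a runtime-spec resource update that only sets a
// memory limit; pointer fields let containerd tell "unset" apart from zero.
func memoryUpdate(limit int64) *specs.LinuxResources {
	return &specs.LinuxResources{
		Memory: &specs.LinuxMemory{
			Limit: &limit,
		},
	}
}
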
+ 3 - 3
daemon/update_windows.go

@@ -7,7 +7,7 @@ import (
 	"github.com/docker/docker/libcontainerd"
 )
 
-func toContainerdResources(resources container.Resources) libcontainerd.Resources {
-	var r libcontainerd.Resources
-	return r
+func toContainerdResources(resources container.Resources) *libcontainerd.Resources {
+	// We don't support update, so do nothing
+	return nil
 }

+ 3 - 3
hack/make/.go-autogen

@@ -17,6 +17,7 @@ const (
 	Version            string = "$VERSION"
 	BuildTime          string = "$BUILDTIME"
 	IAmStatic          string = "${IAMSTATIC:-true}"
+	ContainerdCommitID string = "${CONTAINERD_COMMIT}"
 )
 
 // AUTOGENERATED FILE; see /go/src/github.com/docker/docker/hack/make/.go-autogen
@@ -31,9 +32,8 @@ package dockerversion
 // Default build-time variable for library-import.
 // This file is overridden on build with build-time informations.
 const (
-	ContainerdCommitID string = "${CONTAINERD_COMMIT}"
-	RuncCommitID       string = "${RUNC_COMMIT}"
-	InitCommitID       string = "${TINI_COMMIT}"
+	RuncCommitID string = "${RUNC_COMMIT}"
+	InitCommitID string = "${TINI_COMMIT}"
 )
 
 // AUTOGENERATED FILE; see /go/src/github.com/docker/docker/hack/make/.go-autogen

+ 3 - 1
integration-cli/daemon/daemon.go

@@ -222,7 +222,7 @@ func (d *Daemon) StartWithLogFile(out *os.File, providedArgs ...string) error {
 		return errors.Wrapf(err, "[%s] could not find docker binary in $PATH", d.id)
 	}
 	args := append(d.GlobalFlags,
-		"--containerd", "/var/run/docker/libcontainerd/docker-containerd.sock",
+		"--containerd", "/var/run/docker/containerd/docker-containerd.sock",
 		"--data-root", d.Root,
 		"--exec-root", d.execRoot,
 		"--pidfile", fmt.Sprintf("%s/docker.pid", d.Folder),
@@ -457,6 +457,8 @@ out2:
 		return err
 	}
 
+	d.cmd.Wait()
+
 	if err := os.Remove(fmt.Sprintf("%s/docker.pid", d.Folder)); err != nil {
 		return err
 	}

+ 1 - 1
integration-cli/docker_api_stats_test.go

@@ -285,7 +285,7 @@ func (s *DockerSuite) TestAPIStatsNoStreamConnectedContainers(c *check.C) {
 	id2 := strings.TrimSpace(out2)
 	c.Assert(waitRun(id2), checker.IsNil)
 
-	ch := make(chan error)
+	ch := make(chan error, 1)
 	go func() {
 		resp, body, err := request.Get(fmt.Sprintf("/containers/%s/stats?stream=false", id2))
 		defer body.Close()

+ 4 - 1
integration-cli/docker_cli_attach_test.go

@@ -147,7 +147,10 @@ func (s *DockerSuite) TestAttachDisconnect(c *check.C) {
 	c.Assert(err, check.IsNil)
 	defer stdout.Close()
 	c.Assert(cmd.Start(), check.IsNil)
-	defer cmd.Process.Kill()
+	defer func() {
+		cmd.Process.Kill()
+		cmd.Wait()
+	}()
 
 	_, err = stdin.Write([]byte("hello\n"))
 	c.Assert(err, check.IsNil)

+ 5 - 0
integration-cli/docker_cli_build_unix_test.go

@@ -149,6 +149,11 @@ func (s *DockerSuite) TestBuildCancellationKillsSleep(c *check.C) {
 	if err := buildCmd.Start(); err != nil {
 		c.Fatalf("failed to run build: %s", err)
 	}
+	// always clean up
+	defer func() {
+		buildCmd.Process.Kill()
+		buildCmd.Wait()
+	}()
 
 	matchCID := regexp.MustCompile("Running in (.+)")
 	scanner := bufio.NewScanner(stdoutBuild)

+ 8 - 6
integration-cli/docker_cli_daemon_test.go

@@ -28,6 +28,7 @@ import (
 	"github.com/docker/docker/api"
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/client"
+	moby_daemon "github.com/docker/docker/daemon"
 	"github.com/docker/docker/integration-cli/checker"
 	"github.com/docker/docker/integration-cli/cli"
 	"github.com/docker/docker/integration-cli/daemon"
@@ -1448,7 +1449,8 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonAndContainerKill(c *chec
 	c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
 
 	// kill the container
-	icmd.RunCommand(ctrBinary, "--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", id).Assert(c, icmd.Success)
+	icmd.RunCommand(ctrBinary, "--address", "/var/run/docker/containerd/docker-containerd.sock",
+		"--namespace", moby_daemon.MainNamespace, "tasks", "kill", id).Assert(c, icmd.Success)
 
 	// restart daemon.
 	d.Restart(c)
@@ -1987,7 +1989,6 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithNames(c *check.C) {
 
 // TestDaemonRestartWithKilledRunningContainer requires live restore of running containers
 func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check.C) {
-	// TODO(mlaventure): Not sure what would the exit code be on windows
 	testRequires(t, DaemonIsLinux)
 	s.d.StartWithBusybox(t)
 
@@ -2008,7 +2009,8 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check
 	}
 
 	// kill the container
-	icmd.RunCommand(ctrBinary, "--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", cid).Assert(t, icmd.Success)
+	icmd.RunCommand(ctrBinary, "--address", "/var/run/docker/containerd/docker-containerd.sock",
+		"--namespace", moby_daemon.MainNamespace, "tasks", "kill", cid).Assert(t, icmd.Success)
 
 	// Give time to containerd to process the command if we don't
 	// the exit event might be received after we do the inspect
@@ -2076,7 +2078,6 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
 
 // TestDaemonRestartWithUnpausedRunningContainer requires live restore of running containers.
 func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *check.C) {
-	// TODO(mlaventure): Not sure what would the exit code be on windows
 	testRequires(t, DaemonIsLinux)
 	s.d.StartWithBusybox(t, "--live-restore")
 
@@ -2103,8 +2104,9 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *che
 	// resume the container
 	result := icmd.RunCommand(
 		ctrBinary,
-		"--address", "unix:///var/run/docker/libcontainerd/docker-containerd.sock",
-		"containers", "resume", cid)
+		"--address", "/var/run/docker/containerd/docker-containerd.sock",
+		"--namespace", moby_daemon.MainNamespace,
+		"tasks", "resume", cid)
 	result.Assert(t, icmd.Success)
 
 	// Give time to containerd to process the command if we don't

+ 11 - 5
integration-cli/docker_cli_events_test.go

@@ -86,6 +86,7 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
 	// timeouts creating so many containers simultaneously. This is a due to
 	// a bug in the Windows platform. It will be fixed in a Windows Update.
 	numContainers := 17
+	eventPerContainer := 7 // create, attach, network connect, start, die, network disconnect, destroy
 	numConcurrentContainers := numContainers
 	if testEnv.DaemonPlatform() == "windows" {
 		numConcurrentContainers = 4
@@ -93,17 +94,19 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
 	sem := make(chan bool, numConcurrentContainers)
 	errChan := make(chan error, numContainers)
 
+	startTime := daemonUnixTime(c)
+
 	args := []string{"run", "--rm", "busybox", "true"}
 	for i := 0; i < numContainers; i++ {
 		sem <- true
-		go func() {
+		go func(i int) {
 			defer func() { <-sem }()
 			out, err := exec.Command(dockerBinary, args...).CombinedOutput()
 			if err != nil {
 				err = fmt.Errorf("%v: %s", err, string(out))
 			}
 			errChan <- err
-		}()
+		}(i)
 	}
 
 	// Wait for all goroutines to finish
@@ -116,10 +119,10 @@ func (s *DockerSuite) TestEventsLimit(c *check.C) {
 		c.Assert(err, checker.IsNil, check.Commentf("%q failed with error", strings.Join(args, " ")))
 	}
 
-	out, _ := dockerCmd(c, "events", "--since=0", "--until", daemonUnixTime(c))
+	out, _ := dockerCmd(c, "events", "--since="+startTime, "--until", daemonUnixTime(c))
 	events := strings.Split(out, "\n")
 	nEvents := len(events) - 1
-	c.Assert(nEvents, checker.Equals, 256, check.Commentf("events should be limited to 256, but received %d", nEvents))
+	c.Assert(nEvents, checker.Equals, numContainers*eventPerContainer, check.Commentf("expected %d events, but received %d", numContainers*eventPerContainer, nEvents))
 }
 
 func (s *DockerSuite) TestEventsContainerEvents(c *check.C) {
@@ -533,7 +536,10 @@ func (s *DockerSuite) TestEventsAttach(c *check.C) {
 	c.Assert(err, checker.IsNil)
 	defer stdout.Close()
 	c.Assert(cmd.Start(), checker.IsNil)
-	defer cmd.Process.Kill()
+	defer func() {
+		cmd.Process.Kill()
+		cmd.Wait()
+	}()
 
 	// Make sure we're done attaching by writing/reading some stuff
 	_, err = stdin.Write([]byte("hello\n"))

+ 1 - 0
integration-cli/docker_cli_logs_test.go

@@ -230,6 +230,7 @@ func (s *DockerSuite) TestLogsFollowSlowStdoutConsumer(c *check.C) {
 	stdout, err := logCmd.StdoutPipe()
 	c.Assert(err, checker.IsNil)
 	c.Assert(logCmd.Start(), checker.IsNil)
+	defer func() { go logCmd.Wait() }()
 
 	// First read slowly
 	bytes1, err := ConsumeWithSpeed(stdout, 10, 50*time.Millisecond, stopSlowRead)

+ 1 - 0
integration-cli/docker_cli_network_unix_test.go

@@ -1625,6 +1625,7 @@ func (s *DockerSuite) TestEmbeddedDNSInvalidInput(c *check.C) {
 func (s *DockerSuite) TestDockerNetworkConnectFailsNoInspectChange(c *check.C) {
 	dockerCmd(c, "run", "-d", "--name=bb", "busybox", "top")
 	c.Assert(waitRun("bb"), check.IsNil)
+	defer dockerCmd(c, "stop", "bb")
 
 	ns0 := inspectField(c, "bb", "NetworkSettings.Networks.bridge")
 

+ 1 - 0
integration-cli/docker_cli_run_test.go

@@ -2249,6 +2249,7 @@ func (s *DockerSuite) TestRunSlowStdoutConsumer(c *check.C) {
 	if err := cont.Start(); err != nil {
 		c.Fatal(err)
 	}
+	defer func() { go cont.Wait() }()
 	n, err := ConsumeWithSpeed(stdout, 10000, 5*time.Millisecond, nil)
 	if err != nil {
 		c.Fatal(err)

+ 2 - 0
integration-cli/docker_deprecated_api_v124_test.go

@@ -206,8 +206,10 @@ func (s *DockerSuite) TestDeprecatedPostContainersStartWithLinksInHostConfigIdLi
 	testRequires(c, DaemonIsLinux)
 	name := "test-host-config-links"
 	out, _ := dockerCmd(c, "run", "--name", "link0", "-d", "busybox", "top")
+	defer dockerCmd(c, "stop", "link0")
 	id := strings.TrimSpace(out)
 	dockerCmd(c, "create", "--name", name, "--link", id, "busybox", "top")
+	defer dockerCmd(c, "stop", name)
 
 	hc := inspectFieldJSON(c, name, "HostConfig")
 	config := `{"HostConfig":` + hc + `}`

+ 1 - 1
integration-cli/events_utils_test.go

@@ -69,7 +69,7 @@ func (e *eventObserver) Start() error {
 // Stop stops the events command.
 func (e *eventObserver) Stop() {
 	e.command.Process.Kill()
-	e.command.Process.Release()
+	e.command.Wait()
 }
 
 // Match tries to match the events output with a given matcher.

+ 10 - 2
integration/service/create_test.go

@@ -1,6 +1,7 @@
 package service
 
 import (
+	"runtime"
 	"testing"
 	"time"
 
@@ -42,8 +43,15 @@ func TestCreateWithLBSandbox(t *testing.T) {
 	})
 	require.NoError(t, err)
 
+	pollSettings := func(config *poll.Settings) {
+		if runtime.GOARCH == "arm" {
+			config.Timeout = 30 * time.Second
+			config.Delay = 100 * time.Millisecond
+		}
+	}
+
 	serviceID := serviceResp.ID
-	poll.WaitOn(t, serviceRunningTasksCount(client, serviceID, instances))
+	poll.WaitOn(t, serviceRunningTasksCount(client, serviceID, instances), pollSettings)
 
 	_, _, err = client.ServiceInspectWithRaw(context.Background(), serviceID, types.ServiceInspectOptions{})
 	require.NoError(t, err)
@@ -55,7 +63,7 @@ func TestCreateWithLBSandbox(t *testing.T) {
 	err = client.ServiceRemove(context.Background(), serviceID)
 	require.NoError(t, err)
 
-	poll.WaitOn(t, serviceIsRemoved(client, serviceID))
+	poll.WaitOn(t, serviceIsRemoved(client, serviceID), pollSettings)
 	err = client.NetworkRemove(context.Background(), overlayID)
 	require.NoError(t, err)
 

+ 0 - 46
libcontainerd/client.go

@@ -1,46 +0,0 @@
-package libcontainerd
-
-import (
-	"fmt"
-	"sync"
-
-	"github.com/docker/docker/pkg/locker"
-)
-
-// clientCommon contains the platform agnostic fields used in the client structure
-type clientCommon struct {
-	backend    Backend
-	containers map[string]*container
-	locker     *locker.Locker
-	mapMutex   sync.RWMutex // protects read/write operations from containers map
-}
-
-func (clnt *client) lock(containerID string) {
-	clnt.locker.Lock(containerID)
-}
-
-func (clnt *client) unlock(containerID string) {
-	clnt.locker.Unlock(containerID)
-}
-
-// must hold a lock for cont.containerID
-func (clnt *client) appendContainer(cont *container) {
-	clnt.mapMutex.Lock()
-	clnt.containers[cont.containerID] = cont
-	clnt.mapMutex.Unlock()
-}
-func (clnt *client) deleteContainer(containerID string) {
-	clnt.mapMutex.Lock()
-	delete(clnt.containers, containerID)
-	clnt.mapMutex.Unlock()
-}
-
-func (clnt *client) getContainer(containerID string) (*container, error) {
-	clnt.mapMutex.RLock()
-	container, ok := clnt.containers[containerID]
-	defer clnt.mapMutex.RUnlock()
-	if !ok {
-		return nil, fmt.Errorf("invalid container: %s", containerID) // fixme: typed error
-	}
-	return container, nil
-}

+ 802 - 0
libcontainerd/client_daemon.go

@@ -0,0 +1,802 @@
+// +build !windows
+
+package libcontainerd
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"reflect"
+	"runtime"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"google.golang.org/grpc"
+
+	"github.com/containerd/containerd"
+	eventsapi "github.com/containerd/containerd/api/services/events/v1"
+	"github.com/containerd/containerd/api/types"
+	"github.com/containerd/containerd/archive"
+	"github.com/containerd/containerd/content"
+	"github.com/containerd/containerd/images"
+	"github.com/containerd/containerd/linux/runcopts"
+	"github.com/containerd/typeurl"
+	"github.com/docker/docker/pkg/ioutils"
+	"github.com/opencontainers/image-spec/specs-go/v1"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+// InitProcessName is the name given to the first process of a
+// container
+const InitProcessName = "init"
+
+type container struct {
+	sync.Mutex
+
+	bundleDir string
+	ctr       containerd.Container
+	task      containerd.Task
+	execs     map[string]containerd.Process
+	oomKilled bool
+}
+
+type client struct {
+	sync.RWMutex // protects containers map
+
+	remote   *containerd.Client
+	stateDir string
+	logger   *logrus.Entry
+
+	namespace  string
+	backend    Backend
+	eventQ     queue
+	containers map[string]*container
+}
+
+func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallback) (alive bool, pid int, err error) {
+	c.Lock()
+	defer c.Unlock()
+
+	var cio containerd.IO
+	defer func() {
+		err = wrapError(err)
+	}()
+
+	ctr, err := c.remote.LoadContainer(ctx, id)
+	if err != nil {
+		return false, -1, errors.WithStack(err)
+	}
+
+	defer func() {
+		if err != nil && cio != nil {
+			cio.Cancel()
+			cio.Close()
+		}
+	}()
+
+	t, err := ctr.Task(ctx, func(fifos *containerd.FIFOSet) (containerd.IO, error) {
+		io, err := newIOPipe(fifos)
+		if err != nil {
+			return nil, err
+		}
+
+		cio, err = attachStdio(io)
+		return cio, err
+	})
+	if err != nil && !strings.Contains(err.Error(), "no running task found") {
+		return false, -1, err
+	}
+
+	if t != nil {
+		s, err := t.Status(ctx)
+		if err != nil {
+			return false, -1, err
+		}
+
+		alive = s.Status != containerd.Stopped
+		pid = int(t.Pid())
+	}
+	c.containers[id] = &container{
+		bundleDir: filepath.Join(c.stateDir, id),
+		ctr:       ctr,
+		task:      t,
+		// TODO(mlaventure): load execs
+	}
+
+	c.logger.WithFields(logrus.Fields{
+		"container": id,
+		"alive":     alive,
+		"pid":       pid,
+	}).Debug("restored container")
+
+	return alive, pid, nil
+}
+
+func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, runtimeOptions interface{}) error {
+	if ctr := c.getContainer(id); ctr != nil {
+		return errors.WithStack(newConflictError("id already in use"))
+	}
+
+	bdir, err := prepareBundleDir(filepath.Join(c.stateDir, id), ociSpec)
+	if err != nil {
+		return wrapSystemError(errors.Wrap(err, "prepare bundle dir failed"))
+	}
+
+	c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")
+
+	cdCtr, err := c.remote.NewContainer(ctx, id,
+		containerd.WithSpec(ociSpec),
+		// TODO(mlaventure): when containerd support lcow, revisit runtime value
+		containerd.WithRuntime(fmt.Sprintf("io.containerd.runtime.v1.%s", runtime.GOOS), runtimeOptions))
+	if err != nil {
+		return err
+	}
+
+	c.Lock()
+	c.containers[id] = &container{
+		bundleDir: bdir,
+		ctr:       cdCtr,
+	}
+	c.Unlock()
+
+	return nil
+}
+
+// Start create and start a task for the specified containerd id
+func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio StdioCallback) (int, error) {
+	ctr := c.getContainer(id)
+	switch {
+	case ctr == nil:
+		return -1, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.task != nil:
+		return -1, errors.WithStack(newConflictError("container already started"))
+	}
+
+	var (
+		cp             *types.Descriptor
+		t              containerd.Task
+		cio            containerd.IO
+		err            error
+		stdinCloseSync = make(chan struct{})
+	)
+
+	if checkpointDir != "" {
+		// write checkpoint to the content store
+		tar := archive.Diff(ctx, "", checkpointDir)
+		cp, err = c.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar)
+		// remove the checkpoint when we're done
+		defer func() {
+			if cp != nil {
+				err := c.remote.ContentStore().Delete(context.Background(), cp.Digest)
+				if err != nil {
+					c.logger.WithError(err).WithFields(logrus.Fields{
+						"ref":    checkpointDir,
+						"digest": cp.Digest,
+					}).Warnf("failed to delete temporary checkpoint entry")
+				}
+			}
+		}()
+		if err := tar.Close(); err != nil {
+			return -1, errors.Wrap(err, "failed to close checkpoint tar stream")
+		}
+		if err != nil {
+			return -1, errors.Wrapf(err, "failed to upload checkpoint to containerd")
+		}
+	}
+
+	spec, err := ctr.ctr.Spec(ctx)
+	if err != nil {
+		return -1, errors.Wrap(err, "failed to retrieve spec")
+	}
+	uid, gid := getSpecUser(spec)
+	t, err = ctr.ctr.NewTask(ctx,
+		func(id string) (containerd.IO, error) {
+			cio, err = c.createIO(ctr.bundleDir, id, InitProcessName, stdinCloseSync, withStdin, spec.Process.Terminal, attachStdio)
+			return cio, err
+		},
+		func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
+			info.Checkpoint = cp
+			info.Options = &runcopts.CreateOptions{
+				IoUid: uint32(uid),
+				IoGid: uint32(gid),
+			}
+			return nil
+		})
+	if err != nil {
+		close(stdinCloseSync)
+		if cio != nil {
+			cio.Cancel()
+			cio.Close()
+		}
+		return -1, err
+	}
+
+	c.Lock()
+	c.containers[id].task = t
+	c.Unlock()
+
+	// Signal c.createIO that it can call CloseIO
+	close(stdinCloseSync)
+
+	if err := t.Start(ctx); err != nil {
+		if _, err := t.Delete(ctx); err != nil {
+			c.logger.WithError(err).WithField("container", id).
+				Error("failed to delete task after fail start")
+		}
+		c.Lock()
+		c.containers[id].task = nil
+		c.Unlock()
+		return -1, err
+	}
+
+	return int(t.Pid()), nil
+}
+
+func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) {
+	ctr := c.getContainer(containerID)
+	switch {
+	case ctr == nil:
+		return -1, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.task == nil:
+		return -1, errors.WithStack(newInvalidParameterError("container is not running"))
+	case ctr.execs != nil && ctr.execs[processID] != nil:
+		return -1, errors.WithStack(newConflictError("id already in use"))
+	}
+
+	var (
+		p              containerd.Process
+		cio            containerd.IO
+		err            error
+		stdinCloseSync = make(chan struct{})
+	)
+	defer func() {
+		if err != nil {
+			if cio != nil {
+				cio.Cancel()
+				cio.Close()
+			}
+		}
+	}()
+
+	p, err = ctr.task.Exec(ctx, processID, spec, func(id string) (containerd.IO, error) {
+		cio, err = c.createIO(ctr.bundleDir, containerID, processID, stdinCloseSync, withStdin, spec.Terminal, attachStdio)
+		return cio, err
+	})
+	if err != nil {
+		close(stdinCloseSync)
+		if cio != nil {
+			cio.Cancel()
+			cio.Close()
+		}
+		return -1, err
+	}
+
+	ctr.Lock()
+	if ctr.execs == nil {
+		ctr.execs = make(map[string]containerd.Process)
+	}
+	ctr.execs[processID] = p
+	ctr.Unlock()
+
+	// Signal c.createIO that it can call CloseIO
+	close(stdinCloseSync)
+
+	if err = p.Start(ctx); err != nil {
+		p.Delete(context.Background())
+		ctr.Lock()
+		delete(ctr.execs, processID)
+		ctr.Unlock()
+		return -1, err
+	}
+
+	return int(p.Pid()), nil
+}
+
+func (c *client) SignalProcess(ctx context.Context, containerID, processID string, signal int) error {
+	p, err := c.getProcess(containerID, processID)
+	if err != nil {
+		return err
+	}
+	return p.Kill(ctx, syscall.Signal(signal))
+}
+
+func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
+	p, err := c.getProcess(containerID, processID)
+	if err != nil {
+		return err
+	}
+
+	return p.Resize(ctx, uint32(width), uint32(height))
+}
+
+func (c *client) CloseStdin(ctx context.Context, containerID, processID string) error {
+	p, err := c.getProcess(containerID, processID)
+	if err != nil {
+		return err
+	}
+
+	return p.CloseIO(ctx, containerd.WithStdinCloser)
+}
+
+func (c *client) Pause(ctx context.Context, containerID string) error {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	return p.(containerd.Task).Pause(ctx)
+}
+
+func (c *client) Resume(ctx context.Context, containerID string) error {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	return p.(containerd.Task).Resume(ctx)
+}
+
+func (c *client) Stats(ctx context.Context, containerID string) (*Stats, error) {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
+	m, err := p.(containerd.Task).Metrics(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	v, err := typeurl.UnmarshalAny(m.Data)
+	if err != nil {
+		return nil, err
+	}
+	return interfaceToStats(m.Timestamp, v), nil
+}
+
+func (c *client) ListPids(ctx context.Context, containerID string) ([]uint32, error) {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
+	pis, err := p.(containerd.Task).Pids(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	var pids []uint32
+	for _, i := range pis {
+		pids = append(pids, i.Pid)
+	}
+
+	return pids, nil
+}
+
+func (c *client) Summary(ctx context.Context, containerID string) ([]Summary, error) {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
+	pis, err := p.(containerd.Task).Pids(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	var infos []Summary
+	for _, pi := range pis {
+		i, err := typeurl.UnmarshalAny(pi.Info)
+		if err != nil {
+			return nil, errors.Wrap(err, "unable to decode process details")
+		}
+		s, err := summaryFromInterface(i)
+		if err != nil {
+			return nil, err
+		}
+		infos = append(infos, *s)
+	}
+
+	return infos, nil
+}
+
+func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return 255, time.Now(), nil
+	}
+
+	status, err := p.(containerd.Task).Delete(ctx)
+	if err != nil {
+		return 255, time.Now(), nil
+	}
+
+	c.Lock()
+	if ctr, ok := c.containers[containerID]; ok {
+		ctr.task = nil
+	}
+	c.Unlock()
+
+	return status.ExitCode(), status.ExitTime(), nil
+}
+
+func (c *client) Delete(ctx context.Context, containerID string) error {
+	ctr := c.getContainer(containerID)
+	if ctr == nil {
+		return errors.WithStack(newNotFoundError("no such container"))
+	}
+
+	if err := ctr.ctr.Delete(ctx); err != nil {
+		return err
+	}
+
+	if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
+		if err := os.RemoveAll(ctr.bundleDir); err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container": containerID,
+				"bundle":    ctr.bundleDir,
+			}).Error("failed to remove state dir")
+		}
+	}
+
+	c.removeContainer(containerID)
+
+	return nil
+}
+
+func (c *client) Status(ctx context.Context, containerID string) (Status, error) {
+	ctr := c.getContainer(containerID)
+	if ctr == nil {
+		return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
+	}
+
+	s, err := ctr.task.Status(ctx)
+	if err != nil {
+		return StatusUnknown, err
+	}
+
+	return Status(s.Status), nil
+}
+
+func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	img, err := p.(containerd.Task).Checkpoint(ctx)
+	if err != nil {
+		return err
+	}
+	// Whatever happens, delete the checkpoint from containerd
+	defer func() {
+		err := c.remote.ImageService().Delete(context.Background(), img.Name())
+		if err != nil {
+			c.logger.WithError(err).WithField("digest", img.Target().Digest).
+				Warnf("failed to delete checkpoint image")
+		}
+	}()
+
+	b, err := content.ReadBlob(ctx, c.remote.ContentStore(), img.Target().Digest)
+	if err != nil {
+		return wrapSystemError(errors.Wrapf(err, "failed to retrieve checkpoint data"))
+	}
+	var index v1.Index
+	if err := json.Unmarshal(b, &index); err != nil {
+		return wrapSystemError(errors.Wrapf(err, "failed to decode checkpoint data"))
+	}
+
+	var cpDesc *v1.Descriptor
+	for _, m := range index.Manifests {
+		if m.MediaType == images.MediaTypeContainerd1Checkpoint {
+			cpDesc = &m
+			break
+		}
+	}
+	if cpDesc == nil {
+		return wrapSystemError(errors.New("invalid checkpoint"))
+	}
+
+	rat, err := c.remote.ContentStore().ReaderAt(ctx, cpDesc.Digest)
+	if err != nil {
+		return wrapSystemError(errors.Wrapf(err, "failed to get checkpoint reader"))
+	}
+	defer rat.Close()
+	_, err = archive.Apply(ctx, checkpointDir, content.NewReader(rat))
+	if err != nil {
+		return wrapSystemError(errors.Wrapf(err, "failed to apply checkpoint archive"))
+	}
+
+	return nil
+}
+
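+// getContainer returns the container tracked under id, or nil if it is not
+// known to this client.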
+func (c *client) getContainer(id string) *container {
+	c.RLock()
+	ctr := c.containers[id]
+	c.RUnlock()
+
+	return ctr
+}
+
+func (c *client) removeContainer(id string) {
+	c.Lock()
+	delete(c.containers, id)
+	c.Unlock()
+}
+
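+// getProcess returns the containerd process for processID inside containerID.
+// InitProcessName selects the container's init task; any other ID is looked up
+// in the container's exec map.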
+func (c *client) getProcess(containerID, processID string) (containerd.Process, error) {
+	ctr := c.getContainer(containerID)
+	switch {
+	case ctr == nil:
+		return nil, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.task == nil:
+		return nil, errors.WithStack(newNotFoundError("container is not running"))
+	case processID == InitProcessName:
+		return ctr.task, nil
+	default:
+		ctr.Lock()
+		defer ctr.Unlock()
+		if ctr.execs == nil {
+			return nil, errors.WithStack(newNotFoundError("no execs"))
+		}
+	}
+
+	p := ctr.execs[processID]
+	if p == nil {
+		return nil, errors.WithStack(newNotFoundError("no such exec"))
+	}
+
+	return p, nil
+}
+
+// createIO creates the io to be used by a process.
+// The stdin closer resolves the process lazily via getProcess because the
+// process may not have been registered yet when the closure runs.
+func (c *client) createIO(bundleDir, containerID, processID string, stdinCloseSync chan struct{}, withStdin, withTerminal bool, attachStdio StdioCallback) (containerd.IO, error) {
+	fifos := newFIFOSet(bundleDir, containerID, processID, withStdin, withTerminal)
+	io, err := newIOPipe(fifos)
+	if err != nil {
+		return nil, err
+	}
+
+	if io.Stdin != nil {
+		var (
+			err       error
+			stdinOnce sync.Once
+		)
+		pipe := io.Stdin
+		io.Stdin = ioutils.NewWriteCloserWrapper(pipe, func() error {
+			stdinOnce.Do(func() {
+				err = pipe.Close()
+				// Do the rest in a new routine to avoid a deadlock if the
+				// Exec/Start call failed.
+				go func() {
+					<-stdinCloseSync
+					p, err := c.getProcess(containerID, processID)
+					if err == nil {
+						err = p.CloseIO(context.Background(), containerd.WithStdinCloser)
+						if err != nil && strings.Contains(err.Error(), "transport is closing") {
+							err = nil
+						}
+					}
+				}()
+			})
+			return err
+		})
+	}
+
+	cio, err := attachStdio(io)
+	if err != nil {
+		io.Cancel()
+		io.Close()
+	}
+	return cio, err
+}
+
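+// processEvent queues the backend notification for an event. For exit events
+// of exec processes it also deletes the process from containerd and drops it
+// from the container's exec map.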
+func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
+	c.eventQ.append(ei.ContainerID, func() {
+		err := c.backend.ProcessEvent(ei.ContainerID, et, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container":  ei.ContainerID,
+				"event":      et,
+				"event-info": ei,
+			}).Error("failed to process event")
+		}
+
+		if et == EventExit && ei.ProcessID != ei.ContainerID {
+			var p containerd.Process
+			ctr.Lock()
+			if ctr.execs != nil {
+				p = ctr.execs[ei.ProcessID]
+			}
+			ctr.Unlock()
+			if p == nil {
+				c.logger.WithError(errors.New("no such process")).
+					WithFields(logrus.Fields{
+						"container": ei.ContainerID,
+						"process":   ei.ProcessID,
+					}).Error("exit event")
+				return
+			}
+			_, err = p.Delete(context.Background())
+			if err != nil {
+				c.logger.WithError(err).WithFields(logrus.Fields{
+					"container": ei.ContainerID,
+					"process":   ei.ProcessID,
+				}).Warn("failed to delete process")
+			}
+			ctr.Lock()
+			delete(ctr.execs, ei.ProcessID)
+			ctr.Unlock()
+		}
+	})
+}
+
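+// processEventStream subscribes to containerd task events for this client's
+// namespace and forwards them to the backend. On error it re-subscribes,
+// unless the context has been cancelled.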
+func (c *client) processEventStream(ctx context.Context) {
+	var (
+		err         error
+		eventStream eventsapi.Events_SubscribeClient
+		ev          *eventsapi.Envelope
+		et          EventType
+		ei          EventInfo
+		ctr         *container
+	)
+	defer func() {
+		if err != nil {
+			select {
+			case <-ctx.Done():
+				c.logger.WithError(ctx.Err()).
+					Info("stopping event stream following graceful shutdown")
+			default:
+				go c.processEventStream(ctx)
+			}
+		}
+	}()
+
+	eventStream, err = c.remote.EventService().Subscribe(ctx, &eventsapi.SubscribeRequest{
+		Filters: []string{"namespace==" + c.namespace + ",topic~=/tasks/.+"},
+	}, grpc.FailFast(false))
+	if err != nil {
+		return
+	}
+
+	var oomKilled bool
+	for {
+		ev, err = eventStream.Recv()
+		if err != nil {
+			c.logger.WithError(err).Error("failed to get event")
+			return
+		}
+
+		if ev.Event == nil {
+			c.logger.WithField("event", ev).Warn("invalid event")
+			continue
+		}
+
+		v, err := typeurl.UnmarshalAny(ev.Event)
+		if err != nil {
+			c.logger.WithError(err).WithField("event", ev).Warn("failed to unmarshal event")
+			continue
+		}
+
+		c.logger.WithField("topic", ev.Topic).Debug("event")
+
+		switch t := v.(type) {
+		case *eventsapi.TaskCreate:
+			et = EventCreate
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ContainerID,
+				Pid:         t.Pid,
+			}
+		case *eventsapi.TaskStart:
+			et = EventStart
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ContainerID,
+				Pid:         t.Pid,
+			}
+		case *eventsapi.TaskExit:
+			et = EventExit
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ID,
+				Pid:         t.Pid,
+				ExitCode:    t.ExitStatus,
+				ExitedAt:    t.ExitedAt,
+			}
+		case *eventsapi.TaskOOM:
+			et = EventOOM
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				OOMKilled:   true,
+			}
+			oomKilled = true
+		case *eventsapi.TaskExecAdded:
+			et = EventExecAdded
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ExecID,
+			}
+		case *eventsapi.TaskExecStarted:
+			et = EventExecStarted
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+				ProcessID:   t.ExecID,
+				Pid:         t.Pid,
+			}
+		case *eventsapi.TaskPaused:
+			et = EventPaused
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+			}
+		case *eventsapi.TaskResumed:
+			et = EventResumed
+			ei = EventInfo{
+				ContainerID: t.ContainerID,
+			}
+		default:
+			c.logger.WithFields(logrus.Fields{
+				"topic": ev.Topic,
+				"type":  reflect.TypeOf(t),
+			}).Info("ignoring event")
+			continue
+		}
+
+		ctr = c.getContainer(ei.ContainerID)
+		if ctr == nil {
+			c.logger.WithField("container", ei.ContainerID).Warn("unknown container")
+			continue
+		}
+
+		if oomKilled {
+			ctr.oomKilled = true
+			oomKilled = false
+		}
+		ei.OOMKilled = ctr.oomKilled
+
+		c.processEvent(ctr, et, ei)
+	}
+}
+
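+// writeContent copies r into the containerd content store under ref and
+// returns the resulting descriptor. The gc.root label prevents the blob from
+// being garbage collected by containerd.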
+func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.Reader) (*types.Descriptor, error) {
+	writer, err := c.remote.ContentStore().Writer(ctx, ref, 0, "")
+	if err != nil {
+		return nil, err
+	}
+	defer writer.Close()
+	size, err := io.Copy(writer, r)
+	if err != nil {
+		return nil, err
+	}
+	labels := map[string]string{
+		"containerd.io/gc.root": time.Now().UTC().Format(time.RFC3339),
+	}
+	if err := writer.Commit(ctx, 0, "", content.WithLabels(labels)); err != nil {
+		return nil, err
+	}
+	return &types.Descriptor{
+		MediaType: mediaType,
+		Digest:    writer.Digest(),
+		Size_:     size,
+	}, nil
+}
+
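+// wrapError translates well-known containerd error messages into the
+// daemon's not-found error type.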
+func wrapError(err error) error {
+	if err != nil {
+		msg := err.Error()
+		for _, s := range []string{"container does not exist", "not found", "no such container"} {
+			if strings.Contains(msg, s) {
+				return wrapNotFoundError(err)
+			}
+		}
+	}
+	return err
+}

+ 96 - 0
libcontainerd/client_daemon_linux.go

@@ -0,0 +1,96 @@
+package libcontainerd
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/containerd/containerd"
+	"github.com/docker/docker/pkg/idtools"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
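+// summaryFromInterface is a no-op on Linux and always returns an empty Summary.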
+func summaryFromInterface(i interface{}) (*Summary, error) {
+	return &Summary{}, nil
+}
+
+func (c *client) UpdateResources(ctx context.Context, containerID string, resources *Resources) error {
+	p, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	// Go 1.8 does not support the type alias here, so this conversion needs
+	// to be platform specific.
+	return p.(containerd.Task).Update(ctx, containerd.WithResources((*specs.LinuxResources)(resources)))
+}
+
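+// hostIDFromMap returns the host ID that the given container ID maps to, or 0
+// if no mapping covers it.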
+func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int {
+	for _, m := range mp {
+		if id >= m.ContainerID && id <= m.ContainerID+m.Size-1 {
+			return int(m.HostID + id - m.ContainerID)
+		}
+	}
+	return 0
+}
+
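+// getSpecUser returns the host uid/gid that container root maps to when a
+// user namespace is configured, or 0, 0 otherwise.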
+func getSpecUser(ociSpec *specs.Spec) (int, int) {
+	var (
+		uid int
+		gid int
+	)
+
+	for _, ns := range ociSpec.Linux.Namespaces {
+		if ns.Type == specs.UserNamespace {
+			uid = hostIDFromMap(0, ociSpec.Linux.UIDMappings)
+			gid = hostIDFromMap(0, ociSpec.Linux.GIDMappings)
+			break
+		}
+	}
+
+	return uid, gid
+}
+
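+// prepareBundleDir returns a bundle directory usable by the remapped root of
+// a user-namespaced container. Each path component that is not world
+// searchable is replaced by a "<dir>.<uid>.<gid>" directory owned by that
+// user; in the non-remapped case the directory is simply created as root.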
+func prepareBundleDir(bundleDir string, ociSpec *specs.Spec) (string, error) {
+	uid, gid := getSpecUser(ociSpec)
+	if uid == 0 && gid == 0 {
+		return bundleDir, idtools.MkdirAllAndChownNew(bundleDir, 0755, idtools.IDPair{0, 0})
+	}
+
+	p := string(filepath.Separator)
+	components := strings.Split(bundleDir, string(filepath.Separator))
+	for _, d := range components[1:] {
+		p = filepath.Join(p, d)
+		fi, err := os.Stat(p)
+		if err != nil && !os.IsNotExist(err) {
+			return "", err
+		}
+		if os.IsNotExist(err) || fi.Mode()&1 == 0 {
+			p = fmt.Sprintf("%s.%d.%d", p, uid, gid)
+			if err := idtools.MkdirAndChown(p, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) {
+				return "", err
+			}
+		}
+	}
+
+	return p, nil
+}
+
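+// newFIFOSet returns the FIFO paths (under the bundle directory) for the given
+// process; stderr is omitted when a terminal is requested.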
+func newFIFOSet(bundleDir, containerID, processID string, withStdin, withTerminal bool) *containerd.FIFOSet {
+	fifos := &containerd.FIFOSet{
+		Terminal: withTerminal,
+		Out:      filepath.Join(bundleDir, processID+"-stdout"),
+	}
+
+	if withStdin {
+		fifos.In = filepath.Join(bundleDir, processID+"-stdin")
+	}
+
+	if !fifos.Terminal {
+		fifos.Err = filepath.Join(bundleDir, processID+"-stderr")
+	}
+
+	return fifos
+}

+ 53 - 0
libcontainerd/client_daemon_windows.go

@@ -0,0 +1,53 @@
+package libcontainerd
+
+import (
+	"fmt"
+
+	"github.com/containerd/containerd"
+	"github.com/containerd/containerd/windows/hcsshimtypes"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+)
+
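+// summaryFromInterface converts hcsshim process details into a Summary.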
+func summaryFromInterface(i interface{}) (*Summary, error) {
+	switch pd := i.(type) {
+	case *hcsshimtypes.ProcessDetails:
+		return &Summary{
+			CreateTimestamp:              pd.CreatedAt,
+			ImageName:                    pd.ImageName,
+			KernelTime100ns:              pd.KernelTime_100Ns,
+			MemoryCommitBytes:            pd.MemoryCommitBytes,
+			MemoryWorkingSetPrivateBytes: pd.MemoryWorkingSetPrivateBytes,
+			MemoryWorkingSetSharedBytes:  pd.MemoryWorkingSetSharedBytes,
+			ProcessId:                    pd.ProcessID,
+			UserTime100ns:                pd.UserTime_100Ns,
+		}, nil
+	default:
+		return nil, errors.Errorf("Unknown process details type %T", pd)
+	}
+}
+
+func prepareBundleDir(bundleDir string, ociSpec *specs.Spec) (string, error) {
+	return bundleDir, nil
+}
+
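+// pipeName returns the named pipe path for one stdio stream of a process.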
+func pipeName(containerID, processID, name string) string {
+	return fmt.Sprintf(`\\.\pipe\containerd-%s-%s-%s`, containerID, processID, name)
+}
+
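+// newFIFOSet returns the named pipe paths used for the given process's stdio;
+// stderr is omitted when a terminal is requested.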
+func newFIFOSet(bundleDir, containerID, processID string, withStdin, withTerminal bool) *containerd.FIFOSet {
+	fifos := &containerd.FIFOSet{
+		Terminal: withTerminal,
+		Out:      pipeName(containerID, processID, "stdout"),
+	}
+
+	if withStdin {
+		fifos.In = pipeName(containerID, processID, "stdin")
+	}
+
+	if !fifos.Terminal {
+		fifos.Err = pipeName(containerID, processID, "stderr")
+	}
+
+	return fifos
+}

+ 0 - 616
libcontainerd/client_linux.go

@@ -1,616 +0,0 @@
-package libcontainerd
-
-import (
-	"fmt"
-	"os"
-	"strings"
-	"sync"
-	"time"
-
-	containerd "github.com/containerd/containerd/api/grpc/types"
-	containerd_runtime_types "github.com/containerd/containerd/runtime"
-	"github.com/docker/docker/pkg/ioutils"
-	"github.com/docker/docker/pkg/mount"
-	"github.com/golang/protobuf/ptypes"
-	"github.com/golang/protobuf/ptypes/timestamp"
-	specs "github.com/opencontainers/runtime-spec/specs-go"
-	"github.com/sirupsen/logrus"
-	"golang.org/x/net/context"
-	"golang.org/x/sys/unix"
-)
-
-type client struct {
-	clientCommon
-
-	// Platform specific properties below here.
-	remote        *remote
-	q             queue
-	exitNotifiers map[string]*exitNotifier
-	liveRestore   bool
-}
-
-// GetServerVersion returns the connected server version information
-func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
-	resp, err := clnt.remote.apiClient.GetServerVersion(ctx, &containerd.GetServerVersionRequest{})
-	if err != nil {
-		return nil, err
-	}
-
-	sv := &ServerVersion{
-		GetServerVersionResponse: *resp,
-	}
-
-	return sv, nil
-}
-
-// AddProcess is the handler for adding a process to an already running
-// container. It's called through docker exec. It returns the system pid of the
-// exec'd process.
-func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, specp Process, attachStdio StdioCallback) (pid int, err error) {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	container, err := clnt.getContainer(containerID)
-	if err != nil {
-		return -1, err
-	}
-
-	spec, err := container.spec()
-	if err != nil {
-		return -1, err
-	}
-	sp := spec.Process
-	sp.Args = specp.Args
-	sp.Terminal = specp.Terminal
-	if len(specp.Env) > 0 {
-		sp.Env = specp.Env
-	}
-	if specp.Cwd != nil {
-		sp.Cwd = *specp.Cwd
-	}
-	if specp.User != nil {
-		sp.User = specs.User{
-			UID:            specp.User.UID,
-			GID:            specp.User.GID,
-			AdditionalGids: specp.User.AdditionalGids,
-		}
-	}
-	if specp.Capabilities != nil {
-		sp.Capabilities.Bounding = specp.Capabilities
-		sp.Capabilities.Effective = specp.Capabilities
-		sp.Capabilities.Inheritable = specp.Capabilities
-		sp.Capabilities.Permitted = specp.Capabilities
-	}
-
-	p := container.newProcess(processFriendlyName)
-
-	r := &containerd.AddProcessRequest{
-		Args:     sp.Args,
-		Cwd:      sp.Cwd,
-		Terminal: sp.Terminal,
-		Id:       containerID,
-		Env:      sp.Env,
-		User: &containerd.User{
-			Uid:            sp.User.UID,
-			Gid:            sp.User.GID,
-			AdditionalGids: sp.User.AdditionalGids,
-		},
-		Pid:             processFriendlyName,
-		Stdin:           p.fifo(unix.Stdin),
-		Stdout:          p.fifo(unix.Stdout),
-		Stderr:          p.fifo(unix.Stderr),
-		Capabilities:    sp.Capabilities.Effective,
-		ApparmorProfile: sp.ApparmorProfile,
-		SelinuxLabel:    sp.SelinuxLabel,
-		NoNewPrivileges: sp.NoNewPrivileges,
-		Rlimits:         convertRlimits(sp.Rlimits),
-	}
-
-	fifoCtx, cancel := context.WithCancel(context.Background())
-	defer func() {
-		if err != nil {
-			cancel()
-		}
-	}()
-
-	iopipe, err := p.openFifos(fifoCtx, sp.Terminal)
-	if err != nil {
-		return -1, err
-	}
-
-	resp, err := clnt.remote.apiClient.AddProcess(ctx, r)
-	if err != nil {
-		p.closeFifos(iopipe)
-		return -1, err
-	}
-
-	var stdinOnce sync.Once
-	stdin := iopipe.Stdin
-	iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
-		var err error
-		stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
-			err = stdin.Close()
-			if err2 := p.sendCloseStdin(); err == nil {
-				err = err2
-			}
-		})
-		return err
-	})
-
-	container.processes[processFriendlyName] = p
-
-	if err := attachStdio(*iopipe); err != nil {
-		p.closeFifos(iopipe)
-		return -1, err
-	}
-
-	return int(resp.SystemPid), nil
-}
-
-func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	_, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
-		Id:     containerID,
-		Pid:    pid,
-		Signal: uint32(sig),
-	})
-	return err
-}
-
-func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	if _, err := clnt.getContainer(containerID); err != nil {
-		return err
-	}
-	_, err := clnt.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
-		Id:     containerID,
-		Pid:    processFriendlyName,
-		Width:  uint32(width),
-		Height: uint32(height),
-	})
-	return err
-}
-
-func (clnt *client) Pause(containerID string) error {
-	return clnt.setState(containerID, StatePause)
-}
-
-func (clnt *client) setState(containerID, state string) error {
-	clnt.lock(containerID)
-	container, err := clnt.getContainer(containerID)
-	if err != nil {
-		clnt.unlock(containerID)
-		return err
-	}
-	if container.systemPid == 0 {
-		clnt.unlock(containerID)
-		return fmt.Errorf("No active process for container %s", containerID)
-	}
-	st := "running"
-	if state == StatePause {
-		st = "paused"
-	}
-	chstate := make(chan struct{})
-	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
-		Id:     containerID,
-		Pid:    InitFriendlyName,
-		Status: st,
-	})
-	if err != nil {
-		clnt.unlock(containerID)
-		return err
-	}
-	container.pauseMonitor.append(state, chstate)
-	clnt.unlock(containerID)
-	<-chstate
-	return nil
-}
-
-func (clnt *client) Resume(containerID string) error {
-	return clnt.setState(containerID, StateResume)
-}
-
-func (clnt *client) Stats(containerID string) (*Stats, error) {
-	resp, err := clnt.remote.apiClient.Stats(context.Background(), &containerd.StatsRequest{containerID})
-	if err != nil {
-		return nil, err
-	}
-	return (*Stats)(resp), nil
-}
-
-// Take care of the old 1.11.0 behavior in case the version upgrade
-// happened without a clean daemon shutdown
-func (clnt *client) cleanupOldRootfs(containerID string) {
-	// Unmount and delete the bundle folder
-	if mts, err := mount.GetMounts(); err == nil {
-		for _, mts := range mts {
-			if strings.HasSuffix(mts.Mountpoint, containerID+"/rootfs") {
-				if err := unix.Unmount(mts.Mountpoint, unix.MNT_DETACH); err == nil {
-					os.RemoveAll(strings.TrimSuffix(mts.Mountpoint, "/rootfs"))
-				}
-				break
-			}
-		}
-	}
-}
-
-func (clnt *client) setExited(containerID string, exitCode uint32) error {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-
-	err := clnt.backend.StateChanged(containerID, StateInfo{
-		CommonStateInfo: CommonStateInfo{
-			State:    StateExit,
-			ExitCode: exitCode,
-		}})
-
-	clnt.cleanupOldRootfs(containerID)
-
-	return err
-}
-
-func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
-	cont, err := clnt.getContainerdContainer(containerID)
-	if err != nil {
-		return nil, err
-	}
-	pids := make([]int, len(cont.Pids))
-	for i, p := range cont.Pids {
-		pids[i] = int(p)
-	}
-	return pids, nil
-}
-
-// Summary returns a summary of the processes running in a container.
-// This is a no-op on Linux.
-func (clnt *client) Summary(containerID string) ([]Summary, error) {
-	return nil, nil
-}
-
-func (clnt *client) getContainerdContainer(containerID string) (*containerd.Container, error) {
-	resp, err := clnt.remote.apiClient.State(context.Background(), &containerd.StateRequest{Id: containerID})
-	if err != nil {
-		return nil, err
-	}
-	for _, cont := range resp.Containers {
-		if cont.Id == containerID {
-			return cont, nil
-		}
-	}
-	return nil, fmt.Errorf("invalid state response")
-}
-
-func (clnt *client) UpdateResources(containerID string, resources Resources) error {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	container, err := clnt.getContainer(containerID)
-	if err != nil {
-		return err
-	}
-	if container.systemPid == 0 {
-		return fmt.Errorf("No active process for container %s", containerID)
-	}
-	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
-		Id:        containerID,
-		Pid:       InitFriendlyName,
-		Resources: (*containerd.UpdateResource)(&resources),
-	})
-	return err
-}
-
-func (clnt *client) getExitNotifier(containerID string) *exitNotifier {
-	clnt.mapMutex.RLock()
-	defer clnt.mapMutex.RUnlock()
-	return clnt.exitNotifiers[containerID]
-}
-
-func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier {
-	clnt.mapMutex.Lock()
-	w, ok := clnt.exitNotifiers[containerID]
-	defer clnt.mapMutex.Unlock()
-	if !ok {
-		w = &exitNotifier{c: make(chan struct{}), client: clnt}
-		clnt.exitNotifiers[containerID] = w
-	}
-	return w
-}
-
-func (clnt *client) restore(cont *containerd.Container, lastEvent *containerd.Event, attachStdio StdioCallback, options ...CreateOption) (err error) {
-	clnt.lock(cont.Id)
-	defer clnt.unlock(cont.Id)
-
-	logrus.Debugf("libcontainerd: restore container %s state %s", cont.Id, cont.Status)
-
-	containerID := cont.Id
-	if _, err := clnt.getContainer(containerID); err == nil {
-		return fmt.Errorf("container %s is already active", containerID)
-	}
-
-	defer func() {
-		if err != nil {
-			clnt.deleteContainer(cont.Id)
-		}
-	}()
-
-	container := clnt.newContainer(cont.BundlePath, options...)
-	container.systemPid = systemPid(cont)
-
-	var terminal bool
-	for _, p := range cont.Processes {
-		if p.Pid == InitFriendlyName {
-			terminal = p.Terminal
-		}
-	}
-
-	fifoCtx, cancel := context.WithCancel(context.Background())
-	defer func() {
-		if err != nil {
-			cancel()
-		}
-	}()
-
-	iopipe, err := container.openFifos(fifoCtx, terminal)
-	if err != nil {
-		return err
-	}
-	var stdinOnce sync.Once
-	stdin := iopipe.Stdin
-	iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
-		var err error
-		stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
-			err = stdin.Close()
-		})
-		return err
-	})
-
-	if err := attachStdio(*iopipe); err != nil {
-		container.closeFifos(iopipe)
-		return err
-	}
-
-	clnt.appendContainer(container)
-
-	err = clnt.backend.StateChanged(containerID, StateInfo{
-		CommonStateInfo: CommonStateInfo{
-			State: StateRestore,
-			Pid:   container.systemPid,
-		}})
-
-	if err != nil {
-		container.closeFifos(iopipe)
-		return err
-	}
-
-	if lastEvent != nil {
-		// This should only be a pause or resume event
-		if lastEvent.Type == StatePause || lastEvent.Type == StateResume {
-			return clnt.backend.StateChanged(containerID, StateInfo{
-				CommonStateInfo: CommonStateInfo{
-					State: lastEvent.Type,
-					Pid:   container.systemPid,
-				}})
-		}
-
-		logrus.Warnf("libcontainerd: unexpected backlog event: %#v", lastEvent)
-	}
-
-	return nil
-}
-
-func (clnt *client) getContainerLastEventSinceTime(id string, tsp *timestamp.Timestamp) (*containerd.Event, error) {
-	er := &containerd.EventsRequest{
-		Timestamp:  tsp,
-		StoredOnly: true,
-		Id:         id,
-	}
-	events, err := clnt.remote.apiClient.Events(context.Background(), er)
-	if err != nil {
-		logrus.Errorf("libcontainerd: failed to get container events stream for %s: %q", er.Id, err)
-		return nil, err
-	}
-
-	var ev *containerd.Event
-	for {
-		e, err := events.Recv()
-		if err != nil {
-			if err.Error() == "EOF" {
-				break
-			}
-			logrus.Errorf("libcontainerd: failed to get container event for %s: %q", id, err)
-			return nil, err
-		}
-		ev = e
-		logrus.Debugf("libcontainerd: received past event %#v", ev)
-	}
-
-	return ev, nil
-}
-
-func (clnt *client) getContainerLastEvent(id string) (*containerd.Event, error) {
-	ev, err := clnt.getContainerLastEventSinceTime(id, clnt.remote.restoreFromTimestamp)
-	if err == nil && ev == nil {
-		// If ev is nil and the container is running in containerd,
-		// we already consumed all the event of the
-		// container, included the "exit" one.
-		// Thus, we request all events containerd has in memory for
-		// this container in order to get the last one (which should
-		// be an exit event)
-		logrus.Warnf("libcontainerd: client is out of sync, restore was called on a fully synced container (%s).", id)
-		// Request all events since beginning of time
-		t := time.Unix(0, 0)
-		tsp, err := ptypes.TimestampProto(t)
-		if err != nil {
-			logrus.Errorf("libcontainerd: getLastEventSinceTime() failed to convert timestamp: %q", err)
-			return nil, err
-		}
-
-		return clnt.getContainerLastEventSinceTime(id, tsp)
-	}
-
-	return ev, err
-}
-
-func (clnt *client) Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error {
-	// Synchronize with live events
-	clnt.remote.Lock()
-	defer clnt.remote.Unlock()
-	// Check that containerd still knows this container.
-	//
-	// In the unlikely event that Restore for this container process
-	// the its past event before the main loop, the event will be
-	// processed twice. However, this is not an issue as all those
-	// events will do is change the state of the container to be
-	// exactly the same.
-	cont, err := clnt.getContainerdContainer(containerID)
-	// Get its last event
-	ev, eerr := clnt.getContainerLastEvent(containerID)
-	if err != nil || containerd_runtime_types.State(cont.Status) == containerd_runtime_types.Stopped {
-		if err != nil {
-			logrus.Warnf("libcontainerd: failed to retrieve container %s state: %v", containerID, err)
-		}
-		if ev != nil && (ev.Pid != InitFriendlyName || ev.Type != StateExit) {
-			// Wait a while for the exit event
-			timeout := time.NewTimer(10 * time.Second)
-			tick := time.NewTicker(100 * time.Millisecond)
-		stop:
-			for {
-				select {
-				case <-timeout.C:
-					break stop
-				case <-tick.C:
-					ev, eerr = clnt.getContainerLastEvent(containerID)
-					if eerr != nil {
-						break stop
-					}
-					if ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit {
-						break stop
-					}
-				}
-			}
-			timeout.Stop()
-			tick.Stop()
-		}
-
-		// get the exit status for this container, if we don't have
-		// one, indicate an error
-		ec := uint32(255)
-		if eerr == nil && ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit {
-			ec = ev.Status
-		}
-		clnt.setExited(containerID, ec)
-
-		return nil
-	}
-
-	// container is still alive
-	if clnt.liveRestore {
-		if err := clnt.restore(cont, ev, attachStdio, options...); err != nil {
-			logrus.Errorf("libcontainerd: error restoring %s: %v", containerID, err)
-		}
-		return nil
-	}
-
-	// Kill the container if liveRestore == false
-	w := clnt.getOrCreateExitNotifier(containerID)
-	clnt.lock(cont.Id)
-	container := clnt.newContainer(cont.BundlePath)
-	container.systemPid = systemPid(cont)
-	clnt.appendContainer(container)
-	clnt.unlock(cont.Id)
-
-	container.discardFifos()
-
-	if err := clnt.Signal(containerID, int(unix.SIGTERM)); err != nil {
-		logrus.Errorf("libcontainerd: error sending sigterm to %v: %v", containerID, err)
-	}
-
-	// Let the main loop handle the exit event
-	clnt.remote.Unlock()
-
-	if ev != nil && ev.Type == StatePause {
-		// resume container, it depends on the main loop, so we do it after Unlock()
-		logrus.Debugf("libcontainerd: %s was paused, resuming it so it can die", containerID)
-		if err := clnt.Resume(containerID); err != nil {
-			return fmt.Errorf("failed to resume container: %v", err)
-		}
-	}
-
-	select {
-	case <-time.After(10 * time.Second):
-		if err := clnt.Signal(containerID, int(unix.SIGKILL)); err != nil {
-			logrus.Errorf("libcontainerd: error sending sigkill to %v: %v", containerID, err)
-		}
-		select {
-		case <-time.After(2 * time.Second):
-		case <-w.wait():
-			// relock because of the defer
-			clnt.remote.Lock()
-			return nil
-		}
-	case <-w.wait():
-		// relock because of the defer
-		clnt.remote.Lock()
-		return nil
-	}
-	// relock because of the defer
-	clnt.remote.Lock()
-
-	clnt.deleteContainer(containerID)
-
-	return clnt.setExited(containerID, uint32(255))
-}
-
-func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	if _, err := clnt.getContainer(containerID); err != nil {
-		return err
-	}
-
-	_, err := clnt.remote.apiClient.CreateCheckpoint(context.Background(), &containerd.CreateCheckpointRequest{
-		Id: containerID,
-		Checkpoint: &containerd.Checkpoint{
-			Name:        checkpointID,
-			Exit:        exit,
-			Tcp:         true,
-			UnixSockets: true,
-			Shell:       false,
-			EmptyNS:     []string{"network"},
-		},
-		CheckpointDir: checkpointDir,
-	})
-	return err
-}
-
-func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	if _, err := clnt.getContainer(containerID); err != nil {
-		return err
-	}
-
-	_, err := clnt.remote.apiClient.DeleteCheckpoint(context.Background(), &containerd.DeleteCheckpointRequest{
-		Id:            containerID,
-		Name:          checkpointID,
-		CheckpointDir: checkpointDir,
-	})
-	return err
-}
-
-func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	if _, err := clnt.getContainer(containerID); err != nil {
-		return nil, err
-	}
-
-	resp, err := clnt.remote.apiClient.ListCheckpoint(context.Background(), &containerd.ListCheckpointRequest{
-		Id:            containerID,
-		CheckpointDir: checkpointDir,
-	})
-	if err != nil {
-		return nil, err
-	}
-	return (*Checkpoints)(resp), nil
-}

+ 1340 - 0
libcontainerd/client_local_windows.go

@@ -0,0 +1,1340 @@
+package libcontainerd
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/Microsoft/hcsshim"
+	opengcs "github.com/Microsoft/opengcs/client"
+	"github.com/docker/docker/pkg/sysinfo"
+	"github.com/docker/docker/pkg/system"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/windows"
+)
+
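+// InitProcessName is the name used to refer to a container's init process.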
+const InitProcessName = "init"
+
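+// process tracks an HCS process (init or exec) running inside a container.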
+type process struct {
+	id         string
+	pid        int
+	hcsProcess hcsshim.Process
+}
+
+type container struct {
+	sync.Mutex
+
+	// The ociSpec is required, as client.Create() needs a spec, but can
+	// be called from the RestartManager context which does not otherwise
+	// have access to the Spec
+	ociSpec *specs.Spec
+
+	isWindows           bool
+	manualStopRequested bool
+	hcsContainer        hcsshim.Container
+
+	id            string
+	status        Status
+	exitedAt      time.Time
+	exitCode      uint32
+	waitCh        chan struct{}
+	init          *process
+	execs         map[string]*process
+	updatePending bool
+}
+
+// Win32 error codes that are used for various workarounds.
+// These really should be ALL_CAPS to match Go's syscall library and standard
+// Win32 error conventions, but golint insists on CamelCase.
+const (
+	CoEClassstring     = syscall.Errno(0x800401F3) // Invalid class string
+	ErrorNoNetwork     = syscall.Errno(1222)       // The network is not present or not started
+	ErrorBadPathname   = syscall.Errno(161)        // The specified path is invalid
+	ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object
+)
+
+// defaultOwner is a tag passed to HCS to allow it to differentiate between
+// container creator management stacks. We hard-code "docker" as the owner.
+const defaultOwner = "docker"
+
+// Create is the entrypoint to create a container from a spec.
+// Table below shows the fields required for HCS JSON calling parameters,
+// where if not populated, is omitted.
+// +-----------------+--------------------------------------------+---------------------------------------------------+
+// |                 | Isolation=Process                          | Isolation=Hyper-V                                 |
+// +-----------------+--------------------------------------------+---------------------------------------------------+
+// | VolumePath      | \\?\\Volume{GUIDa}                         |                                                   |
+// | LayerFolderPath | %root%\windowsfilter\containerID           | %root%\windowsfilter\containerID (servicing only) |
+// | Layers[]        | ID=GUIDb;Path=%root%\windowsfilter\layerID | ID=GUIDb;Path=%root%\windowsfilter\layerID        |
+// | HvRuntime       |                                            | ImagePath=%root%\BaseLayerID\UtilityVM            |
+// +-----------------+--------------------------------------------+---------------------------------------------------+
+//
+// Isolation=Process example:
+//
+// {
+//	"SystemType": "Container",
+//	"Name": "5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
+//	"Owner": "docker",
+//	"VolumePath": "\\\\\\\\?\\\\Volume{66d1ef4c-7a00-11e6-8948-00155ddbef9d}",
+//	"IgnoreFlushesDuringBoot": true,
+//	"LayerFolderPath": "C:\\\\control\\\\windowsfilter\\\\5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
+//	"Layers": [{
+//		"ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
+//		"Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
+//	}],
+//	"HostName": "5e0055c814a6",
+//	"MappedDirectories": [],
+//	"HvPartition": false,
+//	"EndpointList": ["eef2649d-bb17-4d53-9937-295a8efe6f2c"],
+//	"Servicing": false
+//}
+//
+// Isolation=Hyper-V example:
+//
+//{
+//	"SystemType": "Container",
+//	"Name": "475c2c58933b72687a88a441e7e0ca4bd72d76413c5f9d5031fee83b98f6045d",
+//	"Owner": "docker",
+//	"IgnoreFlushesDuringBoot": true,
+//	"Layers": [{
+//		"ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
+//		"Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
+//	}],
+//	"HostName": "475c2c58933b",
+//	"MappedDirectories": [],
+//	"HvPartition": true,
+//	"EndpointList": ["e1bb1e61-d56f-405e-b75d-fd520cefa0cb"],
+//	"DNSSearchList": "a.com,b.com,c.com",
+//	"HvRuntime": {
+//		"ImagePath": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c\\\\UtilityVM"
+//	},
+//	"Servicing": false
+//}
+func (c *client) Create(_ context.Context, id string, spec *specs.Spec, runtimeOptions interface{}) error {
+	if ctr := c.getContainer(id); ctr != nil {
+		return errors.WithStack(newConflictError("id already in use"))
+	}
+
+	// spec.Linux must be nil for Windows containers, but spec.Windows
+	// will be filled in regardless of container platform.  This is a
+	// temporary workaround due to LCOW requiring layer folder paths,
+	// which are stored under spec.Windows.
+	//
+	// TODO: @darrenstahlmsft fix this once the OCI spec is updated to
+	// support layer folder paths for LCOW
+	if spec.Linux == nil {
+		return c.createWindows(id, spec, runtimeOptions)
+	}
+	return c.createLinux(id, spec, runtimeOptions)
+}
+
+func (c *client) createWindows(id string, spec *specs.Spec, runtimeOptions interface{}) error {
+	logger := c.logger.WithField("container", id)
+	configuration := &hcsshim.ContainerConfig{
+		SystemType: "Container",
+		Name:       id,
+		Owner:      defaultOwner,
+		IgnoreFlushesDuringBoot: spec.Windows.IgnoreFlushesDuringBoot,
+		HostName:                spec.Hostname,
+		HvPartition:             false,
+		Servicing:               spec.Windows.Servicing,
+	}
+
+	if spec.Windows.Resources != nil {
+		if spec.Windows.Resources.CPU != nil {
+			if spec.Windows.Resources.CPU.Count != nil {
+				// This check is being done here rather than in adaptContainerSettings
+				// because we don't want to update the HostConfig in case this container
+				// is moved to a host with more CPUs than this one.
+				cpuCount := *spec.Windows.Resources.CPU.Count
+				hostCPUCount := uint64(sysinfo.NumCPU())
+				if cpuCount > hostCPUCount {
+					c.logger.Warnf("Changing requested CPUCount of %d to current number of processors, %d", cpuCount, hostCPUCount)
+					cpuCount = hostCPUCount
+				}
+				configuration.ProcessorCount = uint32(cpuCount)
+			}
+			if spec.Windows.Resources.CPU.Shares != nil {
+				configuration.ProcessorWeight = uint64(*spec.Windows.Resources.CPU.Shares)
+			}
+			if spec.Windows.Resources.CPU.Maximum != nil {
+				configuration.ProcessorMaximum = int64(*spec.Windows.Resources.CPU.Maximum)
+			}
+		}
+		if spec.Windows.Resources.Memory != nil {
+			if spec.Windows.Resources.Memory.Limit != nil {
+				configuration.MemoryMaximumInMB = int64(*spec.Windows.Resources.Memory.Limit) / 1024 / 1024
+			}
+		}
+		if spec.Windows.Resources.Storage != nil {
+			if spec.Windows.Resources.Storage.Bps != nil {
+				configuration.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps
+			}
+			if spec.Windows.Resources.Storage.Iops != nil {
+				configuration.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops
+			}
+		}
+	}
+
+	if spec.Windows.HyperV != nil {
+		configuration.HvPartition = true
+	}
+
+	if spec.Windows.Network != nil {
+		configuration.EndpointList = spec.Windows.Network.EndpointList
+		configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
+		if spec.Windows.Network.DNSSearchList != nil {
+			configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
+		}
+		configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
+	}
+
+	if cs, ok := spec.Windows.CredentialSpec.(string); ok {
+		configuration.Credentials = cs
+	}
+
+	// We must have at least two layers in the spec, the bottom one being a
+	// base image, the top one being the RW layer.
+	if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) < 2 {
+		return fmt.Errorf("OCI spec is invalid - at least two LayerFolders must be supplied to the runtime")
+	}
+
+	// Strip off the top-most layer as that's passed in separately to HCS
+	configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
+	layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
+
+	if configuration.HvPartition {
+		// We don't currently support setting the utility VM image explicitly.
+		// TODO @swernli/jhowardmsft circa RS3/4, this may be re-locatable.
+		if spec.Windows.HyperV.UtilityVMPath != "" {
+			return errors.New("runtime does not support an explicit utility VM path for Hyper-V containers")
+		}
+
+		// Find the upper-most utility VM image.
+		var uvmImagePath string
+		for _, path := range layerFolders {
+			fullPath := filepath.Join(path, "UtilityVM")
+			_, err := os.Stat(fullPath)
+			if err == nil {
+				uvmImagePath = fullPath
+				break
+			}
+			if !os.IsNotExist(err) {
+				return err
+			}
+		}
+		if uvmImagePath == "" {
+			return errors.New("utility VM image could not be found")
+		}
+		configuration.HvRuntime = &hcsshim.HvRuntime{ImagePath: uvmImagePath}
+
+		if spec.Root.Path != "" {
+			return errors.New("OCI spec is invalid - Root.Path must be omitted for a Hyper-V container")
+		}
+	} else {
+		const volumeGUIDRegex = `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}\\$`
+		if matched, err := regexp.MatchString(volumeGUIDRegex, spec.Root.Path); err != nil || !matched {
+			return fmt.Errorf(`OCI spec is invalid - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, spec.Root.Path)
+		}
+		// HCS API requires the trailing backslash to be removed
+		configuration.VolumePath = spec.Root.Path[:len(spec.Root.Path)-1]
+	}
+
+	if spec.Root.Readonly {
+		return errors.New(`OCI spec is invalid - Root.Readonly must not be set on Windows`)
+	}
+
+	for _, layerPath := range layerFolders {
+		_, filename := filepath.Split(layerPath)
+		g, err := hcsshim.NameToGuid(filename)
+		if err != nil {
+			return err
+		}
+		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
+			ID:   g.ToString(),
+			Path: layerPath,
+		})
+	}
+
+	// Add the mounts (volumes, bind mounts etc) to the structure
+	var mds []hcsshim.MappedDir
+	var mps []hcsshim.MappedPipe
+	for _, mount := range spec.Mounts {
+		const pipePrefix = `\\.\pipe\`
+		if mount.Type != "" {
+			return fmt.Errorf("OCI spec is invalid - Mount.Type '%s' must not be set", mount.Type)
+		}
+		if strings.HasPrefix(mount.Destination, pipePrefix) {
+			mp := hcsshim.MappedPipe{
+				HostPath:          mount.Source,
+				ContainerPipeName: mount.Destination[len(pipePrefix):],
+			}
+			mps = append(mps, mp)
+		} else {
+			md := hcsshim.MappedDir{
+				HostPath:      mount.Source,
+				ContainerPath: mount.Destination,
+				ReadOnly:      false,
+			}
+			for _, o := range mount.Options {
+				if strings.ToLower(o) == "ro" {
+					md.ReadOnly = true
+				}
+			}
+			mds = append(mds, md)
+		}
+	}
+	configuration.MappedDirectories = mds
+	if len(mps) > 0 && system.GetOSVersion().Build < 16210 { // replace with Win10 RS3 build number at RTM
+		return errors.New("named pipe mounts are not supported on this version of Windows")
+	}
+	configuration.MappedPipes = mps
+
+	hcsContainer, err := hcsshim.CreateContainer(id, configuration)
+	if err != nil {
+		return err
+	}
+
+	// Construct a container object for calling start on it.
+	ctr := &container{
+		id:           id,
+		execs:        make(map[string]*process),
+		isWindows:    true,
+		ociSpec:      spec,
+		hcsContainer: hcsContainer,
+		status:       StatusCreated,
+		waitCh:       make(chan struct{}),
+	}
+
+	// Start the container. If this is a servicing container, this call
+	// will block until the container is done with the servicing
+	// execution.
+	logger.Debug("starting container")
+	if err = hcsContainer.Start(); err != nil {
+		c.logger.WithError(err).Error("failed to start container")
+		ctr.debugGCS()
+		if err := c.terminateContainer(ctr); err != nil {
+			c.logger.WithError(err).Error("failed to cleanup after a failed Start")
+		} else {
+			c.logger.Debug("cleaned up after failed Start by calling Terminate")
+		}
+		return err
+	}
+	ctr.debugGCS()
+
+	c.Lock()
+	c.containers[id] = ctr
+	c.Unlock()
+
+	logger.Debug("createWindows() completed successfully")
+	return nil
+
+}
+
+func (c *client) createLinux(id string, spec *specs.Spec, runtimeOptions interface{}) error {
+	logrus.Debugf("libcontainerd: createLinux(): containerId %s ", id)
+	logger := c.logger.WithField("container", id)
+
+	if runtimeOptions == nil {
+		return fmt.Errorf("lcow option must be supplied to the runtime")
+	}
+	lcowConfig, ok := runtimeOptions.(*opengcs.Config)
+	if !ok {
+		return fmt.Errorf("lcow option must be supplied to the runtime")
+	}
+
+	configuration := &hcsshim.ContainerConfig{
+		HvPartition:   true,
+		Name:          id,
+		SystemType:    "container",
+		ContainerType: "linux",
+		Owner:         defaultOwner,
+		TerminateOnLastHandleClosed: true,
+	}
+
+	if lcowConfig.ActualMode == opengcs.ModeActualVhdx {
+		configuration.HvRuntime = &hcsshim.HvRuntime{
+			ImagePath:          lcowConfig.Vhdx,
+			BootSource:         "Vhd",
+			WritableBootSource: false,
+		}
+	} else {
+		configuration.HvRuntime = &hcsshim.HvRuntime{
+			ImagePath:           lcowConfig.KirdPath,
+			LinuxKernelFile:     lcowConfig.KernelFile,
+			LinuxInitrdFile:     lcowConfig.InitrdFile,
+			LinuxBootParameters: lcowConfig.BootParameters,
+		}
+	}
+
+	if spec.Windows == nil {
+		return fmt.Errorf("spec.Windows must not be nil for LCOW containers")
+	}
+
+	// We must have at least one layer in the spec
+	if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) == 0 {
+		return fmt.Errorf("OCI spec is invalid - at least one LayerFolders must be supplied to the runtime")
+	}
+
+	// Strip off the top-most layer as that's passed in separately to HCS
+	configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
+	layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
+
+	for _, layerPath := range layerFolders {
+		_, filename := filepath.Split(layerPath)
+		g, err := hcsshim.NameToGuid(filename)
+		if err != nil {
+			return err
+		}
+		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
+			ID:   g.ToString(),
+			Path: filepath.Join(layerPath, "layer.vhd"),
+		})
+	}
+
+	if spec.Windows.Network != nil {
+		configuration.EndpointList = spec.Windows.Network.EndpointList
+		configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
+		if spec.Windows.Network.DNSSearchList != nil {
+			configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
+		}
+		configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
+	}
+
+	// Add the mounts (volumes, bind mounts etc) to the structure. We have to do
+	// some translation for both the mapped directories passed into HCS and in
+	// the spec.
+	//
+	// For HCS, we only pass in the mounts from the spec which are type "bind".
+	// Further, the "ContainerPath" field (which is a little misleadingly
+	// named when it applies to the utility VM rather than the container in the
+	// utility VM) is moved to under /tmp/gcs/<ID>/binds, where this is passed
+	// by the caller through a 'uvmpath' option.
+	//
+	// We do similar translation for the mounts in the spec by stripping out
+	// the uvmpath option, and translating the Source path to the location in the
+	// utility VM calculated above.
+	//
+	// From inside the utility VM, you would see a 9p mount such as in the following
+	// where a host folder has been mapped to /target. The line with /tmp/gcs/<ID>/binds
+	// specifically:
+	//
+	//	/ # mount
+	//	rootfs on / type rootfs (rw,size=463736k,nr_inodes=115934)
+	//	proc on /proc type proc (rw,relatime)
+	//	sysfs on /sys type sysfs (rw,relatime)
+	//	udev on /dev type devtmpfs (rw,relatime,size=498100k,nr_inodes=124525,mode=755)
+	//	tmpfs on /run type tmpfs (rw,relatime)
+	//	cgroup on /sys/fs/cgroup type cgroup (rw,relatime,cpuset,cpu,cpuacct,blkio,memory,devices,freezer,net_cls,perf_event,net_prio,hugetlb,pids,rdma)
+	//	mqueue on /dev/mqueue type mqueue (rw,relatime)
+	//	devpts on /dev/pts type devpts (rw,relatime,mode=600,ptmxmode=000)
+	//	/binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target on /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target type 9p (rw,sync,dirsync,relatime,trans=fd,rfdno=6,wfdno=6)
+	//	/dev/pmem0 on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0 type ext4 (ro,relatime,block_validity,delalloc,norecovery,barrier,dax,user_xattr,acl)
+	//	/dev/sda on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch type ext4 (rw,relatime,block_validity,delalloc,barrier,user_xattr,acl)
+	//	overlay on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/rootfs type overlay (rw,relatime,lowerdir=/tmp/base/:/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0,upperdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/upper,workdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/work)
+	//
+	//  /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l
+	//	total 16
+	//	drwx------    3 0        0               60 Sep  7 18:54 binds
+	//	-rw-r--r--    1 0        0             3345 Sep  7 18:54 config.json
+	//	drwxr-xr-x   10 0        0             4096 Sep  6 17:26 layer0
+	//	drwxr-xr-x    1 0        0             4096 Sep  7 18:54 rootfs
+	//	drwxr-xr-x    5 0        0             4096 Sep  7 18:54 scratch
+	//
+	//	/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l binds
+	//	total 0
+	//	drwxrwxrwt    2 0        0             4096 Sep  7 16:51 target
+
+	mds := []hcsshim.MappedDir{}
+	specMounts := []specs.Mount{}
+	for _, mount := range spec.Mounts {
+		specMount := mount
+		if mount.Type == "bind" {
+			// Strip out the uvmpath from the options
+			updatedOptions := []string{}
+			uvmPath := ""
+			readonly := false
+			for _, opt := range mount.Options {
+				dropOption := false
+				elements := strings.SplitN(opt, "=", 2)
+				switch elements[0] {
+				case "uvmpath":
+					uvmPath = elements[1]
+					dropOption = true
+				case "rw":
+				case "ro":
+					readonly = true
+				case "rbind":
+				default:
+					return fmt.Errorf("unsupported option %q", opt)
+				}
+				if !dropOption {
+					updatedOptions = append(updatedOptions, opt)
+				}
+			}
+			mount.Options = updatedOptions
+			if uvmPath == "" {
+				return fmt.Errorf("no uvmpath for bind mount %+v", mount)
+			}
+			md := hcsshim.MappedDir{
+				HostPath:          mount.Source,
+				ContainerPath:     path.Join(uvmPath, mount.Destination),
+				CreateInUtilityVM: true,
+				ReadOnly:          readonly,
+			}
+			mds = append(mds, md)
+			specMount.Source = path.Join(uvmPath, mount.Destination)
+		}
+		specMounts = append(specMounts, specMount)
+	}
+	configuration.MappedDirectories = mds
+
+	hcsContainer, err := hcsshim.CreateContainer(id, configuration)
+	if err != nil {
+		return err
+	}
+
+	spec.Mounts = specMounts
+
+	// Construct a container object for calling start on it.
+	ctr := &container{
+		id:           id,
+		execs:        make(map[string]*process),
+		isWindows:    false,
+		ociSpec:      spec,
+		hcsContainer: hcsContainer,
+		status:       StatusCreated,
+		waitCh:       make(chan struct{}),
+	}
+
+	// Start the container. If this is a servicing container, this call
+	// will block until the container is done with the servicing
+	// execution.
+	logger.Debug("starting container")
+	if err = hcsContainer.Start(); err != nil {
+		c.logger.WithError(err).Error("failed to start container")
+		ctr.debugGCS()
+		if err := c.terminateContainer(ctr); err != nil {
+			c.logger.WithError(err).Error("failed to cleanup after a failed Start")
+		} else {
+			c.logger.Debug("cleaned up after failed Start by calling Terminate")
+		}
+		return err
+	}
+	ctr.debugGCS()
+
+	c.Lock()
+	c.containers[id] = ctr
+	c.Unlock()
+
+	c.eventQ.append(id, func() {
+		ei := EventInfo{
+			ContainerID: id,
+		}
+		c.logger.WithFields(logrus.Fields{
+			"container": ctr.id,
+			"event":     EventCreate,
+		}).Info("sending event")
+		err := c.backend.ProcessEvent(id, EventCreate, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container": id,
+				"event":     EventCreate,
+			}).Error("failed to process event")
+		}
+	})
+
+	logger.Debug("createLinux() completed successfully")
+	return nil
+}
+
+func (c *client) Start(_ context.Context, id, _ string, withStdin bool, attachStdio StdioCallback) (int, error) {
+	ctr := c.getContainer(id)
+	switch {
+	case ctr == nil:
+		return -1, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.init != nil:
+		return -1, errors.WithStack(newConflictError("container already started"))
+	}
+
+	logger := c.logger.WithField("container", id)
+
+	// Note we always tell HCS to create stdout as it's required
+	// regardless of '-i' or '-t' options, so that docker can always grab
+	// the output through logs. We also tell HCS to always create stdin,
+	// even if it's not used - it will be closed shortly. Stderr is only
+	// created if we're not using '-t'.
+	var (
+		emulateConsole   bool
+		createStdErrPipe bool
+	)
+	if ctr.ociSpec.Process != nil {
+		emulateConsole = ctr.ociSpec.Process.Terminal
+		createStdErrPipe = !ctr.ociSpec.Process.Terminal && !ctr.ociSpec.Windows.Servicing
+	}
+
+	createProcessParms := &hcsshim.ProcessConfig{
+		EmulateConsole:   emulateConsole,
+		WorkingDirectory: ctr.ociSpec.Process.Cwd,
+		CreateStdInPipe:  !ctr.ociSpec.Windows.Servicing,
+		CreateStdOutPipe: !ctr.ociSpec.Windows.Servicing,
+		CreateStdErrPipe: createStdErrPipe,
+	}
+
+	if ctr.ociSpec.Process != nil && ctr.ociSpec.Process.ConsoleSize != nil {
+		createProcessParms.ConsoleSize[0] = uint(ctr.ociSpec.Process.ConsoleSize.Height)
+		createProcessParms.ConsoleSize[1] = uint(ctr.ociSpec.Process.ConsoleSize.Width)
+	}
+
+	// Configure the environment for the process
+	createProcessParms.Environment = setupEnvironmentVariables(ctr.ociSpec.Process.Env)
+	if ctr.isWindows {
+		createProcessParms.CommandLine = strings.Join(ctr.ociSpec.Process.Args, " ")
+	} else {
+		createProcessParms.CommandArgs = ctr.ociSpec.Process.Args
+	}
+	createProcessParms.User = ctr.ociSpec.Process.User.Username
+
+	// LCOW requires the raw OCI spec passed through HCS and onwards to
+	// GCS for the utility VM.
+	if !ctr.isWindows {
+		ociBuf, err := json.Marshal(ctr.ociSpec)
+		if err != nil {
+			return -1, err
+		}
+		ociRaw := json.RawMessage(ociBuf)
+		createProcessParms.OCISpecification = &ociRaw
+	}
+
+	ctr.Lock()
+	defer ctr.Unlock()
+
+	// Start the command running in the container.
+	newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
+	if err != nil {
+		logger.WithError(err).Error("CreateProcess() failed")
+		return -1, err
+	}
+	defer func() {
+		if err != nil {
+			if err := newProcess.Kill(); err != nil {
+				logger.WithError(err).Error("failed to kill process")
+			}
+			go func() {
+				if err := newProcess.Wait(); err != nil {
+					logger.WithError(err).Error("failed to wait for process")
+				}
+				if err := newProcess.Close(); err != nil {
+					logger.WithError(err).Error("failed to clean process resources")
+				}
+			}()
+		}
+	}()
+	p := &process{
+		hcsProcess: newProcess,
+		id:         InitProcessName,
+		pid:        newProcess.Pid(),
+	}
+	logger.WithField("pid", p.pid).Debug("init process started")
+
+	// If this is a servicing container, wait on the process synchronously here and
+	// if it succeeds, wait for it cleanly shutdown and merge into the parent container.
+	if ctr.ociSpec.Windows.Servicing {
+		// reapProcess takes the lock
+		ctr.Unlock()
+		defer ctr.Lock()
+		exitCode := c.reapProcess(ctr, p)
+
+		if exitCode != 0 {
+			return -1, errors.Errorf("libcontainerd: servicing container %s returned non-zero exit code %d", ctr.id, exitCode)
+		}
+
+		return p.pid, nil
+	}
+
+	var (
+		stdout, stderr io.ReadCloser
+		stdin          io.WriteCloser
+	)
+	stdin, stdout, stderr, err = newProcess.Stdio()
+	if err != nil {
+		logger.WithError(err).Error("failed to get stdio pipes")
+		return -1, err
+	}
+
+	iopipe := &IOPipe{Terminal: ctr.ociSpec.Process.Terminal}
+	iopipe.Stdin = createStdInCloser(stdin, newProcess)
+
+	// Convert io.ReadClosers to io.Readers
+	if stdout != nil {
+		iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
+	}
+	if stderr != nil {
+		iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
+	}
+
+	_, err = attachStdio(iopipe)
+	if err != nil {
+		logger.WithError(err).Error("failed to attache stdio")
+		return -1, err
+	}
+	ctr.status = StatusRunning
+	ctr.init = p
+
+	// Spin up a go routine waiting for exit to handle cleanup
+	go c.reapProcess(ctr, p)
+
+	// Generate the associated event
+	c.eventQ.append(id, func() {
+		ei := EventInfo{
+			ContainerID: id,
+			ProcessID:   InitProcessName,
+			Pid:         uint32(p.pid),
+		}
+		c.logger.WithFields(logrus.Fields{
+			"container":  ctr.id,
+			"event":      EventStart,
+			"event-info": ei,
+		}).Info("sending event")
+		err := c.backend.ProcessEvent(ei.ContainerID, EventStart, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container":  id,
+				"event":      EventStart,
+				"event-info": ei,
+			}).Error("failed to process event")
+		}
+	})
+	logger.Debug("start() completed")
+	return p.pid, nil
+}
+
+// Exec adds a process to a running container
+func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) {
+	ctr := c.getContainer(containerID)
+	switch {
+	case ctr == nil:
+		return -1, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.hcsContainer == nil:
+		return -1, errors.WithStack(newInvalidParameterError("container is not running"))
+	case ctr.execs != nil && ctr.execs[processID] != nil:
+		return -1, errors.WithStack(newConflictError("id already in use"))
+	}
+	logger := c.logger.WithFields(logrus.Fields{
+		"container": containerID,
+		"exec":      processID,
+	})
+
+	// Note we always tell HCS to create stdout as it's required
+	// regardless of the '-i' or '-t' options, so that docker can always grab
+	// the output through logs. We also tell HCS to always create stdin, even
+	// if it's not used - it will be closed shortly. Stderr is only created
+	// if we're not running with -t.
+	createProcessParms := hcsshim.ProcessConfig{
+		CreateStdInPipe:  true,
+		CreateStdOutPipe: true,
+		CreateStdErrPipe: !spec.Terminal,
+	}
+	if spec.Terminal {
+		createProcessParms.EmulateConsole = true
+		if spec.ConsoleSize != nil {
+			createProcessParms.ConsoleSize[0] = uint(spec.ConsoleSize.Height)
+			createProcessParms.ConsoleSize[1] = uint(spec.ConsoleSize.Width)
+		}
+	}
+
+	// Take working directory from the process to add if it is defined,
+	// otherwise take from the first process.
+	if spec.Cwd != "" {
+		createProcessParms.WorkingDirectory = spec.Cwd
+	} else {
+		createProcessParms.WorkingDirectory = ctr.ociSpec.Process.Cwd
+	}
+
+	// Configure the environment for the process
+	createProcessParms.Environment = setupEnvironmentVariables(spec.Env)
+	if ctr.isWindows {
+		createProcessParms.CommandLine = strings.Join(spec.Args, " ")
+	} else {
+		createProcessParms.CommandArgs = spec.Args
+	}
+	createProcessParms.User = spec.User.Username
+
+	logger.Debugf("exec commandLine: %s", createProcessParms.CommandLine)
+
+	// Start the command running in the container.
+	var (
+		stdout, stderr io.ReadCloser
+		stdin          io.WriteCloser
+	)
+	newProcess, err := ctr.hcsContainer.CreateProcess(&createProcessParms)
+	if err != nil {
+		logger.WithError(err).Errorf("exec's CreateProcess() failed")
+		return -1, err
+	}
+	pid := newProcess.Pid()
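+	// As with the init process, undo the CreateProcess call if any of the
+	// remaining setup fails.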
+	defer func() {
+		if err != nil {
+			if err := newProcess.Kill(); err != nil {
+				logger.WithError(err).Error("failed to kill process")
+			}
+			go func() {
+				if err := newProcess.Wait(); err != nil {
+					logger.WithError(err).Error("failed to wait for process")
+				}
+				if err := newProcess.Close(); err != nil {
+					logger.WithError(err).Error("failed to clean process resources")
+				}
+			}()
+		}
+	}()
+
+	stdin, stdout, stderr, err = newProcess.Stdio()
+	if err != nil {
+		logger.WithError(err).Error("getting std pipes failed")
+		return -1, err
+	}
+
+	iopipe := &IOPipe{Terminal: spec.Terminal}
+	iopipe.Stdin = createStdInCloser(stdin, newProcess)
+
+	// Convert io.ReadClosers to io.Readers
+	if stdout != nil {
+		iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
+	}
+	if stderr != nil {
+		iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
+	}
+
+	// Tell the engine to attach streams back to the client
+	_, err = attachStdio(iopipe)
+	if err != nil {
+		return -1, err
+	}
+
+	p := &process{
+		id:         processID,
+		pid:        pid,
+		hcsProcess: newProcess,
+	}
+
+	// Add the process to the container's list of processes
+	ctr.Lock()
+	ctr.execs[processID] = p
+	ctr.Unlock()
+
+	// Spin up a goroutine waiting for exit to handle cleanup
+	go c.reapProcess(ctr, p)
+
+	c.eventQ.append(ctr.id, func() {
+		ei := EventInfo{
+			ContainerID: ctr.id,
+			ProcessID:   p.id,
+			Pid:         uint32(p.pid),
+		}
+		c.logger.WithFields(logrus.Fields{
+			"container":  ctr.id,
+			"event":      EventExecAdded,
+			"event-info": ei,
+		}).Info("sending event")
+		err := c.backend.ProcessEvent(ctr.id, EventExecAdded, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container":  ctr.id,
+				"event":      EventExecAdded,
+				"event-info": ei,
+			}).Error("failed to process event")
+		}
+		err = c.backend.ProcessEvent(ctr.id, EventExecStarted, ei)
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container":  ctr.id,
+				"event":      EventExecStarted,
+				"event-info": ei,
+			}).Error("failed to process event")
+		}
+	})
+
+	return pid, nil
+}
+
+// SignalProcess handles `docker stop` on Windows. While Linux supports the
+// full range of signals, signals aren't really implemented on Windows. We
+// approximate a regular stop with a graceful shutdown, and treat -9 (SIGKILL)
+// as a forced termination.
+func (c *client) SignalProcess(_ context.Context, containerID, processID string, signal int) error {
+	ctr, p, err := c.getProcess(containerID, processID)
+	if err != nil {
+		return err
+	}
+
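+	// Record that this stop was explicitly requested.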
+	ctr.manualStopRequested = true
+
+	logger := c.logger.WithFields(logrus.Fields{
+		"container": containerID,
+		"process":   processID,
+		"pid":       p.pid,
+		"signal":    signal,
+	})
+	logger.Debug("Signal()")
+
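+	// For the init process, SIGKILL terminates the compute system outright,
+	// while any other signal requests a graceful shutdown of the container.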
+	if processID == InitProcessName {
+		if syscall.Signal(signal) == syscall.SIGKILL {
+			// Terminate the compute system
+			if err := ctr.hcsContainer.Terminate(); err != nil {
+				if !hcsshim.IsPending(err) {
+					logger.WithError(err).Error("failed to terminate hcsshim container")
+				}
+			}
+		} else {
+			// Shut down the container
+			if err := ctr.hcsContainer.Shutdown(); err != nil {
+				if !hcsshim.IsPending(err) && !hcsshim.IsAlreadyStopped(err) {
+					// these errors are not fatal, so just log them
+					logger.WithError(err).Error("failed to shut down hcsshim container")
+				}
+			}
+		}
+	} else {
+		return p.hcsProcess.Kill()
+	}
+
+	return nil
+}
+
+// ResizeTerminal handles a CLI event to resize an interactive docker run or
+// docker exec window.
+func (c *client) ResizeTerminal(_ context.Context, containerID, processID string, width, height int) error {
+	_, p, err := c.getProcess(containerID, processID)
+	if err != nil {
+		return err
+	}
+
+	c.logger.WithFields(logrus.Fields{
+		"container": containerID,
+		"process":   processID,
+		"height":    height,
+		"width":     width,
+		"pid":       p.pid,
+	}).Debug("resizing")
+	return p.hcsProcess.ResizeConsole(uint16(height), uint16(width))
+}
+
+func (c *client) CloseStdin(_ context.Context, containerID, processID string) error {
+	_, p, err := c.getProcess(containerID, processID)
+	if err != nil {
+		return err
+	}
+
+	return p.hcsProcess.CloseStdin()
+}
+
+// Pause handles pause requests for containers
+func (c *client) Pause(_ context.Context, containerID string) error {
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	if ctr.ociSpec.Windows.HyperV == nil {
+		return errors.New("cannot pause Windows Server Containers")
+	}
+
+	ctr.Lock()
+	defer ctr.Unlock()
+
+	if err = ctr.hcsContainer.Pause(); err != nil {
+		return err
+	}
+
+	ctr.status = StatusPaused
+
+	c.eventQ.append(containerID, func() {
+		err := c.backend.ProcessEvent(containerID, EventPaused, EventInfo{
+			ContainerID: containerID,
+			ProcessID:   InitProcessName,
+		})
+		c.logger.WithFields(logrus.Fields{
+			"container": ctr.id,
+			"event":     EventPaused,
+		}).Info("sending event")
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container": containerID,
+				"event":     EventPaused,
+			}).Error("failed to process event")
+		}
+	})
+
+	return nil
+}
+
+// Resume handles resume requests for containers
+func (c *client) Resume(_ context.Context, containerID string) error {
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return err
+	}
+
+	if ctr.ociSpec.Windows.HyperV == nil {
+		return errors.New("cannot resume Windows Server Containers")
+	}
+
+	ctr.Lock()
+	defer ctr.Unlock()
+
+	if err = ctr.hcsContainer.Resume(); err != nil {
+		return err
+	}
+
+	ctr.status = StatusRunning
+
+	c.eventQ.append(containerID, func() {
+		err := c.backend.ProcessEvent(containerID, EventResumed, EventInfo{
+			ContainerID: containerID,
+			ProcessID:   InitProcessName,
+		})
+		c.logger.WithFields(logrus.Fields{
+			"container": ctr.id,
+			"event":     EventResumed,
+		}).Info("sending event")
+		if err != nil {
+			c.logger.WithError(err).WithFields(logrus.Fields{
+				"container": containerID,
+				"event":     EventResumed,
+			}).Error("failed to process event")
+		}
+	})
+
+	return nil
+}
+
+// Stats handles stats requests for containers
+func (c *client) Stats(_ context.Context, containerID string) (*Stats, error) {
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
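+	// Capture the read time before querying HCS so the returned Stats carries
+	// the timestamp at which the sample was taken.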
+	readAt := time.Now()
+	s, err := ctr.hcsContainer.Statistics()
+	if err != nil {
+		return nil, err
+	}
+	return &Stats{
+		Read:     readAt,
+		HCSStats: &s,
+	}, nil
+}
+
+// Restore is the handler for restoring a container
+func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallback) (bool, int, error) {
+	c.logger.WithField("container", id).Debug("restore()")
+
+	// TODO Windows: On RS1, a re-attach isn't possible.
+	// However, there is a scenario in which there is an issue.
+	// Consider a background container. The daemon dies unexpectedly.
+	// HCS will still have the compute service alive and running.
+	// For consistency, we call in to terminate it regardless of whether HCS
+	// knows about it. If the terminate fails we log it and return the error;
+	// otherwise the container is reported as not alive so the daemon treats
+	// it as having exited.
+	if hc, err := hcsshim.OpenContainer(id); err == nil {
+		const terminateTimeout = time.Minute * 2
+		err := hc.Terminate()
+
+		if hcsshim.IsPending(err) {
+			err = hc.WaitTimeout(terminateTimeout)
+		} else if hcsshim.IsAlreadyStopped(err) {
+			err = nil
+		}
+
+		if err != nil {
+			c.logger.WithField("container", id).WithError(err).Debug("terminate failed on restore")
+			return false, -1, err
+		}
+	}
+	return false, -1, nil
+}
+
+// ListPids returns a list of process IDs running in a container. It is not
+// implemented on Windows.
+func (c *client) ListPids(_ context.Context, _ string) ([]uint32, error) {
+	return nil, errors.New("not implemented on Windows")
+}
+
+// Summary returns a summary of the processes running in a container.
+// This is present in Windows to support docker top. On Linux, the
+// engine shells out to ps to get process information. On Windows, as
+// the containers could be Hyper-V containers, they would not be
+// visible on the container host. However, libcontainerd does have
+// that information.
+func (c *client) Summary(_ context.Context, containerID string) ([]Summary, error) {
+	ctr, _, err := c.getProcess(containerID, InitProcessName)
+	if err != nil {
+		return nil, err
+	}
+
+	p, err := ctr.hcsContainer.ProcessList()
+	if err != nil {
+		return nil, err
+	}
+
+	pl := make([]Summary, len(p))
+	for i := range p {
+		pl[i] = Summary(p[i])
+	}
+	return pl, nil
+}
+
+func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
+	ec := -1
+	ctr := c.getContainer(containerID)
+	if ctr == nil {
+		return uint32(ec), time.Now(), errors.WithStack(newNotFoundError("no such container"))
+	}
+
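+	// waitCh is closed by reapProcess once the container has stopped; if it
+	// is still open (and the context isn't done), refuse to delete the task.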
+	select {
+	case <-ctx.Done():
+		return uint32(ec), time.Now(), errors.WithStack(ctx.Err())
+	case <-ctr.waitCh:
+	default:
+		return uint32(ec), time.Now(), errors.New("container is not stopped")
+	}
+
+	ctr.Lock()
+	defer ctr.Unlock()
+	return ctr.exitCode, ctr.exitedAt, nil
+}
+
+func (c *client) Delete(_ context.Context, containerID string) error {
+	c.Lock()
+	defer c.Unlock()
+	ctr := c.containers[containerID]
+	if ctr == nil {
+		return errors.WithStack(newNotFoundError("no such container"))
+	}
+
+	ctr.Lock()
+	defer ctr.Unlock()
+
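+	// A container that was created but never started still has an HCS compute
+	// system backing it; shut that down before dropping it from the map.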
+	switch ctr.status {
+	case StatusCreated:
+		if err := c.shutdownContainer(ctr); err != nil {
+			return err
+		}
+		fallthrough
+	case StatusStopped:
+		delete(c.containers, containerID)
+		return nil
+	}
+
+	return errors.WithStack(newInvalidParameterError("container is not stopped"))
+}
+
+func (c *client) Status(ctx context.Context, containerID string) (Status, error) {
+	c.Lock()
+	defer c.Unlock()
+	ctr := c.containers[containerID]
+	if ctr == nil {
+		return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
+	}
+
+	ctr.Lock()
+	defer ctr.Unlock()
+	return ctr.status, nil
+}
+
+func (c *client) UpdateResources(ctx context.Context, containerID string, resources *Resources) error {
+	// Updating resources isn't supported on Windows, but we return nil so
+	// that requests to update the container do not fail outright.
+	return nil
+}
+
+func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
+	return errors.New("Windows: Containers do not support checkpoints")
+}
+
+func (c *client) getContainer(id string) *container {
+	c.Lock()
+	ctr := c.containers[id]
+	c.Unlock()
+
+	return ctr
+}
+
+func (c *client) getProcess(containerID, processID string) (*container, *process, error) {
+	ctr := c.getContainer(containerID)
+	switch {
+	case ctr == nil:
+		return nil, nil, errors.WithStack(newNotFoundError("no such container"))
+	case ctr.init == nil:
+		return nil, nil, errors.WithStack(newNotFoundError("container is not running"))
+	case processID == InitProcessName:
+		return ctr, ctr.init, nil
+	default:
+		ctr.Lock()
+		defer ctr.Unlock()
+		if ctr.execs == nil {
+			return nil, nil, errors.WithStack(newNotFoundError("no execs"))
+		}
+	}
+
+	p := ctr.execs[processID]
+	if p == nil {
+		return nil, nil, errors.WithStack(newNotFoundError("no such exec"))
+	}
+
+	return ctr, p, nil
+}
+
+func (c *client) shutdownContainer(ctr *container) error {
+	const shutdownTimeout = time.Minute * 5
+	err := ctr.hcsContainer.Shutdown()
+
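+	// HCS may report the shutdown as pending; wait for it to complete before
+	// deciding whether to fall back to a hard terminate.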
+	if hcsshim.IsPending(err) {
+		err = ctr.hcsContainer.WaitTimeout(shutdownTimeout)
+	} else if hcsshim.IsAlreadyStopped(err) {
+		err = nil
+	}
+
+	if err != nil {
+		c.logger.WithError(err).WithField("container", ctr.id).
+			Debug("failed to shutdown container, terminating it")
+		return c.terminateContainer(ctr)
+	}
+
+	return nil
+}
+
+func (c *client) terminateContainer(ctr *container) error {
+	const terminateTimeout = time.Minute * 5
+	err := ctr.hcsContainer.Terminate()
+
+	if hcsshim.IsPending(err) {
+		err = ctr.hcsContainer.WaitTimeout(terminateTimeout)
+	} else if hcsshim.IsAlreadyStopped(err) {
+		err = nil
+	}
+
+	if err != nil {
+		c.logger.WithError(err).WithField("container", ctr.id).
+			Debug("failed to terminate container")
+		return err
+	}
+
+	return nil
+}
+
+func (c *client) reapProcess(ctr *container, p *process) int {
+	logger := c.logger.WithFields(logrus.Fields{
+		"container": ctr.id,
+		"process":   p.id,
+	})
+
+	// Block indefinitely for the process to exit.
+	if err := p.hcsProcess.Wait(); err != nil {
+		if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
+			logger.WithError(err).Warnf("Wait() failed (container may have been killed)")
+		}
+		// Fall through here, do not return. This ensures we attempt to
+		// continue the shutdown in HCS and tell the docker engine that the
+		// process/container has exited to avoid a container being dropped on
+		// the floor.
+	}
+	exitedAt := time.Now()
+
+	exitCode, err := p.hcsProcess.ExitCode()
+	if err != nil {
+		if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
+			logger.WithError(err).Warnf("unable to get exit code for process")
+		}
+		// Since we got an error retrieving the exit code, make sure that the
+		// code we return doesn't incorrectly indicate success.
+		exitCode = -1
+
+		// Fall through here, do not return. This ensures we attempt to
+		// continue the shutdown in HCS and tell the docker engine that the
+		// process/container has exited to avoid a container being dropped on
+		// the floor.
+	}
+
+	if err := p.hcsProcess.Close(); err != nil {
+		logger.WithError(err).Warnf("failed to cleanup hcs process resources")
+	}
+
+	var pendingUpdates bool
+	if p.id == InitProcessName {
+		// Update container status
+		ctr.Lock()
+		ctr.status = StatusStopped
+		ctr.exitedAt = exitedAt
+		ctr.exitCode = uint32(exitCode)
+		close(ctr.waitCh)
+		ctr.Unlock()
+
+		// Handle any servicing
+		if exitCode == 0 && ctr.isWindows && !ctr.ociSpec.Windows.Servicing {
+			pendingUpdates, err = ctr.hcsContainer.HasPendingUpdates()
+			logger.Infof("Pending updates: %v", pendingUpdates)
+			if err != nil {
+				logger.WithError(err).
+					Warnf("failed to check for pending updates (container may have been killed)")
+			}
+		}
+
+		if err := c.shutdownContainer(ctr); err != nil {
+			logger.WithError(err).Warn("failed to shutdown container")
+		} else {
+			logger.Debug("completed container shutdown")
+		}
+
+		if err := ctr.hcsContainer.Close(); err != nil {
+			logger.WithError(err).Error("failed to clean hcs container resources")
+		}
+	}
+
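+	// Servicing containers are reaped synchronously from start(), so no exit
+	// event is sent for them; for everything else, notify the backend.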
+	if !(ctr.isWindows && ctr.ociSpec.Windows.Servicing) {
+		c.eventQ.append(ctr.id, func() {
+			ei := EventInfo{
+				ContainerID:   ctr.id,
+				ProcessID:     p.id,
+				Pid:           uint32(p.pid),
+				ExitCode:      uint32(exitCode),
+				ExitedAt:      exitedAt,
+				UpdatePending: pendingUpdates,
+			}
+			c.logger.WithFields(logrus.Fields{
+				"container":  ctr.id,
+				"event":      EventExit,
+				"event-info": ei,
+			}).Info("sending event")
+			err := c.backend.ProcessEvent(ctr.id, EventExit, ei)
+			if err != nil {
+				c.logger.WithError(err).WithFields(logrus.Fields{
+					"container":  ctr.id,
+					"event":      EventExit,
+					"event-info": ei,
+				}).Error("failed to process event")
+			}
+			if p.id != InitProcessName {
+				ctr.Lock()
+				delete(ctr.execs, p.id)
+				ctr.Unlock()
+			}
+		})
+	}
+
+	return exitCode
+}

+ 0 - 104
libcontainerd/client_solaris.go

@@ -1,104 +0,0 @@
-package libcontainerd
-
-import (
-	containerd "github.com/containerd/containerd/api/grpc/types"
-	"golang.org/x/net/context"
-)
-
-type client struct {
-	clientCommon
-
-	// Platform specific properties below here.
-	remote        *remote
-	q             queue
-	exitNotifiers map[string]*exitNotifier
-	liveRestore   bool
-}
-
-// GetServerVersion returns the connected server version information
-func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
-	resp, err := clnt.remote.apiClient.GetServerVersion(ctx, &containerd.GetServerVersionRequest{})
-	if err != nil {
-		return nil, err
-	}
-
-	sv := &ServerVersion{
-		GetServerVersionResponse: *resp,
-	}
-
-	return sv, nil
-}
-
-func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, specp Process, attachStdio StdioCallback) (int, error) {
-	return -1, nil
-}
-
-func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
-	return nil
-}
-
-func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
-	return nil
-}
-
-func (clnt *client) Pause(containerID string) error {
-	return nil
-}
-
-func (clnt *client) Resume(containerID string) error {
-	return nil
-}
-
-func (clnt *client) Stats(containerID string) (*Stats, error) {
-	return nil, nil
-}
-
-func (clnt *client) getExitNotifier(containerID string) *exitNotifier {
-	clnt.mapMutex.RLock()
-	defer clnt.mapMutex.RUnlock()
-	return clnt.exitNotifiers[containerID]
-}
-
-func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier {
-	clnt.mapMutex.Lock()
-	defer clnt.mapMutex.Unlock()
-	w, ok := clnt.exitNotifiers[containerID]
-	if !ok {
-		w = &exitNotifier{c: make(chan struct{}), client: clnt}
-		clnt.exitNotifiers[containerID] = w
-	}
-	return w
-}
-
-// Restore is the handler for restoring a container
-func (clnt *client) Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error {
-	return nil
-}
-
-func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
-	return nil, nil
-}
-
-// Summary returns a summary of the processes running in a container.
-func (clnt *client) Summary(containerID string) ([]Summary, error) {
-	return nil, nil
-}
-
-// UpdateResources updates resources for a running container.
-func (clnt *client) UpdateResources(containerID string, resources Resources) error {
-	// Updating resource isn't supported on Solaris
-	// but we should return nil for enabling updating container
-	return nil
-}
-
-func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
-	return nil
-}
-
-func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
-	return nil
-}
-
-func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
-	return nil, nil
-}

+ 0 - 141
libcontainerd/client_unix.go

@@ -1,141 +0,0 @@
-// +build linux solaris
-
-package libcontainerd
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"strings"
-	"sync"
-
-	containerd "github.com/containerd/containerd/api/grpc/types"
-	"github.com/docker/docker/pkg/idtools"
-	specs "github.com/opencontainers/runtime-spec/specs-go"
-	"github.com/sirupsen/logrus"
-	"golang.org/x/net/context"
-)
-
-func (clnt *client) prepareBundleDir(uid, gid int) (string, error) {
-	root, err := filepath.Abs(clnt.remote.stateDir)
-	if err != nil {
-		return "", err
-	}
-	if uid == 0 && gid == 0 {
-		return root, nil
-	}
-	p := string(filepath.Separator)
-	for _, d := range strings.Split(root, string(filepath.Separator))[1:] {
-		p = filepath.Join(p, d)
-		fi, err := os.Stat(p)
-		if err != nil && !os.IsNotExist(err) {
-			return "", err
-		}
-		if os.IsNotExist(err) || fi.Mode()&1 == 0 {
-			p = fmt.Sprintf("%s.%d.%d", p, uid, gid)
-			if err := idtools.MkdirAndChown(p, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) {
-				return "", err
-			}
-		}
-	}
-	return p, nil
-}
-
-func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) (err error) {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-
-	if _, err := clnt.getContainer(containerID); err == nil {
-		return fmt.Errorf("Container %s is already active", containerID)
-	}
-
-	uid, gid, err := getRootIDs(spec)
-	if err != nil {
-		return err
-	}
-	dir, err := clnt.prepareBundleDir(uid, gid)
-	if err != nil {
-		return err
-	}
-
-	container := clnt.newContainer(filepath.Join(dir, containerID), options...)
-	if err := container.clean(); err != nil {
-		return err
-	}
-
-	defer func() {
-		if err != nil {
-			container.clean()
-			clnt.deleteContainer(containerID)
-		}
-	}()
-
-	if err := idtools.MkdirAllAndChown(container.dir, 0700, idtools.IDPair{uid, gid}); err != nil && !os.IsExist(err) {
-		return err
-	}
-
-	f, err := os.Create(filepath.Join(container.dir, configFilename))
-	if err != nil {
-		return err
-	}
-	defer f.Close()
-	if err := json.NewEncoder(f).Encode(spec); err != nil {
-		return err
-	}
-	return container.start(&spec, checkpoint, checkpointDir, attachStdio)
-}
-
-func (clnt *client) Signal(containerID string, sig int) error {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	_, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
-		Id:     containerID,
-		Pid:    InitFriendlyName,
-		Signal: uint32(sig),
-	})
-	return err
-}
-
-func (clnt *client) newContainer(dir string, options ...CreateOption) *container {
-	container := &container{
-		containerCommon: containerCommon{
-			process: process{
-				dir: dir,
-				processCommon: processCommon{
-					containerID:  filepath.Base(dir),
-					client:       clnt,
-					friendlyName: InitFriendlyName,
-				},
-			},
-			processes: make(map[string]*process),
-		},
-	}
-	for _, option := range options {
-		if err := option.Apply(container); err != nil {
-			logrus.Errorf("libcontainerd: newContainer(): %v", err)
-		}
-	}
-	return container
-}
-
-type exitNotifier struct {
-	id     string
-	client *client
-	c      chan struct{}
-	once   sync.Once
-}
-
-func (en *exitNotifier) close() {
-	en.once.Do(func() {
-		close(en.c)
-		en.client.mapMutex.Lock()
-		if en == en.client.exitNotifiers[en.id] {
-			delete(en.client.exitNotifiers, en.id)
-		}
-		en.client.mapMutex.Unlock()
-	})
-}
-func (en *exitNotifier) wait() <-chan struct{} {
-	return en.c
-}

+ 0 - 886
libcontainerd/client_windows.go

@@ -1,886 +0,0 @@
-package libcontainerd
-
-import (
-	"encoding/json"
-	"errors"
-	"fmt"
-	"io"
-	"io/ioutil"
-	"os"
-	"path"
-	"path/filepath"
-	"regexp"
-	"strings"
-	"syscall"
-	"time"
-
-	"golang.org/x/net/context"
-
-	"github.com/Microsoft/hcsshim"
-	opengcs "github.com/Microsoft/opengcs/client"
-	"github.com/docker/docker/pkg/sysinfo"
-	"github.com/docker/docker/pkg/system"
-	specs "github.com/opencontainers/runtime-spec/specs-go"
-	"github.com/sirupsen/logrus"
-)
-
-type client struct {
-	clientCommon
-
-	// Platform specific properties below here (none presently on Windows)
-}
-
-// Win32 error codes that are used for various workarounds
-// These really should be ALL_CAPS to match golangs syscall library and standard
-// Win32 error conventions, but golint insists on CamelCase.
-const (
-	CoEClassstring     = syscall.Errno(0x800401F3) // Invalid class string
-	ErrorNoNetwork     = syscall.Errno(1222)       // The network is not present or not started
-	ErrorBadPathname   = syscall.Errno(161)        // The specified path is invalid
-	ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object
-)
-
-// defaultOwner is a tag passed to HCS to allow it to differentiate between
-// container creator management stacks. We hard code "docker" in the case
-// of docker.
-const defaultOwner = "docker"
-
-// Create is the entrypoint to create a container from a spec, and if successfully
-// created, start it too. Table below shows the fields required for HCS JSON calling parameters,
-// where if not populated, is omitted.
-// +-----------------+--------------------------------------------+---------------------------------------------------+
-// |                 | Isolation=Process                          | Isolation=Hyper-V                                 |
-// +-----------------+--------------------------------------------+---------------------------------------------------+
-// | VolumePath      | \\?\\Volume{GUIDa}                         |                                                   |
-// | LayerFolderPath | %root%\windowsfilter\containerID           | %root%\windowsfilter\containerID (servicing only) |
-// | Layers[]        | ID=GUIDb;Path=%root%\windowsfilter\layerID | ID=GUIDb;Path=%root%\windowsfilter\layerID        |
-// | HvRuntime       |                                            | ImagePath=%root%\BaseLayerID\UtilityVM            |
-// +-----------------+--------------------------------------------+---------------------------------------------------+
-//
-// Isolation=Process example:
-//
-// {
-//	"SystemType": "Container",
-//	"Name": "5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
-//	"Owner": "docker",
-//	"VolumePath": "\\\\\\\\?\\\\Volume{66d1ef4c-7a00-11e6-8948-00155ddbef9d}",
-//	"IgnoreFlushesDuringBoot": true,
-//	"LayerFolderPath": "C:\\\\control\\\\windowsfilter\\\\5e0055c814a6005b8e57ac59f9a522066e0af12b48b3c26a9416e23907698776",
-//	"Layers": [{
-//		"ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
-//		"Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
-//	}],
-//	"HostName": "5e0055c814a6",
-//	"MappedDirectories": [],
-//	"HvPartition": false,
-//	"EndpointList": ["eef2649d-bb17-4d53-9937-295a8efe6f2c"],
-//	"Servicing": false
-//}
-//
-// Isolation=Hyper-V example:
-//
-//{
-//	"SystemType": "Container",
-//	"Name": "475c2c58933b72687a88a441e7e0ca4bd72d76413c5f9d5031fee83b98f6045d",
-//	"Owner": "docker",
-//	"IgnoreFlushesDuringBoot": true,
-//	"Layers": [{
-//		"ID": "18955d65-d45a-557b-bf1c-49d6dfefc526",
-//		"Path": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c"
-//	}],
-//	"HostName": "475c2c58933b",
-//	"MappedDirectories": [],
-//	"HvPartition": true,
-//	"EndpointList": ["e1bb1e61-d56f-405e-b75d-fd520cefa0cb"],
-//	"DNSSearchList": "a.com,b.com,c.com",
-//	"HvRuntime": {
-//		"ImagePath": "C:\\\\control\\\\windowsfilter\\\\65bf96e5760a09edf1790cb229e2dfb2dbd0fcdc0bf7451bae099106bfbfea0c\\\\UtilityVM"
-//	},
-//	"Servicing": false
-//}
-func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	if b, err := json.Marshal(spec); err == nil {
-		logrus.Debugln("libcontainerd: client.Create() with spec", string(b))
-	}
-
-	// spec.Linux must be nil for Windows containers, but spec.Windows will be filled in regardless of container platform.
-	// This is a temporary workaround due to LCOW requiring layer folder paths, which are stored under spec.Windows.
-	// TODO: @darrenstahlmsft fix this once the OCI spec is updated to support layer folder paths for LCOW
-	if spec.Linux == nil {
-		return clnt.createWindows(containerID, checkpoint, checkpointDir, spec, attachStdio, options...)
-	}
-	return clnt.createLinux(containerID, checkpoint, checkpointDir, spec, attachStdio, options...)
-}
-
-func (clnt *client) createWindows(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error {
-	configuration := &hcsshim.ContainerConfig{
-		SystemType: "Container",
-		Name:       containerID,
-		Owner:      defaultOwner,
-		IgnoreFlushesDuringBoot: spec.Windows.IgnoreFlushesDuringBoot,
-		HostName:                spec.Hostname,
-		HvPartition:             false,
-		Servicing:               spec.Windows.Servicing,
-	}
-
-	if spec.Windows.Resources != nil {
-		if spec.Windows.Resources.CPU != nil {
-			if spec.Windows.Resources.CPU.Count != nil {
-				// This check is being done here rather than in adaptContainerSettings
-				// because we don't want to update the HostConfig in case this container
-				// is moved to a host with more CPUs than this one.
-				cpuCount := *spec.Windows.Resources.CPU.Count
-				hostCPUCount := uint64(sysinfo.NumCPU())
-				if cpuCount > hostCPUCount {
-					logrus.Warnf("Changing requested CPUCount of %d to current number of processors, %d", cpuCount, hostCPUCount)
-					cpuCount = hostCPUCount
-				}
-				configuration.ProcessorCount = uint32(cpuCount)
-			}
-			if spec.Windows.Resources.CPU.Shares != nil {
-				configuration.ProcessorWeight = uint64(*spec.Windows.Resources.CPU.Shares)
-			}
-			if spec.Windows.Resources.CPU.Maximum != nil {
-				configuration.ProcessorMaximum = int64(*spec.Windows.Resources.CPU.Maximum)
-			}
-		}
-		if spec.Windows.Resources.Memory != nil {
-			if spec.Windows.Resources.Memory.Limit != nil {
-				configuration.MemoryMaximumInMB = int64(*spec.Windows.Resources.Memory.Limit) / 1024 / 1024
-			}
-		}
-		if spec.Windows.Resources.Storage != nil {
-			if spec.Windows.Resources.Storage.Bps != nil {
-				configuration.StorageBandwidthMaximum = *spec.Windows.Resources.Storage.Bps
-			}
-			if spec.Windows.Resources.Storage.Iops != nil {
-				configuration.StorageIOPSMaximum = *spec.Windows.Resources.Storage.Iops
-			}
-		}
-	}
-
-	if spec.Windows.HyperV != nil {
-		configuration.HvPartition = true
-	}
-
-	if spec.Windows.Network != nil {
-		configuration.EndpointList = spec.Windows.Network.EndpointList
-		configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
-		if spec.Windows.Network.DNSSearchList != nil {
-			configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
-		}
-		configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
-	}
-
-	if cs, ok := spec.Windows.CredentialSpec.(string); ok {
-		configuration.Credentials = cs
-	}
-
-	// We must have least two layers in the spec, the bottom one being a base image,
-	// the top one being the RW layer.
-	if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) < 2 {
-		return fmt.Errorf("OCI spec is invalid - at least two LayerFolders must be supplied to the runtime")
-	}
-
-	// Strip off the top-most layer as that's passed in separately to HCS
-	configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
-	layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
-
-	if configuration.HvPartition {
-		// We don't currently support setting the utility VM image explicitly.
-		// TODO @swernli/jhowardmsft circa RS3/4, this may be re-locatable.
-		if spec.Windows.HyperV.UtilityVMPath != "" {
-			return errors.New("runtime does not support an explicit utility VM path for Hyper-V containers")
-		}
-
-		// Find the upper-most utility VM image.
-		var uvmImagePath string
-		for _, path := range layerFolders {
-			fullPath := filepath.Join(path, "UtilityVM")
-			_, err := os.Stat(fullPath)
-			if err == nil {
-				uvmImagePath = fullPath
-				break
-			}
-			if !os.IsNotExist(err) {
-				return err
-			}
-		}
-		if uvmImagePath == "" {
-			return errors.New("utility VM image could not be found")
-		}
-		configuration.HvRuntime = &hcsshim.HvRuntime{ImagePath: uvmImagePath}
-
-		if spec.Root.Path != "" {
-			return errors.New("OCI spec is invalid - Root.Path must be omitted for a Hyper-V container")
-		}
-	} else {
-		const volumeGUIDRegex = `^\\\\\?\\(Volume)\{{0,1}[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}(\}){0,1}\}\\$`
-		if _, err := regexp.MatchString(volumeGUIDRegex, spec.Root.Path); err != nil {
-			return fmt.Errorf(`OCI spec is invalid - Root.Path '%s' must be a volume GUID path in the format '\\?\Volume{GUID}\'`, spec.Root.Path)
-		}
-		// HCS API requires the trailing backslash to be removed
-		configuration.VolumePath = spec.Root.Path[:len(spec.Root.Path)-1]
-	}
-
-	if spec.Root.Readonly {
-		return errors.New(`OCI spec is invalid - Root.Readonly must not be set on Windows`)
-	}
-
-	for _, layerPath := range layerFolders {
-		_, filename := filepath.Split(layerPath)
-		g, err := hcsshim.NameToGuid(filename)
-		if err != nil {
-			return err
-		}
-		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
-			ID:   g.ToString(),
-			Path: layerPath,
-		})
-	}
-
-	// Add the mounts (volumes, bind mounts etc) to the structure
-	var mds []hcsshim.MappedDir
-	var mps []hcsshim.MappedPipe
-	for _, mount := range spec.Mounts {
-		const pipePrefix = `\\.\pipe\`
-		if mount.Type != "" {
-			return fmt.Errorf("OCI spec is invalid - Mount.Type '%s' must not be set", mount.Type)
-		}
-		if strings.HasPrefix(mount.Destination, pipePrefix) {
-			mp := hcsshim.MappedPipe{
-				HostPath:          mount.Source,
-				ContainerPipeName: mount.Destination[len(pipePrefix):],
-			}
-			mps = append(mps, mp)
-		} else {
-			md := hcsshim.MappedDir{
-				HostPath:      mount.Source,
-				ContainerPath: mount.Destination,
-				ReadOnly:      false,
-			}
-			for _, o := range mount.Options {
-				if strings.ToLower(o) == "ro" {
-					md.ReadOnly = true
-				}
-			}
-			mds = append(mds, md)
-		}
-	}
-	configuration.MappedDirectories = mds
-	if len(mps) > 0 && system.GetOSVersion().Build < 16210 { // replace with Win10 RS3 build number at RTM
-		return errors.New("named pipe mounts are not supported on this version of Windows")
-	}
-	configuration.MappedPipes = mps
-
-	hcsContainer, err := hcsshim.CreateContainer(containerID, configuration)
-	if err != nil {
-		return err
-	}
-
-	// Construct a container object for calling start on it.
-	container := &container{
-		containerCommon: containerCommon{
-			process: process{
-				processCommon: processCommon{
-					containerID:  containerID,
-					client:       clnt,
-					friendlyName: InitFriendlyName,
-				},
-			},
-			processes: make(map[string]*process),
-		},
-		isWindows:    true,
-		ociSpec:      spec,
-		hcsContainer: hcsContainer,
-	}
-
-	container.options = options
-	for _, option := range options {
-		if err := option.Apply(container); err != nil {
-			logrus.Errorf("libcontainerd: %v", err)
-		}
-	}
-
-	// Call start, and if it fails, delete the container from our
-	// internal structure, start will keep HCS in sync by deleting the
-	// container there.
-	logrus.Debugf("libcontainerd: createWindows() id=%s, Calling start()", containerID)
-	if err := container.start(attachStdio); err != nil {
-		clnt.deleteContainer(containerID)
-		return err
-	}
-
-	logrus.Debugf("libcontainerd: createWindows() id=%s completed successfully", containerID)
-	return nil
-
-}
-
-func (clnt *client) createLinux(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error {
-	logrus.Debugf("libcontainerd: createLinux(): containerId %s ", containerID)
-
-	var lcowOpt *LCOWOption
-	for _, option := range options {
-		if lcow, ok := option.(*LCOWOption); ok {
-			lcowOpt = lcow
-		}
-	}
-	if lcowOpt == nil || lcowOpt.Config == nil {
-		return fmt.Errorf("lcow option must be supplied to the runtime")
-	}
-
-	configuration := &hcsshim.ContainerConfig{
-		HvPartition:   true,
-		Name:          containerID,
-		SystemType:    "container",
-		ContainerType: "linux",
-		Owner:         defaultOwner,
-		TerminateOnLastHandleClosed: true,
-	}
-
-	if lcowOpt.Config.ActualMode == opengcs.ModeActualVhdx {
-		configuration.HvRuntime = &hcsshim.HvRuntime{
-			ImagePath:          lcowOpt.Config.Vhdx,
-			BootSource:         "Vhd",
-			WritableBootSource: false,
-		}
-	} else {
-		configuration.HvRuntime = &hcsshim.HvRuntime{
-			ImagePath:           lcowOpt.Config.KirdPath,
-			LinuxKernelFile:     lcowOpt.Config.KernelFile,
-			LinuxInitrdFile:     lcowOpt.Config.InitrdFile,
-			LinuxBootParameters: lcowOpt.Config.BootParameters,
-		}
-	}
-
-	if spec.Windows == nil {
-		return fmt.Errorf("spec.Windows must not be nil for LCOW containers")
-	}
-
-	// We must have least one layer in the spec
-	if spec.Windows.LayerFolders == nil || len(spec.Windows.LayerFolders) == 0 {
-		return fmt.Errorf("OCI spec is invalid - at least one LayerFolders must be supplied to the runtime")
-	}
-
-	// Strip off the top-most layer as that's passed in separately to HCS
-	configuration.LayerFolderPath = spec.Windows.LayerFolders[len(spec.Windows.LayerFolders)-1]
-	layerFolders := spec.Windows.LayerFolders[:len(spec.Windows.LayerFolders)-1]
-
-	for _, layerPath := range layerFolders {
-		_, filename := filepath.Split(layerPath)
-		g, err := hcsshim.NameToGuid(filename)
-		if err != nil {
-			return err
-		}
-		configuration.Layers = append(configuration.Layers, hcsshim.Layer{
-			ID:   g.ToString(),
-			Path: filepath.Join(layerPath, "layer.vhd"),
-		})
-	}
-
-	if spec.Windows.Network != nil {
-		configuration.EndpointList = spec.Windows.Network.EndpointList
-		configuration.AllowUnqualifiedDNSQuery = spec.Windows.Network.AllowUnqualifiedDNSQuery
-		if spec.Windows.Network.DNSSearchList != nil {
-			configuration.DNSSearchList = strings.Join(spec.Windows.Network.DNSSearchList, ",")
-		}
-		configuration.NetworkSharedContainerName = spec.Windows.Network.NetworkSharedContainerName
-	}
-
-	// Add the mounts (volumes, bind mounts etc) to the structure. We have to do
-	// some translation for both the mapped directories passed into HCS and in
-	// the spec.
-	//
-	// For HCS, we only pass in the mounts from the spec which are type "bind".
-	// Further, the "ContainerPath" field (which is a little mis-leadingly
-	// named when it applies to the utility VM rather than the container in the
-	// utility VM) is moved to under /tmp/gcs/<ID>/binds, where this is passed
-	// by the caller through a 'uvmpath' option.
-	//
-	// We do similar translation for the mounts in the spec by stripping out
-	// the uvmpath option, and translating the Source path to the location in the
-	// utility VM calculated above.
-	//
-	// From inside the utility VM, you would see a 9p mount such as in the following
-	// where a host folder has been mapped to /target. The line with /tmp/gcs/<ID>/binds
-	// specifically:
-	//
-	//	/ # mount
-	//	rootfs on / type rootfs (rw,size=463736k,nr_inodes=115934)
-	//	proc on /proc type proc (rw,relatime)
-	//	sysfs on /sys type sysfs (rw,relatime)
-	//	udev on /dev type devtmpfs (rw,relatime,size=498100k,nr_inodes=124525,mode=755)
-	//	tmpfs on /run type tmpfs (rw,relatime)
-	//	cgroup on /sys/fs/cgroup type cgroup (rw,relatime,cpuset,cpu,cpuacct,blkio,memory,devices,freezer,net_cls,perf_event,net_prio,hugetlb,pids,rdma)
-	//	mqueue on /dev/mqueue type mqueue (rw,relatime)
-	//	devpts on /dev/pts type devpts (rw,relatime,mode=600,ptmxmode=000)
-	//	/binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target on /binds/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/target type 9p (rw,sync,dirsync,relatime,trans=fd,rfdno=6,wfdno=6)
-	//	/dev/pmem0 on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0 type ext4 (ro,relatime,block_validity,delalloc,norecovery,barrier,dax,user_xattr,acl)
-	//	/dev/sda on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch type ext4 (rw,relatime,block_validity,delalloc,barrier,user_xattr,acl)
-	//	overlay on /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/rootfs type overlay (rw,relatime,lowerdir=/tmp/base/:/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/layer0,upperdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/upper,workdir=/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc/scratch/work)
-	//
-	//  /tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l
-	//	total 16
-	//	drwx------    3 0        0               60 Sep  7 18:54 binds
-	//	-rw-r--r--    1 0        0             3345 Sep  7 18:54 config.json
-	//	drwxr-xr-x   10 0        0             4096 Sep  6 17:26 layer0
-	//	drwxr-xr-x    1 0        0             4096 Sep  7 18:54 rootfs
-	//	drwxr-xr-x    5 0        0             4096 Sep  7 18:54 scratch
-	//
-	//	/tmp/gcs/b3ea9126d67702173647ece2744f7c11181c0150e9890fc9a431849838033edc # ls -l binds
-	//	total 0
-	//	drwxrwxrwt    2 0        0             4096 Sep  7 16:51 target
-
-	mds := []hcsshim.MappedDir{}
-	specMounts := []specs.Mount{}
-	for _, mount := range spec.Mounts {
-		specMount := mount
-		if mount.Type == "bind" {
-			// Strip out the uvmpath from the options
-			updatedOptions := []string{}
-			uvmPath := ""
-			readonly := false
-			for _, opt := range mount.Options {
-				dropOption := false
-				elements := strings.SplitN(opt, "=", 2)
-				switch elements[0] {
-				case "uvmpath":
-					uvmPath = elements[1]
-					dropOption = true
-				case "rw":
-				case "ro":
-					readonly = true
-				case "rbind":
-				default:
-					return fmt.Errorf("unsupported option %q", opt)
-				}
-				if !dropOption {
-					updatedOptions = append(updatedOptions, opt)
-				}
-			}
-			mount.Options = updatedOptions
-			if uvmPath == "" {
-				return fmt.Errorf("no uvmpath for bind mount %+v", mount)
-			}
-			md := hcsshim.MappedDir{
-				HostPath:          mount.Source,
-				ContainerPath:     path.Join(uvmPath, mount.Destination),
-				CreateInUtilityVM: true,
-				ReadOnly:          readonly,
-			}
-			mds = append(mds, md)
-			specMount.Source = path.Join(uvmPath, mount.Destination)
-		}
-		specMounts = append(specMounts, specMount)
-	}
-	configuration.MappedDirectories = mds
-
-	hcsContainer, err := hcsshim.CreateContainer(containerID, configuration)
-	if err != nil {
-		return err
-	}
-
-	spec.Mounts = specMounts
-
-	// Construct a container object for calling start on it.
-	container := &container{
-		containerCommon: containerCommon{
-			process: process{
-				processCommon: processCommon{
-					containerID:  containerID,
-					client:       clnt,
-					friendlyName: InitFriendlyName,
-				},
-			},
-			processes: make(map[string]*process),
-		},
-		ociSpec:      spec,
-		hcsContainer: hcsContainer,
-	}
-
-	container.options = options
-	for _, option := range options {
-		if err := option.Apply(container); err != nil {
-			logrus.Errorf("libcontainerd: createLinux() %v", err)
-		}
-	}
-
-	// Call start, and if it fails, delete the container from our
-	// internal structure, start will keep HCS in sync by deleting the
-	// container there.
-	logrus.Debugf("libcontainerd: createLinux() id=%s, Calling start()", containerID)
-	if err := container.start(attachStdio); err != nil {
-		clnt.deleteContainer(containerID)
-		return err
-	}
-
-	logrus.Debugf("libcontainerd: createLinux() id=%s completed successfully", containerID)
-	return nil
-}
-
-// AddProcess is the handler for adding a process to an already running
-// container. It's called through docker exec. It returns the system pid of the
-// exec'd process.
-func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, procToAdd Process, attachStdio StdioCallback) (int, error) {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	container, err := clnt.getContainer(containerID)
-	if err != nil {
-		return -1, err
-	}
-
-	defer container.debugGCS()
-
-	// Note we always tell HCS to
-	// create stdout as it's required regardless of '-i' or '-t' options, so that
-	// docker can always grab the output through logs. We also tell HCS to always
-	// create stdin, even if it's not used - it will be closed shortly. Stderr
-	// is only created if it we're not -t.
-	createProcessParms := hcsshim.ProcessConfig{
-		CreateStdInPipe:  true,
-		CreateStdOutPipe: true,
-		CreateStdErrPipe: !procToAdd.Terminal,
-	}
-	if procToAdd.Terminal {
-		createProcessParms.EmulateConsole = true
-		if procToAdd.ConsoleSize != nil {
-			createProcessParms.ConsoleSize[0] = uint(procToAdd.ConsoleSize.Height)
-			createProcessParms.ConsoleSize[1] = uint(procToAdd.ConsoleSize.Width)
-		}
-	}
-
-	// Take working directory from the process to add if it is defined,
-	// otherwise take from the first process.
-	if procToAdd.Cwd != "" {
-		createProcessParms.WorkingDirectory = procToAdd.Cwd
-	} else {
-		createProcessParms.WorkingDirectory = container.ociSpec.Process.Cwd
-	}
-
-	// Configure the environment for the process
-	createProcessParms.Environment = setupEnvironmentVariables(procToAdd.Env)
-	if container.isWindows {
-		createProcessParms.CommandLine = strings.Join(procToAdd.Args, " ")
-	} else {
-		createProcessParms.CommandArgs = procToAdd.Args
-	}
-	createProcessParms.User = procToAdd.User.Username
-
-	logrus.Debugf("libcontainerd: commandLine: %s", createProcessParms.CommandLine)
-
-	// Start the command running in the container.
-	var stdout, stderr io.ReadCloser
-	var stdin io.WriteCloser
-	newProcess, err := container.hcsContainer.CreateProcess(&createProcessParms)
-	if err != nil {
-		logrus.Errorf("libcontainerd: AddProcess(%s) CreateProcess() failed %s", containerID, err)
-		return -1, err
-	}
-
-	pid := newProcess.Pid()
-
-	stdin, stdout, stderr, err = newProcess.Stdio()
-	if err != nil {
-		logrus.Errorf("libcontainerd: %s getting std pipes failed %s", containerID, err)
-		return -1, err
-	}
-
-	iopipe := &IOPipe{Terminal: procToAdd.Terminal}
-	iopipe.Stdin = createStdInCloser(stdin, newProcess)
-
-	// Convert io.ReadClosers to io.Readers
-	if stdout != nil {
-		iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
-	}
-	if stderr != nil {
-		iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
-	}
-
-	proc := &process{
-		processCommon: processCommon{
-			containerID:  containerID,
-			friendlyName: processFriendlyName,
-			client:       clnt,
-			systemPid:    uint32(pid),
-		},
-		hcsProcess: newProcess,
-	}
-
-	// Add the process to the container's list of processes
-	container.processes[processFriendlyName] = proc
-
-	// Tell the engine to attach streams back to the client
-	if err := attachStdio(*iopipe); err != nil {
-		return -1, err
-	}
-
-	// Spin up a go routine waiting for exit to handle cleanup
-	go container.waitExit(proc, false)
-
-	return pid, nil
-}
-
-// Signal handles `docker stop` on Windows. While Linux has support for
-// the full range of signals, signals aren't really implemented on Windows.
-// We fake supporting regular stop and -9 to force kill.
-func (clnt *client) Signal(containerID string, sig int) error {
-	var (
-		cont *container
-		err  error
-	)
-
-	// Get the container as we need it to get the container handle.
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	if cont, err = clnt.getContainer(containerID); err != nil {
-		return err
-	}
-
-	cont.manualStopRequested = true
-
-	logrus.Debugf("libcontainerd: Signal() containerID=%s sig=%d pid=%d", containerID, sig, cont.systemPid)
-
-	if syscall.Signal(sig) == syscall.SIGKILL {
-		// Terminate the compute system
-		if err := cont.hcsContainer.Terminate(); err != nil {
-			if !hcsshim.IsPending(err) {
-				logrus.Errorf("libcontainerd: failed to terminate %s - %q", containerID, err)
-			}
-		}
-	} else {
-		// Shut down the container
-		if err := cont.hcsContainer.Shutdown(); err != nil {
-			if !hcsshim.IsPending(err) && !hcsshim.IsAlreadyStopped(err) {
-				// ignore errors
-				logrus.Warnf("libcontainerd: failed to shutdown container %s: %q", containerID, err)
-			}
-		}
-	}
-
-	return nil
-}
-
-// While Linux has support for the full range of signals, signals aren't really implemented on Windows.
-// We try to terminate the specified process whatever signal is requested.
-func (clnt *client) SignalProcess(containerID string, processFriendlyName string, sig int) error {
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	cont, err := clnt.getContainer(containerID)
-	if err != nil {
-		return err
-	}
-
-	for _, p := range cont.processes {
-		if p.friendlyName == processFriendlyName {
-			return p.hcsProcess.Kill()
-		}
-	}
-
-	return fmt.Errorf("SignalProcess could not find process %s in %s", processFriendlyName, containerID)
-}
-
-// Resize handles a CLI event to resize an interactive docker run or docker exec
-// window.
-func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
-	// Get the libcontainerd container object
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	cont, err := clnt.getContainer(containerID)
-	if err != nil {
-		return err
-	}
-
-	h, w := uint16(height), uint16(width)
-
-	if processFriendlyName == InitFriendlyName {
-		logrus.Debugln("libcontainerd: resizing systemPID in", containerID, cont.process.systemPid)
-		return cont.process.hcsProcess.ResizeConsole(w, h)
-	}
-
-	for _, p := range cont.processes {
-		if p.friendlyName == processFriendlyName {
-			logrus.Debugln("libcontainerd: resizing exec'd process", containerID, p.systemPid)
-			return p.hcsProcess.ResizeConsole(w, h)
-		}
-	}
-
-	return fmt.Errorf("Resize could not find containerID %s to resize", containerID)
-
-}
-
-// Pause handles pause requests for containers
-func (clnt *client) Pause(containerID string) error {
-	unlockContainer := true
-	// Get the libcontainerd container object
-	clnt.lock(containerID)
-	defer func() {
-		if unlockContainer {
-			clnt.unlock(containerID)
-		}
-	}()
-	container, err := clnt.getContainer(containerID)
-	if err != nil {
-		return err
-	}
-
-	if container.ociSpec.Windows.HyperV == nil {
-		return errors.New("cannot pause Windows Server Containers")
-	}
-
-	err = container.hcsContainer.Pause()
-	if err != nil {
-		return err
-	}
-
-	// Unlock container before calling back into the daemon
-	unlockContainer = false
-	clnt.unlock(containerID)
-
-	return clnt.backend.StateChanged(containerID, StateInfo{
-		CommonStateInfo: CommonStateInfo{
-			State: StatePause,
-		}})
-}
-
-// Resume handles resume requests for containers
-func (clnt *client) Resume(containerID string) error {
-	unlockContainer := true
-	// Get the libcontainerd container object
-	clnt.lock(containerID)
-	defer func() {
-		if unlockContainer {
-			clnt.unlock(containerID)
-		}
-	}()
-	container, err := clnt.getContainer(containerID)
-	if err != nil {
-		return err
-	}
-
-	// This should never happen, since Windows Server Containers cannot be paused
-
-	if container.ociSpec.Windows.HyperV == nil {
-		return errors.New("cannot resume Windows Server Containers")
-	}
-
-	err = container.hcsContainer.Resume()
-	if err != nil {
-		return err
-	}
-
-	// Unlock container before calling back into the daemon
-	unlockContainer = false
-	clnt.unlock(containerID)
-
-	return clnt.backend.StateChanged(containerID, StateInfo{
-		CommonStateInfo: CommonStateInfo{
-			State: StateResume,
-		}})
-}
-
-// Stats handles stats requests for containers
-func (clnt *client) Stats(containerID string) (*Stats, error) {
-	// Get the libcontainerd container object
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	container, err := clnt.getContainer(containerID)
-	if err != nil {
-		return nil, err
-	}
-	s, err := container.hcsContainer.Statistics()
-	if err != nil {
-		return nil, err
-	}
-	st := Stats(s)
-	return &st, nil
-}
-
-// Restore is the handler for restoring a container
-func (clnt *client) Restore(containerID string, _ StdioCallback, unusedOnWindows ...CreateOption) error {
-	logrus.Debugf("libcontainerd: Restore(%s)", containerID)
-
-	// TODO Windows: On RS1, a re-attach isn't possible.
-	// However, there is a scenario in which there is an issue.
-	// Consider a background container. The daemon dies unexpectedly.
-	// HCS will still have the compute service alive and running.
-	// For consistence, we call in to shoot it regardless if HCS knows about it
-	// We explicitly just log a warning if the terminate fails.
-	// Then we tell the backend the container exited.
-	if hc, err := hcsshim.OpenContainer(containerID); err == nil {
-		const terminateTimeout = time.Minute * 2
-		err := hc.Terminate()
-
-		if hcsshim.IsPending(err) {
-			err = hc.WaitTimeout(terminateTimeout)
-		} else if hcsshim.IsAlreadyStopped(err) {
-			err = nil
-		}
-
-		if err != nil {
-			logrus.Warnf("libcontainerd: failed to terminate %s on restore - %q", containerID, err)
-			return err
-		}
-	}
-	return clnt.backend.StateChanged(containerID, StateInfo{
-		CommonStateInfo: CommonStateInfo{
-			State:    StateExit,
-			ExitCode: 1 << 31,
-		}})
-}
-
-// GetPidsForContainer returns a list of process IDs running in a container.
-// Not used on Windows.
-func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
-	return nil, errors.New("not implemented on Windows")
-}
-
-// Summary returns a summary of the processes running in a container.
-// This is present in Windows to support docker top. In linux, the
-// engine shells out to ps to get process information. On Windows, as
-// the containers could be Hyper-V containers, they would not be
-// visible on the container host. However, libcontainerd does have
-// that information.
-func (clnt *client) Summary(containerID string) ([]Summary, error) {
-
-	// Get the libcontainerd container object
-	clnt.lock(containerID)
-	defer clnt.unlock(containerID)
-	container, err := clnt.getContainer(containerID)
-	if err != nil {
-		return nil, err
-	}
-	p, err := container.hcsContainer.ProcessList()
-	if err != nil {
-		return nil, err
-	}
-	pl := make([]Summary, len(p))
-	for i := range p {
-		pl[i] = Summary(p[i])
-	}
-	return pl, nil
-}
-
-// UpdateResources updates resources for a running container.
-func (clnt *client) UpdateResources(containerID string, resources Resources) error {
-	// Updating resource isn't supported on Windows
-	// but we should return nil for enabling updating container
-	return nil
-}
-
-func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
-	return errors.New("Windows: Containers do not support checkpoints")
-}
-
-func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
-	return errors.New("Windows: Containers do not support checkpoints")
-}
-
-func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
-	return nil, errors.New("Windows: Containers do not support checkpoints")
-}
-
-func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
-	return &ServerVersion{}, nil
-}

+ 0 - 13
libcontainerd/container.go

@@ -1,13 +0,0 @@
-package libcontainerd
-
-const (
-	// InitFriendlyName is the name given in the lookup map of processes
-	// for the first process started in a container.
-	InitFriendlyName = "init"
-	configFilename   = "config.json"
-)
-
-type containerCommon struct {
-	process
-	processes map[string]*process
-}

+ 0 - 246
libcontainerd/container_unix.go

@@ -1,246 +0,0 @@
-// +build linux solaris
-
-package libcontainerd
-
-import (
-	"encoding/json"
-	"io"
-	"io/ioutil"
-	"os"
-	"path/filepath"
-	"sync"
-	"time"
-
-	containerd "github.com/containerd/containerd/api/grpc/types"
-	"github.com/docker/docker/pkg/ioutils"
-	specs "github.com/opencontainers/runtime-spec/specs-go"
-	"github.com/sirupsen/logrus"
-	"github.com/tonistiigi/fifo"
-	"golang.org/x/net/context"
-	"golang.org/x/sys/unix"
-)
-
-type container struct {
-	containerCommon
-
-	// Platform specific fields are below here.
-	pauseMonitor
-	oom         bool
-	runtime     string
-	runtimeArgs []string
-}
-
-type runtime struct {
-	path string
-	args []string
-}
-
-// WithRuntime sets the runtime to be used for the created container
-func WithRuntime(path string, args []string) CreateOption {
-	return runtime{path, args}
-}
-
-func (rt runtime) Apply(p interface{}) error {
-	if pr, ok := p.(*container); ok {
-		pr.runtime = rt.path
-		pr.runtimeArgs = rt.args
-	}
-	return nil
-}
-
-func (ctr *container) clean() error {
-	if os.Getenv("LIBCONTAINERD_NOCLEAN") == "1" {
-		return nil
-	}
-	if _, err := os.Lstat(ctr.dir); err != nil {
-		if os.IsNotExist(err) {
-			return nil
-		}
-		return err
-	}
-
-	if err := os.RemoveAll(ctr.dir); err != nil {
-		return err
-	}
-	return nil
-}
-
-// cleanProcess removes the fifos used by an additional process.
-// Caller needs to lock container ID before calling this method.
-func (ctr *container) cleanProcess(id string) {
-	if p, ok := ctr.processes[id]; ok {
-		for _, i := range []int{unix.Stdin, unix.Stdout, unix.Stderr} {
-			if err := os.Remove(p.fifo(i)); err != nil && !os.IsNotExist(err) {
-				logrus.Warnf("libcontainerd: failed to remove %v for process %v: %v", p.fifo(i), id, err)
-			}
-		}
-	}
-	delete(ctr.processes, id)
-}
-
-func (ctr *container) spec() (*specs.Spec, error) {
-	var spec specs.Spec
-	dt, err := ioutil.ReadFile(filepath.Join(ctr.dir, configFilename))
-	if err != nil {
-		return nil, err
-	}
-	if err := json.Unmarshal(dt, &spec); err != nil {
-		return nil, err
-	}
-	return &spec, nil
-}
-
-func (ctr *container) start(spec *specs.Spec, checkpoint, checkpointDir string, attachStdio StdioCallback) (err error) {
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-	ready := make(chan struct{})
-
-	fifoCtx, cancel := context.WithCancel(context.Background())
-	defer func() {
-		if err != nil {
-			cancel()
-		}
-	}()
-
-	iopipe, err := ctr.openFifos(fifoCtx, spec.Process.Terminal)
-	if err != nil {
-		return err
-	}
-
-	var stdinOnce sync.Once
-
-	// we need to delay stdin closure after container start or else "stdin close"
-	// event will be rejected by containerd.
-	// stdin closure happens in attachStdio
-	stdin := iopipe.Stdin
-	iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
-		var err error
-		stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
-			err = stdin.Close()
-			go func() {
-				select {
-				case <-ready:
-				case <-ctx.Done():
-				}
-				select {
-				case <-ready:
-					if err := ctr.sendCloseStdin(); err != nil {
-						logrus.Warnf("failed to close stdin: %+v", err)
-					}
-				default:
-				}
-			}()
-		})
-		return err
-	})
-
-	r := &containerd.CreateContainerRequest{
-		Id:            ctr.containerID,
-		BundlePath:    ctr.dir,
-		Stdin:         ctr.fifo(unix.Stdin),
-		Stdout:        ctr.fifo(unix.Stdout),
-		Stderr:        ctr.fifo(unix.Stderr),
-		Checkpoint:    checkpoint,
-		CheckpointDir: checkpointDir,
-		// check to see if we are running in ramdisk to disable pivot root
-		NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
-		Runtime:     ctr.runtime,
-		RuntimeArgs: ctr.runtimeArgs,
-	}
-	ctr.client.appendContainer(ctr)
-
-	if err := attachStdio(*iopipe); err != nil {
-		ctr.closeFifos(iopipe)
-		return err
-	}
-
-	resp, err := ctr.client.remote.apiClient.CreateContainer(context.Background(), r)
-	if err != nil {
-		ctr.closeFifos(iopipe)
-		return err
-	}
-	ctr.systemPid = systemPid(resp.Container)
-	close(ready)
-
-	return ctr.client.backend.StateChanged(ctr.containerID, StateInfo{
-		CommonStateInfo: CommonStateInfo{
-			State: StateStart,
-			Pid:   ctr.systemPid,
-		}})
-
-}
-
-func (ctr *container) newProcess(friendlyName string) *process {
-	return &process{
-		dir: ctr.dir,
-		processCommon: processCommon{
-			containerID:  ctr.containerID,
-			friendlyName: friendlyName,
-			client:       ctr.client,
-		},
-	}
-}
-
-func (ctr *container) handleEvent(e *containerd.Event) error {
-	ctr.client.lock(ctr.containerID)
-	defer ctr.client.unlock(ctr.containerID)
-	switch e.Type {
-	case StateExit, StatePause, StateResume, StateOOM:
-		st := StateInfo{
-			CommonStateInfo: CommonStateInfo{
-				State:    e.Type,
-				ExitCode: e.Status,
-			},
-			OOMKilled: e.Type == StateExit && ctr.oom,
-		}
-		if e.Type == StateOOM {
-			ctr.oom = true
-		}
-		if e.Type == StateExit && e.Pid != InitFriendlyName {
-			st.ProcessID = e.Pid
-			st.State = StateExitProcess
-		}
-
-		// Remove process from list if we have exited
-		switch st.State {
-		case StateExit:
-			ctr.clean()
-			ctr.client.deleteContainer(e.Id)
-		case StateExitProcess:
-			ctr.cleanProcess(st.ProcessID)
-		}
-		ctr.client.q.append(e.Id, func() {
-			if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
-				logrus.Errorf("libcontainerd: backend.StateChanged(): %v", err)
-			}
-			if e.Type == StatePause || e.Type == StateResume {
-				ctr.pauseMonitor.handle(e.Type)
-			}
-			if e.Type == StateExit {
-				if en := ctr.client.getExitNotifier(e.Id); en != nil {
-					en.close()
-				}
-			}
-		})
-
-	default:
-		logrus.Debugf("libcontainerd: event unhandled: %+v", e)
-	}
-	return nil
-}
-
-// discardFifos attempts to fully read the container fifos to unblock processes
-// that may be blocked on the writer side.
-func (ctr *container) discardFifos() {
-	ctx, _ := context.WithTimeout(context.Background(), 3*time.Second)
-	for _, i := range []int{unix.Stdout, unix.Stderr} {
-		f, err := fifo.OpenFifo(ctx, ctr.fifo(i), unix.O_RDONLY|unix.O_NONBLOCK, 0)
-		if err != nil {
-			logrus.Warnf("error opening fifo %v for discarding: %+v", f, err)
-			continue
-		}
-		go func() {
-			io.Copy(ioutil.Discard, f)
-		}()
-	}
-}
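The start() routine deleted above wraps stdin in a write-closer that closes the pipe immediately but delays the "stdin closed" notification to containerd until the container has actually been created (the ready channel). Below is a minimal sketch of that once-guarded pattern; delayedCloser and notifyPeer are illustrative names standing in for the wrapper and the old sendCloseStdin call, not code from the Docker tree.

package sketch

import (
	"io"
	"log"
	"sync"
)

// delayedCloser closes the wrapped writer at most once and defers the
// peer notification until ready is signalled, mirroring the stdin
// handling in the removed container.start().
type delayedCloser struct {
	io.WriteCloser
	once       sync.Once
	ready      <-chan struct{}
	notifyPeer func() error // stand-in for the old sendCloseStdin call
}

func (d *delayedCloser) Close() error {
	var err error
	d.once.Do(func() {
		err = d.WriteCloser.Close()
		go func() {
			<-d.ready // container creation completed
			if nerr := d.notifyPeer(); nerr != nil {
				log.Printf("failed to notify peer of stdin closure: %v", nerr)
			}
		}()
	})
	return err
}

The real code also bails out when the create call fails instead of waiting forever on ready; that branch is omitted here for brevity.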

+ 0 - 338
libcontainerd/container_windows.go

@@ -1,338 +0,0 @@
-package libcontainerd
-
-import (
-	"encoding/json"
-	"fmt"
-	"io"
-	"io/ioutil"
-	"strings"
-	"time"
-
-	"github.com/Microsoft/hcsshim"
-	"github.com/opencontainers/runtime-spec/specs-go"
-	"github.com/sirupsen/logrus"
-	"golang.org/x/sys/windows"
-)
-
-type container struct {
-	containerCommon
-
-	// Platform specific fields are below here. There are none presently on Windows.
-	options []CreateOption
-
-	// The ociSpec is required, as client.Create() needs a spec,
-	// but can be called from the RestartManager context which does not
-	// otherwise have access to the Spec
-	ociSpec specs.Spec
-
-	isWindows           bool
-	manualStopRequested bool
-	hcsContainer        hcsshim.Container
-}
-
-func (ctr *container) newProcess(friendlyName string) *process {
-	return &process{
-		processCommon: processCommon{
-			containerID:  ctr.containerID,
-			friendlyName: friendlyName,
-			client:       ctr.client,
-		},
-	}
-}
-
-// start starts a created container.
-// Caller needs to lock container ID before calling this method.
-func (ctr *container) start(attachStdio StdioCallback) error {
-	var err error
-
-	// Start the container.  If this is a servicing container, this call will block
-	// until the container is done with the servicing execution.
-	logrus.Debugln("libcontainerd: starting container ", ctr.containerID)
-	if err = ctr.hcsContainer.Start(); err != nil {
-		logrus.Errorf("libcontainerd: failed to start container: %s", err)
-		ctr.debugGCS() // Before terminating!
-		if err := ctr.terminate(); err != nil {
-			logrus.Errorf("libcontainerd: failed to cleanup after a failed Start. %s", err)
-		} else {
-			logrus.Debugln("libcontainerd: cleaned up after failed Start by calling Terminate")
-		}
-		return err
-	}
-
-	defer ctr.debugGCS()
-
-	// Note we always tell HCS to
-	// create stdout as it's required regardless of '-i' or '-t' options, so that
-	// docker can always grab the output through logs. We also tell HCS to always
-	// create stdin, even if it's not used - it will be closed shortly. Stderr
-	// is only created if it we're not -t.
-	var (
-		emulateConsole   bool
-		createStdErrPipe bool
-	)
-	if ctr.ociSpec.Process != nil {
-		emulateConsole = ctr.ociSpec.Process.Terminal
-		createStdErrPipe = !ctr.ociSpec.Process.Terminal && !ctr.ociSpec.Windows.Servicing
-	}
-
-	createProcessParms := &hcsshim.ProcessConfig{
-		EmulateConsole:   emulateConsole,
-		WorkingDirectory: ctr.ociSpec.Process.Cwd,
-		CreateStdInPipe:  !ctr.ociSpec.Windows.Servicing,
-		CreateStdOutPipe: !ctr.ociSpec.Windows.Servicing,
-		CreateStdErrPipe: createStdErrPipe,
-	}
-
-	if ctr.ociSpec.Process != nil && ctr.ociSpec.Process.ConsoleSize != nil {
-		createProcessParms.ConsoleSize[0] = uint(ctr.ociSpec.Process.ConsoleSize.Height)
-		createProcessParms.ConsoleSize[1] = uint(ctr.ociSpec.Process.ConsoleSize.Width)
-	}
-
-	// Configure the environment for the process
-	createProcessParms.Environment = setupEnvironmentVariables(ctr.ociSpec.Process.Env)
-	if ctr.isWindows {
-		createProcessParms.CommandLine = strings.Join(ctr.ociSpec.Process.Args, " ")
-	} else {
-		createProcessParms.CommandArgs = ctr.ociSpec.Process.Args
-	}
-	createProcessParms.User = ctr.ociSpec.Process.User.Username
-
-	// LCOW requires the raw OCI spec passed through HCS and onwards to GCS for the utility VM.
-	if !ctr.isWindows {
-		ociBuf, err := json.Marshal(ctr.ociSpec)
-		if err != nil {
-			return err
-		}
-		ociRaw := json.RawMessage(ociBuf)
-		createProcessParms.OCISpecification = &ociRaw
-	}
-
-	// Start the command running in the container.
-	newProcess, err := ctr.hcsContainer.CreateProcess(createProcessParms)
-	if err != nil {
-		logrus.Errorf("libcontainerd: CreateProcess() failed %s", err)
-		if err := ctr.terminate(); err != nil {
-			logrus.Errorf("libcontainerd: failed to cleanup after a failed CreateProcess. %s", err)
-		} else {
-			logrus.Debugln("libcontainerd: cleaned up after failed CreateProcess by calling Terminate")
-		}
-		return err
-	}
-
-	pid := newProcess.Pid()
-
-	// Save the hcs Process and PID
-	ctr.process.friendlyName = InitFriendlyName
-	ctr.process.hcsProcess = newProcess
-
-	// If this is a servicing container, wait on the process synchronously here and
-	// if it succeeds, wait for it cleanly shutdown and merge into the parent container.
-	if ctr.ociSpec.Windows.Servicing {
-		exitCode := ctr.waitProcessExitCode(&ctr.process)
-
-		if exitCode != 0 {
-			if err := ctr.terminate(); err != nil {
-				logrus.Warnf("libcontainerd: terminating servicing container %s failed: %s", ctr.containerID, err)
-			}
-			return fmt.Errorf("libcontainerd: servicing container %s returned non-zero exit code %d", ctr.containerID, exitCode)
-		}
-
-		return ctr.hcsContainer.WaitTimeout(time.Minute * 5)
-	}
-
-	var stdout, stderr io.ReadCloser
-	var stdin io.WriteCloser
-	stdin, stdout, stderr, err = newProcess.Stdio()
-	if err != nil {
-		logrus.Errorf("libcontainerd: failed to get stdio pipes: %s", err)
-		if err := ctr.terminate(); err != nil {
-			logrus.Errorf("libcontainerd: failed to cleanup after a failed Stdio. %s", err)
-		}
-		return err
-	}
-
-	iopipe := &IOPipe{Terminal: ctr.ociSpec.Process.Terminal}
-
-	iopipe.Stdin = createStdInCloser(stdin, newProcess)
-
-	// Convert io.ReadClosers to io.Readers
-	if stdout != nil {
-		iopipe.Stdout = ioutil.NopCloser(&autoClosingReader{ReadCloser: stdout})
-	}
-	if stderr != nil {
-		iopipe.Stderr = ioutil.NopCloser(&autoClosingReader{ReadCloser: stderr})
-	}
-
-	// Save the PID
-	logrus.Debugf("libcontainerd: process started - PID %d", pid)
-	ctr.systemPid = uint32(pid)
-
-	// Spin up a go routine waiting for exit to handle cleanup
-	go ctr.waitExit(&ctr.process, true)
-
-	ctr.client.appendContainer(ctr)
-
-	if err := attachStdio(*iopipe); err != nil {
-		// OK to return the error here, as waitExit will handle tear-down in HCS
-		return err
-	}
-
-	// Tell the docker engine that the container has started.
-	si := StateInfo{
-		CommonStateInfo: CommonStateInfo{
-			State: StateStart,
-			Pid:   ctr.systemPid, // Not sure this is needed? Double-check monitor.go in daemon BUGBUG @jhowardmsft
-		}}
-	logrus.Debugf("libcontainerd: start() completed OK, %+v", si)
-	return ctr.client.backend.StateChanged(ctr.containerID, si)
-
-}
-
-// waitProcessExitCode will wait for the given process to exit and return its error code.
-func (ctr *container) waitProcessExitCode(process *process) int {
-	// Block indefinitely for the process to exit.
-	err := process.hcsProcess.Wait()
-	if err != nil {
-		if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
-			logrus.Warnf("libcontainerd: Wait() failed (container may have been killed): %s", err)
-		}
-		// Fall through here, do not return. This ensures we attempt to continue the
-		// shutdown in HCS and tell the docker engine that the process/container
-		// has exited to avoid a container being dropped on the floor.
-	}
-
-	exitCode, err := process.hcsProcess.ExitCode()
-	if err != nil {
-		if herr, ok := err.(*hcsshim.ProcessError); ok && herr.Err != windows.ERROR_BROKEN_PIPE {
-			logrus.Warnf("libcontainerd: unable to get exit code from container %s", ctr.containerID)
-		}
-		// Since we got an error retrieving the exit code, make sure that the code we return
-		// doesn't incorrectly indicate success.
-		exitCode = -1
-
-		// Fall through here, do not return. This ensures we attempt to continue the
-		// shutdown in HCS and tell the docker engine that the process/container
-		// has exited to avoid a container being dropped on the floor.
-	}
-
-	return exitCode
-}
-
-// waitExit runs as a goroutine waiting for the process to exit. It's
-// equivalent to (in the linux containerd world) where events come in for
-// state change notifications from containerd.
-func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) error {
-	logrus.Debugln("libcontainerd: waitExit() on pid", process.systemPid)
-
-	exitCode := ctr.waitProcessExitCode(process)
-	// Lock the container while removing the process/container from the list
-	ctr.client.lock(ctr.containerID)
-
-	if !isFirstProcessToStart {
-		ctr.cleanProcess(process.friendlyName)
-	} else {
-		ctr.client.deleteContainer(ctr.containerID)
-	}
-
-	// Unlock here so other threads are unblocked
-	ctr.client.unlock(ctr.containerID)
-
-	// Assume the container has exited
-	si := StateInfo{
-		CommonStateInfo: CommonStateInfo{
-			State:     StateExit,
-			ExitCode:  uint32(exitCode),
-			Pid:       process.systemPid,
-			ProcessID: process.friendlyName,
-		},
-		UpdatePending: false,
-	}
-
-	// But it could have been an exec'd process which exited
-	if !isFirstProcessToStart {
-		si.State = StateExitProcess
-	} else {
-		// Pending updates is only applicable for WCOW
-		if ctr.isWindows {
-			updatePending, err := ctr.hcsContainer.HasPendingUpdates()
-			if err != nil {
-				logrus.Warnf("libcontainerd: HasPendingUpdates() failed (container may have been killed): %s", err)
-			} else {
-				si.UpdatePending = updatePending
-			}
-		}
-
-		logrus.Debugf("libcontainerd: shutting down container %s", ctr.containerID)
-		if err := ctr.shutdown(); err != nil {
-			logrus.Debugf("libcontainerd: failed to shutdown container %s", ctr.containerID)
-		} else {
-			logrus.Debugf("libcontainerd: completed shutting down container %s", ctr.containerID)
-		}
-		if err := ctr.hcsContainer.Close(); err != nil {
-			logrus.Error(err)
-		}
-	}
-
-	if err := process.hcsProcess.Close(); err != nil {
-		logrus.Errorf("libcontainerd: hcsProcess.Close(): %v", err)
-	}
-
-	// Call into the backend to notify it of the state change.
-	logrus.Debugf("libcontainerd: waitExit() calling backend.StateChanged %+v", si)
-	if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil {
-		logrus.Error(err)
-	}
-
-	logrus.Debugf("libcontainerd: waitExit() completed OK, %+v", si)
-
-	return nil
-}
-
-// cleanProcess removes process from the map.
-// Caller needs to lock container ID before calling this method.
-func (ctr *container) cleanProcess(id string) {
-	delete(ctr.processes, id)
-}
-
-// shutdown shuts down the container in HCS
-// Caller needs to lock container ID before calling this method.
-func (ctr *container) shutdown() error {
-	const shutdownTimeout = time.Minute * 5
-	err := ctr.hcsContainer.Shutdown()
-	if hcsshim.IsPending(err) {
-		// Explicit timeout to avoid a (remote) possibility that shutdown hangs indefinitely.
-		err = ctr.hcsContainer.WaitTimeout(shutdownTimeout)
-	} else if hcsshim.IsAlreadyStopped(err) {
-		err = nil
-	}
-
-	if err != nil {
-		logrus.Debugf("libcontainerd: error shutting down container %s %v calling terminate", ctr.containerID, err)
-		if err := ctr.terminate(); err != nil {
-			return err
-		}
-		return err
-	}
-
-	return nil
-}
-
-// terminate terminates the container in HCS
-// Caller needs to lock container ID before calling this method.
-func (ctr *container) terminate() error {
-	const terminateTimeout = time.Minute * 5
-	err := ctr.hcsContainer.Terminate()
-
-	if hcsshim.IsPending(err) {
-		err = ctr.hcsContainer.WaitTimeout(terminateTimeout)
-	} else if hcsshim.IsAlreadyStopped(err) {
-		err = nil
-	}
-
-	if err != nil {
-		logrus.Debugf("libcontainerd: error terminating container %s %v", ctr.containerID, err)
-		return err
-	}
-
-	return nil
-}
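Both shutdown() and terminate() above share one hcsshim error-handling idiom: a "pending" result means wait with an explicit timeout, and "already stopped" counts as success. The Windows-only sketch below condenses that flow; it assumes the pre-1.0 hcsshim API that the removed file imports (hcsshim.Container, IsPending, IsAlreadyStopped) and is not itself part of the diff.

package sketch

import (
	"time"

	"github.com/Microsoft/hcsshim"
)

// gracefulStop mirrors the shutdown/terminate flow of the removed
// container_windows.go: ask HCS to shut the container down, treat a
// pending result as "wait with a timeout", and escalate to Terminate
// only if the shutdown fails outright.
func gracefulStop(hc hcsshim.Container, timeout time.Duration) error {
	err := hc.Shutdown()
	if hcsshim.IsPending(err) {
		err = hc.WaitTimeout(timeout)
	} else if hcsshim.IsAlreadyStopped(err) {
		err = nil
	}
	if err == nil {
		return nil
	}

	// Shutdown did not succeed; fall back to a hard Terminate with the
	// same pending / already-stopped handling.
	err = hc.Terminate()
	if hcsshim.IsPending(err) {
		err = hc.WaitTimeout(timeout)
	} else if hcsshim.IsAlreadyStopped(err) {
		err = nil
	}
	return err
}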

+ 46 - 0
libcontainerd/errors.go

@@ -0,0 +1,46 @@
+package libcontainerd
+
+import "errors"
+
+type liberr struct {
+	err error
+}
+
+func (e liberr) Error() string {
+	return e.err.Error()
+}
+
+func (e liberr) Cause() error {
+	return e.err
+}
+
+type notFoundErr struct {
+	liberr
+}
+
+func (notFoundErr) NotFound() {}
+
+func newNotFoundError(err string) error { return notFoundErr{liberr{errors.New(err)}} }
+func wrapNotFoundError(err error) error { return notFoundErr{liberr{err}} }
+
+type invalidParamErr struct {
+	liberr
+}
+
+func (invalidParamErr) InvalidParameter() {}
+
+func newInvalidParameterError(err string) error { return invalidParamErr{liberr{errors.New(err)}} }
+
+type conflictErr struct {
+	liberr
+}
+
+func (conflictErr) ConflictErr() {}
+
+func newConflictError(err string) error { return conflictErr{liberr{errors.New(err)}} }
+
+type sysErr struct {
+	liberr
+}
+
+func wrapSystemError(err error) error { return sysErr{liberr{err}} }
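These error types carry their classification as behaviour (NotFound(), InvalidParameter(), ConflictErr()) rather than as exported concrete types, so callers detect them with an interface assertion. A minimal sketch of such a check follows; the daemon's real helpers usually unwrap with errors.Cause first, which is omitted here, and isNotFound/classify are illustrative names.

package sketch

import "fmt"

// notFound matches any error that advertises the NotFound behaviour,
// such as the values produced by newNotFoundError and wrapNotFoundError.
type notFound interface {
	NotFound()
}

// isNotFound reports whether err carries the not-found classification.
func isNotFound(err error) bool {
	_, ok := err.(notFound)
	return ok
}

func classify(err error) string {
	if isNotFound(err) {
		return "not found"
	}
	return fmt.Sprintf("other: %v", err)
}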

+ 36 - 0
libcontainerd/io.go

@@ -0,0 +1,36 @@
+package libcontainerd
+
+import "github.com/containerd/containerd"
+
+// Config returns the containerd.IOConfig of this pipe set
+func (p *IOPipe) Config() containerd.IOConfig {
+	return p.config
+}
+
+// Cancel aborts ongoing operations if they have not completed yet
+func (p *IOPipe) Cancel() {
+	p.cancel()
+}
+
+// Wait waits for io operations to finish
+func (p *IOPipe) Wait() {
+}
+
+// Close closes the underlying pipes
+func (p *IOPipe) Close() error {
+	p.cancel()
+
+	if p.Stdin != nil {
+		p.Stdin.Close()
+	}
+
+	if p.Stdout != nil {
+		p.Stdout.Close()
+	}
+
+	if p.Stderr != nil {
+		p.Stderr.Close()
+	}
+
+	return nil
+}

+ 60 - 0
libcontainerd/io_unix.go

@@ -0,0 +1,60 @@
+// +build !windows
+
+package libcontainerd
+
+import (
+	"context"
+	"io"
+	"syscall"
+
+	"github.com/containerd/containerd"
+	"github.com/containerd/fifo"
+	"github.com/pkg/errors"
+)
+
+func newIOPipe(fifos *containerd.FIFOSet) (*IOPipe, error) {
+	var (
+		err         error
+		ctx, cancel = context.WithCancel(context.Background())
+		f           io.ReadWriteCloser
+		iop         = &IOPipe{
+			Terminal: fifos.Terminal,
+			cancel:   cancel,
+			config: containerd.IOConfig{
+				Terminal: fifos.Terminal,
+				Stdin:    fifos.In,
+				Stdout:   fifos.Out,
+				Stderr:   fifos.Err,
+			},
+		}
+	)
+	defer func() {
+		if err != nil {
+			cancel()
+			iop.Close()
+		}
+	}()
+
+	if fifos.In != "" {
+		if f, err = fifo.OpenFifo(ctx, fifos.In, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
+			return nil, errors.WithStack(err)
+		}
+		iop.Stdin = f
+	}
+
+	if fifos.Out != "" {
+		if f, err = fifo.OpenFifo(ctx, fifos.Out, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
+			return nil, errors.WithStack(err)
+		}
+		iop.Stdout = f
+	}
+
+	if fifos.Err != "" {
+		if f, err = fifo.OpenFifo(ctx, fifos.Err, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
+			return nil, errors.WithStack(err)
+		}
+		iop.Stderr = f
+	}
+
+	return iop, nil
+}
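newIOPipe opens each stream lazily and non-blocking through github.com/containerd/fifo; the same call can be used from the other side to attach to a stream. The sketch below opens the read end of a stdout fifo and drains it into a writer; drainStdout is illustrative and not part of this package.

package sketch

import (
	"context"
	"io"
	"syscall"

	"github.com/containerd/fifo"
)

// drainStdout opens the read side of a stdout fifo the same way
// newIOPipe above opens its ends, then copies everything to dst until
// the writer closes. path would be whatever FIFOSet.Out points at.
func drainStdout(path string, dst io.Writer) error {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	f, err := fifo.OpenFifo(ctx, path, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700)
	if err != nil {
		return err
	}
	defer f.Close()

	_, err = io.Copy(dst, f)
	return err
}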

+ 138 - 0
libcontainerd/io_windows.go

@@ -0,0 +1,138 @@
+package libcontainerd
+
+import (
+	"context"
+	"io"
+	"net"
+	"sync"
+
+	winio "github.com/Microsoft/go-winio"
+	"github.com/containerd/containerd"
+	"github.com/pkg/errors"
+)
+
+type winpipe struct {
+	sync.Mutex
+
+	ctx      context.Context
+	listener net.Listener
+	readyCh  chan struct{}
+	readyErr error
+
+	client net.Conn
+}
+
+func newWinpipe(ctx context.Context, pipe string) (*winpipe, error) {
+	l, err := winio.ListenPipe(pipe, nil)
+	if err != nil {
+		return nil, errors.Wrapf(err, "%q pipe creation failed", pipe)
+	}
+	wp := &winpipe{
+		ctx:      ctx,
+		listener: l,
+		readyCh:  make(chan struct{}),
+	}
+	go func() {
+		go func() {
+			defer close(wp.readyCh)
+			defer wp.listener.Close()
+			c, err := wp.listener.Accept()
+			if err != nil {
+				wp.Lock()
+				if wp.readyErr == nil {
+					wp.readyErr = err
+				}
+				wp.Unlock()
+				return
+			}
+			wp.client = c
+		}()
+
+		select {
+		case <-wp.readyCh:
+		case <-ctx.Done():
+			wp.Lock()
+			if wp.readyErr == nil {
+				wp.listener.Close()
+				wp.readyErr = ctx.Err()
+			}
+			wp.Unlock()
+		}
+	}()
+
+	return wp, nil
+}
+
+func (wp *winpipe) Read(b []byte) (int, error) {
+	select {
+	case <-wp.ctx.Done():
+		return 0, wp.ctx.Err()
+	case <-wp.readyCh:
+		return wp.client.Read(b)
+	}
+}
+
+func (wp *winpipe) Write(b []byte) (int, error) {
+	select {
+	case <-wp.ctx.Done():
+		return 0, wp.ctx.Err()
+	case <-wp.readyCh:
+		return wp.client.Write(b)
+	}
+}
+
+func (wp *winpipe) Close() error {
+	select {
+	case <-wp.readyCh:
+		return wp.client.Close()
+	default:
+		return nil
+	}
+}
+
+func newIOPipe(fifos *containerd.FIFOSet) (*IOPipe, error) {
+	var (
+		err         error
+		ctx, cancel = context.WithCancel(context.Background())
+		p           io.ReadWriteCloser
+		iop         = &IOPipe{
+			Terminal: fifos.Terminal,
+			cancel:   cancel,
+			config: containerd.IOConfig{
+				Terminal: fifos.Terminal,
+				Stdin:    fifos.In,
+				Stdout:   fifos.Out,
+				Stderr:   fifos.Err,
+			},
+		}
+	)
+	defer func() {
+		if err != nil {
+			cancel()
+			iop.Close()
+		}
+	}()
+
+	if fifos.In != "" {
+		if p, err = newWinpipe(ctx, fifos.In); err != nil {
+			return nil, err
+		}
+		iop.Stdin = p
+	}
+
+	if fifos.Out != "" {
+		if p, err = newWinpipe(ctx, fifos.Out); err != nil {
+			return nil, err
+		}
+		iop.Stdout = p
+	}
+
+	if fifos.Err != "" {
+		if p, err = newWinpipe(ctx, fifos.Err); err != nil {
+			return nil, err
+		}
+		iop.Stderr = p
+	}
+
+	return iop, nil
+}
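On Windows there are no fifos, so winpipe listens on a named pipe and parks every Read and Write until either a client connects (readyCh) or the context is cancelled. The same gating pattern, reduced to a generic reader; lazyReader is an illustrative type, not Docker code.

package sketch

import (
	"context"
	"errors"
	"io"
)

// lazyReader defers all reads until a backing reader becomes available
// (readyCh is closed) or gives up when the context is cancelled, the
// same "wait for the pipe client before touching it" gate used by
// winpipe.Read/Write above.
type lazyReader struct {
	ctx     context.Context
	readyCh <-chan struct{}
	r       io.Reader // set by whoever closes readyCh
}

func (l *lazyReader) Read(b []byte) (int, error) {
	select {
	case <-l.ctx.Done():
		return 0, l.ctx.Err()
	case <-l.readyCh:
		if l.r == nil {
			return 0, errors.New("pipe was never connected")
		}
		return l.r.Read(b)
	}
}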

+ 0 - 31
libcontainerd/oom_linux.go

@@ -1,31 +0,0 @@
-package libcontainerd
-
-import (
-	"fmt"
-	"os"
-	"strconv"
-
-	"github.com/opencontainers/runc/libcontainer/system"
-	"github.com/sirupsen/logrus"
-)
-
-func setOOMScore(pid, score int) error {
-	oomScoreAdjPath := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
-	f, err := os.OpenFile(oomScoreAdjPath, os.O_WRONLY, 0)
-	if err != nil {
-		return err
-	}
-	stringScore := strconv.Itoa(score)
-	_, err = f.WriteString(stringScore)
-	f.Close()
-	if os.IsPermission(err) {
-		// Setting oom_score_adj does not work in an
-		// unprivileged container. Ignore the error, but log
-		// it if we appear not to be in that situation.
-		if !system.RunningInUserNS() {
-			logrus.Debugf("Permission denied writing %q to %s", stringScore, oomScoreAdjPath)
-		}
-		return nil
-	}
-	return err
-}

+ 0 - 5
libcontainerd/oom_solaris.go

@@ -1,5 +0,0 @@
-package libcontainerd
-
-func setOOMScore(pid, score int) error {
-	return nil
-}

+ 0 - 42
libcontainerd/pausemonitor_unix.go

@@ -1,42 +0,0 @@
-// +build !windows
-
-package libcontainerd
-
-import (
-	"sync"
-)
-
-// pauseMonitor is helper to get notifications from pause state changes.
-type pauseMonitor struct {
-	sync.Mutex
-	waiters map[string][]chan struct{}
-}
-
-func (m *pauseMonitor) handle(t string) {
-	m.Lock()
-	defer m.Unlock()
-	if m.waiters == nil {
-		return
-	}
-	q, ok := m.waiters[t]
-	if !ok {
-		return
-	}
-	if len(q) > 0 {
-		close(q[0])
-		m.waiters[t] = q[1:]
-	}
-}
-
-func (m *pauseMonitor) append(t string, waiter chan struct{}) {
-	m.Lock()
-	defer m.Unlock()
-	if m.waiters == nil {
-		m.waiters = make(map[string][]chan struct{})
-	}
-	_, ok := m.waiters[t]
-	if !ok {
-		m.waiters[t] = make([]chan struct{}, 0)
-	}
-	m.waiters[t] = append(m.waiters[t], waiter)
-}
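The deleted pauseMonitor is a small one-shot notification queue: each handle() call releases exactly one registered waiter for that event type, in FIFO order. Restated as a self-contained sketch with an explicit wait() helper added for illustration; notifier is not the removed type.

package sketch

import "sync"

// notifier is a stripped-down version of the removed pauseMonitor:
// callers register a channel for an event name, and each handle() call
// releases exactly one waiter for that event, in FIFO order.
type notifier struct {
	mu      sync.Mutex
	waiters map[string][]chan struct{}
}

func (n *notifier) wait(event string) <-chan struct{} {
	ch := make(chan struct{})
	n.mu.Lock()
	if n.waiters == nil {
		n.waiters = make(map[string][]chan struct{})
	}
	n.waiters[event] = append(n.waiters[event], ch)
	n.mu.Unlock()
	return ch
}

func (n *notifier) handle(event string) {
	n.mu.Lock()
	defer n.mu.Unlock()
	q := n.waiters[event]
	if len(q) == 0 {
		return
	}
	close(q[0]) // release the oldest waiter
	n.waiters[event] = q[1:]
}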

+ 0 - 18
libcontainerd/process.go

@@ -1,18 +0,0 @@
-package libcontainerd
-
-// processCommon are the platform common fields as part of the process structure
-// which keeps the state for the main container process, as well as any exec
-// processes.
-type processCommon struct {
-	client *client
-
-	// containerID is the Container ID
-	containerID string
-
-	// friendlyName is an identifier for the process (or `InitFriendlyName`
-	// for the first process)
-	friendlyName string
-
-	// systemPid is the PID of the main container process
-	systemPid uint32
-}

+ 0 - 107
libcontainerd/process_unix.go

@@ -1,107 +0,0 @@
-// +build linux solaris
-
-package libcontainerd
-
-import (
-	"io"
-	"io/ioutil"
-	"os"
-	"path/filepath"
-	goruntime "runtime"
-	"strings"
-
-	containerd "github.com/containerd/containerd/api/grpc/types"
-	"github.com/tonistiigi/fifo"
-	"golang.org/x/net/context"
-	"golang.org/x/sys/unix"
-)
-
-var fdNames = map[int]string{
-	unix.Stdin:  "stdin",
-	unix.Stdout: "stdout",
-	unix.Stderr: "stderr",
-}
-
-// process keeps the state for both main container process and exec process.
-type process struct {
-	processCommon
-
-	// Platform specific fields are below here.
-	dir string
-}
-
-func (p *process) openFifos(ctx context.Context, terminal bool) (pipe *IOPipe, err error) {
-	if err := os.MkdirAll(p.dir, 0700); err != nil {
-		return nil, err
-	}
-
-	io := &IOPipe{}
-
-	io.Stdin, err = fifo.OpenFifo(ctx, p.fifo(unix.Stdin), unix.O_WRONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700)
-	if err != nil {
-		return nil, err
-	}
-
-	defer func() {
-		if err != nil {
-			io.Stdin.Close()
-		}
-	}()
-
-	io.Stdout, err = fifo.OpenFifo(ctx, p.fifo(unix.Stdout), unix.O_RDONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700)
-	if err != nil {
-		return nil, err
-	}
-
-	defer func() {
-		if err != nil {
-			io.Stdout.Close()
-		}
-	}()
-
-	if goruntime.GOOS == "solaris" || !terminal {
-		// For Solaris terminal handling is done exclusively by the runtime therefore we make no distinction
-		// in the processing for terminal and !terminal cases.
-		io.Stderr, err = fifo.OpenFifo(ctx, p.fifo(unix.Stderr), unix.O_RDONLY|unix.O_CREAT|unix.O_NONBLOCK, 0700)
-		if err != nil {
-			return nil, err
-		}
-		defer func() {
-			if err != nil {
-				io.Stderr.Close()
-			}
-		}()
-	} else {
-		io.Stderr = ioutil.NopCloser(emptyReader{})
-	}
-
-	return io, nil
-}
-
-func (p *process) sendCloseStdin() error {
-	_, err := p.client.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
-		Id:         p.containerID,
-		Pid:        p.friendlyName,
-		CloseStdin: true,
-	})
-	if err != nil && (strings.Contains(err.Error(), "container not found") || strings.Contains(err.Error(), "process not found")) {
-		return nil
-	}
-	return err
-}
-
-func (p *process) closeFifos(io *IOPipe) {
-	io.Stdin.Close()
-	io.Stdout.Close()
-	io.Stderr.Close()
-}
-
-type emptyReader struct{}
-
-func (r emptyReader) Read(b []byte) (int, error) {
-	return 0, io.EOF
-}
-
-func (p *process) fifo(index int) string {
-	return filepath.Join(p.dir, p.friendlyName+"-"+fdNames[index])
-}

+ 5 - 9
libcontainerd/process_windows.go

@@ -8,14 +8,6 @@ import (
 	"github.com/docker/docker/pkg/ioutils"
 )
 
-// process keeps the state for both main container process and exec process.
-type process struct {
-	processCommon
-
-	// Platform specific fields are below here.
-	hcsProcess hcsshim.Process
-}
-
 type autoClosingReader struct {
 	io.ReadCloser
 	sync.Once
@@ -23,7 +15,7 @@ type autoClosingReader struct {
 
 func (r *autoClosingReader) Read(b []byte) (n int, err error) {
 	n, err = r.ReadCloser.Read(b)
-	if err == io.EOF {
+	if err != nil {
 		r.Once.Do(func() { r.ReadCloser.Close() })
 	}
 	return
@@ -46,3 +38,7 @@ func createStdInCloser(pipe io.WriteCloser, process hcsshim.Process) io.WriteClo
 		return nil
 	})
 }
+
+func (p *process) Cleanup() error {
+	return nil
+}

+ 0 - 2
libcontainerd/queue_unix.go → libcontainerd/queue.go

@@ -1,5 +1,3 @@
-// +build linux solaris
-
 package libcontainerd
 
 import "sync"

+ 0 - 2
libcontainerd/queue_unix_test.go → libcontainerd/queue_test.go

@@ -1,5 +1,3 @@
-// +build linux solaris
-
 package libcontainerd
 
 import (

+ 0 - 20
libcontainerd/remote.go

@@ -1,20 +0,0 @@
-package libcontainerd
-
-// Remote on Linux defines the accesspoint to the containerd grpc API.
-// Remote on Windows is largely an unimplemented interface as there is
-// no remote containerd.
-type Remote interface {
-	// Client returns a new Client instance connected with given Backend.
-	Client(Backend) (Client, error)
-	// Cleanup stops containerd if it was started by libcontainerd.
-	// Note this is not used on Windows as there is no remote containerd.
-	Cleanup()
-	// UpdateOptions allows various remote options to be updated at runtime.
-	UpdateOptions(...RemoteOption) error
-}
-
-// RemoteOption allows to configure parameters of remotes.
-// This is unused on Windows.
-type RemoteOption interface {
-	Apply(Remote) error
-}

+ 317 - 0
libcontainerd/remote_daemon.go

@@ -0,0 +1,317 @@
+// +build !windows
+
+package libcontainerd
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/BurntSushi/toml"
+	"github.com/containerd/containerd"
+	"github.com/containerd/containerd/server"
+	"github.com/docker/docker/pkg/system"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+const (
+	maxConnectionRetryCount = 3
+	healthCheckTimeout      = 3 * time.Second
+	shutdownTimeout         = 15 * time.Second
+	configFile              = "containerd.toml"
+	binaryName              = "docker-containerd"
+	pidFile                 = "docker-containerd.pid"
+)
+
+type pluginConfigs struct {
+	Plugins map[string]interface{} `toml:"plugins"`
+}
+
+type remote struct {
+	sync.RWMutex
+	server.Config
+
+	daemonPid int
+	logger    *logrus.Entry
+
+	daemonWaitCh    chan struct{}
+	clients         []*client
+	shutdownContext context.Context
+	shutdownCancel  context.CancelFunc
+	shutdown        bool
+
+	// Options
+	startDaemon bool
+	rootDir     string
+	stateDir    string
+	snapshotter string
+	pluginConfs pluginConfigs
+}
+
+// New creates a fresh instance of libcontainerd remote.
+func New(rootDir, stateDir string, options ...RemoteOption) (rem Remote, err error) {
+	defer func() {
+		if err != nil {
+			err = errors.Wrap(err, "Failed to connect to containerd")
+		}
+	}()
+
+	r := &remote{
+		rootDir:  rootDir,
+		stateDir: stateDir,
+		Config: server.Config{
+			Root:  filepath.Join(rootDir, "daemon"),
+			State: filepath.Join(stateDir, "daemon"),
+		},
+		pluginConfs: pluginConfigs{make(map[string]interface{})},
+		daemonPid:   -1,
+		logger:      logrus.WithField("module", "libcontainerd"),
+	}
+	r.shutdownContext, r.shutdownCancel = context.WithCancel(context.Background())
+
+	rem = r
+	for _, option := range options {
+		if err = option.Apply(r); err != nil {
+			return
+		}
+	}
+	r.setDefaults()
+
+	if err = system.MkdirAll(stateDir, 0700, ""); err != nil {
+		return
+	}
+
+	if r.startDaemon {
+		os.Remove(r.GRPC.Address)
+		if err = r.startContainerd(); err != nil {
+			return
+		}
+		defer func() {
+			if err != nil {
+				r.Cleanup()
+			}
+		}()
+	}
+
+	// This client connection is only used to monitor containerd's health
+	client, err := containerd.New(r.GRPC.Address)
+	if err != nil {
+		return
+	}
+	if _, err := client.Version(context.Background()); err != nil {
+		system.KillProcess(r.daemonPid)
+		return nil, errors.Wrapf(err, "unable to get containerd version")
+	}
+
+	go r.monitorConnection(client)
+
+	return r, nil
+}
+
+func (r *remote) NewClient(ns string, b Backend) (Client, error) {
+	c := &client{
+		stateDir:   r.stateDir,
+		logger:     r.logger.WithField("namespace", ns),
+		namespace:  ns,
+		backend:    b,
+		containers: make(map[string]*container),
+	}
+
+	rclient, err := containerd.New(r.GRPC.Address, containerd.WithDefaultNamespace(ns))
+	if err != nil {
+		return nil, err
+	}
+	c.remote = rclient
+
+	go c.processEventStream(r.shutdownContext)
+
+	r.Lock()
+	r.clients = append(r.clients, c)
+	r.Unlock()
+	return c, nil
+}
+
+func (r *remote) Cleanup() {
+	if r.daemonPid != -1 {
+		r.shutdownCancel()
+		r.stopDaemon()
+	}
+
+	// cleanup some files
+	os.Remove(filepath.Join(r.stateDir, pidFile))
+
+	r.platformCleanup()
+}
+
+func (r *remote) getContainerdPid() (int, error) {
+	pidFile := filepath.Join(r.stateDir, pidFile)
+	f, err := os.OpenFile(pidFile, os.O_RDWR, 0600)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return -1, nil
+		}
+		return -1, err
+	}
+	defer f.Close()
+
+	b := make([]byte, 8)
+	n, err := f.Read(b)
+	if err != nil && err != io.EOF {
+		return -1, err
+	}
+
+	if n > 0 {
+		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
+		if err != nil {
+			return -1, err
+		}
+		if system.IsProcessAlive(int(pid)) {
+			return int(pid), nil
+		}
+	}
+
+	return -1, nil
+}
+
+func (r *remote) getContainerdConfig() (string, error) {
+	path := filepath.Join(r.stateDir, configFile)
+	f, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
+	if err != nil {
+		return "", errors.Wrapf(err, "failed to open containerd config file at %s", path)
+	}
+	defer f.Close()
+
+	enc := toml.NewEncoder(f)
+	if err = enc.Encode(r.Config); err != nil {
+		return "", errors.Wrapf(err, "failed to encode general config")
+	}
+	if err = enc.Encode(r.pluginConfs); err != nil {
+		return "", errors.Wrapf(err, "failed to encode plugin configs")
+	}
+
+	return path, nil
+}
+
+func (r *remote) startContainerd() error {
+	pid, err := r.getContainerdPid()
+	if err != nil {
+		return err
+	}
+
+	if pid != -1 {
+		r.daemonPid = pid
+		logrus.WithField("pid", pid).
+			Infof("libcontainerd: %s is still running", binaryName)
+		return nil
+	}
+
+	configFile, err := r.getContainerdConfig()
+	if err != nil {
+		return err
+	}
+
+	args := []string{"--config", configFile}
+	cmd := exec.Command(binaryName, args...)
+	// redirect containerd logs to docker logs
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	cmd.SysProcAttr = containerdSysProcAttr()
+	// clear the NOTIFY_SOCKET from the env when starting containerd
+	cmd.Env = nil
+	for _, e := range os.Environ() {
+		if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
+			cmd.Env = append(cmd.Env, e)
+		}
+	}
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+
+	r.daemonWaitCh = make(chan struct{})
+	go func() {
+		// Reap our child when needed
+		if err := cmd.Wait(); err != nil {
+			r.logger.WithError(err).Errorf("containerd did not exit successfully")
+		}
+		close(r.daemonWaitCh)
+	}()
+
+	r.daemonPid = cmd.Process.Pid
+
+	err = ioutil.WriteFile(filepath.Join(r.stateDir, pidFile), []byte(fmt.Sprintf("%d", r.daemonPid)), 0660)
+	if err != nil {
+		system.KillProcess(r.daemonPid)
+		return errors.Wrap(err, "libcontainerd: failed to save daemon pid to disk")
+	}
+
+	logrus.WithField("pid", r.daemonPid).
+		Infof("libcontainerd: started new %s process", binaryName)
+
+	return nil
+}
+
+func (r *remote) monitorConnection(client *containerd.Client) {
+	var transientFailureCount = 0
+
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+
+	for {
+		<-ticker.C
+		ctx, cancel := context.WithTimeout(r.shutdownContext, healthCheckTimeout)
+		_, err := client.IsServing(ctx)
+		cancel()
+		if err == nil {
+			transientFailureCount = 0
+			continue
+		}
+
+		select {
+		case <-r.shutdownContext.Done():
+			r.logger.Info("stopping healtcheck following graceful shutdown")
+			client.Close()
+			return
+		default:
+		}
+
+		r.logger.WithError(err).WithField("binary", binaryName).Debug("daemon is not responding")
+
+		if r.daemonPid != -1 {
+			transientFailureCount++
+			if transientFailureCount >= maxConnectionRetryCount || !system.IsProcessAlive(r.daemonPid) {
+				transientFailureCount = 0
+				if system.IsProcessAlive(r.daemonPid) {
+					r.logger.WithField("pid", r.daemonPid).Info("killing and restarting containerd")
+					// Try to get a stack trace
+					syscall.Kill(r.daemonPid, syscall.SIGUSR1)
+					<-time.After(100 * time.Millisecond)
+					system.KillProcess(r.daemonPid)
+				}
+				<-r.daemonWaitCh
+				var err error
+				client.Close()
+				os.Remove(r.GRPC.Address)
+				if err = r.startContainerd(); err != nil {
+					r.logger.WithError(err).Error("failed restarting containerd")
+				} else {
+					newClient, err := containerd.New(r.GRPC.Address)
+					if err != nil {
+						r.logger.WithError(err).Error("failed connect to containerd")
+					} else {
+						client = newClient
+					}
+				}
+			}
+		}
+	}
+}
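monitorConnection above is essentially a supervision loop: probe containerd on a ticker, tolerate a few transient failures, then kill and restart the daemon once the failure budget is spent. A reduced model of that loop follows; ping and restart are placeholders for client.IsServing and the restart logic, and the interval and timeout values are illustrative.

package sketch

import (
	"context"
	"time"
)

// superviseLoop polls a health probe and invokes restart after
// maxFailures consecutive failures, mirroring monitorConnection.
func superviseLoop(ctx context.Context, ping func(context.Context) error, restart func() error, maxFailures int) {
	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	failures := 0
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}

		probeCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
		err := ping(probeCtx)
		cancel()
		if err == nil {
			failures = 0
			continue
		}

		failures++
		if failures >= maxFailures {
			failures = 0
			if err := restart(); err != nil {
				// Restart failed; keep polling and retry on a later tick.
				continue
			}
		}
	}
}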

+ 141 - 0
libcontainerd/remote_daemon_options.go

@@ -0,0 +1,141 @@
+// +build !windows
+
+package libcontainerd
+
+import "fmt"
+
+// WithRemoteAddr sets the external containerd socket to connect to.
+func WithRemoteAddr(addr string) RemoteOption {
+	return rpcAddr(addr)
+}
+
+type rpcAddr string
+
+func (a rpcAddr) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.GRPC.Address = string(a)
+		return nil
+	}
+	return fmt.Errorf("WithRemoteAddr option not supported for this remote")
+}
+
+// WithRemoteAddrUser sets the uid and gid to create the RPC address with
+func WithRemoteAddrUser(uid, gid int) RemoteOption {
+	return rpcUser{uid, gid}
+}
+
+type rpcUser struct {
+	uid int
+	gid int
+}
+
+func (u rpcUser) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.GRPC.Uid = u.uid
+		remote.GRPC.Gid = u.gid
+		return nil
+	}
+	return fmt.Errorf("WithRemoteAddr option not supported for this remote")
+}
+
+// WithStartDaemon defines if libcontainerd should also run the containerd daemon.
+func WithStartDaemon(start bool) RemoteOption {
+	return startDaemon(start)
+}
+
+type startDaemon bool
+
+func (s startDaemon) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.startDaemon = bool(s)
+		return nil
+	}
+	return fmt.Errorf("WithStartDaemon option not supported for this remote")
+}
+
+// WithLogLevel defines which log level to start containerd with.
+// This only makes sense if WithStartDaemon() was set to true.
+func WithLogLevel(lvl string) RemoteOption {
+	return logLevel(lvl)
+}
+
+type logLevel string
+
+func (l logLevel) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.Debug.Level = string(l)
+		return nil
+	}
+	return fmt.Errorf("WithDebugLog option not supported for this remote")
+}
+
+// WithDebugAddress defines at which location the debug GRPC connection
+// should be made
+func WithDebugAddress(addr string) RemoteOption {
+	return debugAddress(addr)
+}
+
+type debugAddress string
+
+func (d debugAddress) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.Debug.Address = string(d)
+		return nil
+	}
+	return fmt.Errorf("WithDebugAddress option not supported for this remote")
+}
+
+// WithMetricsAddress defines at which location the metrics GRPC connection
+// should be made
+func WithMetricsAddress(addr string) RemoteOption {
+	return metricsAddress(addr)
+}
+
+type metricsAddress string
+
+func (m metricsAddress) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.Metrics.Address = string(m)
+		return nil
+	}
+	return fmt.Errorf("WithMetricsAddress option not supported for this remote")
+}
+
+// WithSnapshotter defines which snapshotter driver should be used
+func WithSnapshotter(name string) RemoteOption {
+	return snapshotter(name)
+}
+
+type snapshotter string
+
+func (s snapshotter) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.snapshotter = string(s)
+		return nil
+	}
+	return fmt.Errorf("WithSnapshotter option not supported for this remote")
+}
+
+// WithPlugin allows configuring a containerd plugin.
+// Configuration values passed need to be quoted if quotes are required in
+// the toml format.
+func WithPlugin(name string, conf interface{}) RemoteOption {
+	return pluginConf{
+		name: name,
+		conf: conf,
+	}
+}
+
+type pluginConf struct {
+	// Name is the name of the plugin
+	name string
+	conf interface{}
+}
+
+func (p pluginConf) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.pluginConfs.Plugins[p.name] = p.conf
+		return nil
+	}
+	return fmt.Errorf("WithPlugin option not supported for this remote")
+}
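All of these helpers follow the functional-options shape: each With* constructor returns a value whose Apply method mutates the concrete remote when the type assertion succeeds, and errors out otherwise. The pattern in isolation, with purely illustrative names:

package sketch

import "errors"

type server struct {
	addr string
}

// Option plays the role of RemoteOption: an opaque value that knows how
// to apply itself to a concrete target.
type Option interface {
	Apply(target interface{}) error
}

type withAddr string

func (a withAddr) Apply(target interface{}) error {
	s, ok := target.(*server)
	if !ok {
		return errors.New("WithAddr is not supported for this target")
	}
	s.addr = string(a)
	return nil
}

// WithAddr sets the listen address on a *server.
func WithAddr(addr string) Option { return withAddr(addr) }

func newServer(opts ...Option) (*server, error) {
	s := &server{addr: "default"}
	for _, o := range opts {
		if err := o.Apply(s); err != nil {
			return nil, err
		}
	}
	return s, nil
}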

+ 36 - 0
libcontainerd/remote_daemon_options_unix.go

@@ -0,0 +1,36 @@
+// +build linux solaris
+
+package libcontainerd
+
+import "fmt"
+
+// WithOOMScore defines the oom_score_adj to set for the containerd process.
+func WithOOMScore(score int) RemoteOption {
+	return oomScore(score)
+}
+
+type oomScore int
+
+func (o oomScore) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.OOMScore = int(o)
+		return nil
+	}
+	return fmt.Errorf("WithOOMScore option not supported for this remote")
+}
+
+// WithSubreaper sets whether containerd should register itself as a
+// subreaper
+func WithSubreaper(reap bool) RemoteOption {
+	return subreaper(reap)
+}
+
+type subreaper bool
+
+func (s subreaper) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.Subreaper = bool(s)
+		return nil
+	}
+	return fmt.Errorf("WithSubreaper option not supported for this remote")
+}
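A hypothetical caller wiring the Unix-only options together with the common ones when constructing the remote; the paths and numeric values below are examples only, not the daemon's defaults, which come from the dockerd configuration.

package sketch

import (
	"github.com/docker/docker/libcontainerd"
)

// newRemote builds a libcontainerd remote that manages its own
// docker-containerd process. All literal values are illustrative.
func newRemote() (libcontainerd.Remote, error) {
	return libcontainerd.New(
		"/var/lib/docker/containerd", // rootDir
		"/var/run/docker/containerd", // stateDir
		libcontainerd.WithStartDaemon(true),
		libcontainerd.WithOOMScore(-500),
		libcontainerd.WithSubreaper(true),
		libcontainerd.WithLogLevel("info"),
	)
}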

+ 56 - 0
libcontainerd/remote_daemon_process.go

@@ -0,0 +1,56 @@
+// +build !windows
+
+package libcontainerd
+
+import "github.com/pkg/errors"
+
+// process represents the state for the main container process or an exec.
+type process struct {
+	// id is the logical name of the process
+	id string
+
+	// cid is the container id to which this process belongs
+	cid string
+
+	// pid is the identifier of the process
+	pid uint32
+
+	// io holds the io reader/writer associated with the process
+	io *IOPipe
+
+	// root is the state directory for the process
+	root string
+}
+
+func (p *process) ID() string {
+	return p.id
+}
+
+func (p *process) Pid() uint32 {
+	return p.pid
+}
+
+func (p *process) SetPid(pid uint32) error {
+	if p.pid != 0 {
+		return errors.Errorf("pid is already set to %d", pid)
+	}
+
+	p.pid = pid
+	return nil
+}
+
+func (p *process) IOPipe() *IOPipe {
+	return p.io
+}
+
+func (p *process) CloseIO() {
+	if p.io.Stdin != nil {
+		p.io.Stdin.Close()
+	}
+	if p.io.Stdout != nil {
+		p.io.Stdout.Close()
+	}
+	if p.io.Stderr != nil {
+		p.io.Stderr.Close()
+	}
+}

+ 61 - 0
libcontainerd/remote_daemon_process_unix.go

@@ -0,0 +1,61 @@
+// +build linux solaris
+
+package libcontainerd
+
+import (
+	"os"
+	"path/filepath"
+
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+)
+
+var fdNames = map[int]string{
+	unix.Stdin:  "stdin",
+	unix.Stdout: "stdout",
+	unix.Stderr: "stderr",
+}
+
+func (p *process) pipeName(index int) string {
+	return filepath.Join(p.root, p.id+"-"+fdNames[index])
+}
+
+func (p *process) IOPaths() (string, string, string) {
+	var (
+		stdin  = p.pipeName(unix.Stdin)
+		stdout = p.pipeName(unix.Stdout)
+		stderr = p.pipeName(unix.Stderr)
+	)
+	// TODO: debug why we're having zombies when I don't unset those
+	if p.io.Stdin == nil {
+		stdin = ""
+	}
+	if p.io.Stderr == nil {
+		stderr = ""
+	}
+	return stdin, stdout, stderr
+}
+
+func (p *process) Cleanup() error {
+	var retErr error
+
+	// Ensure everything was closed
+	p.CloseIO()
+
+	for _, i := range [3]string{
+		p.pipeName(unix.Stdin),
+		p.pipeName(unix.Stdout),
+		p.pipeName(unix.Stderr),
+	} {
+		err := os.Remove(i)
+		if err != nil {
+			if retErr == nil {
+				retErr = errors.Wrapf(err, "failed to remove %s", i)
+			} else {
+				retErr = errors.Wrapf(retErr, "failed to remove %s", i)
+			}
+		}
+	}
+
+	return retErr
+}
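pipeName and Cleanup above encode a simple convention: each stream of a process lives in the process state directory as "<process id>-<stream name>". A small sketch that reproduces the naming; pipePaths is illustrative and not part of this package.

package sketch

import "path/filepath"

// fdNames mirrors the stdin/stdout/stderr mapping used by pipeName above.
var fdNames = map[int]string{0: "stdin", 1: "stdout", 2: "stderr"}

// pipePaths returns the three pipe locations for a process rooted at
// root with the given logical id.
func pipePaths(root, id string) (stdin, stdout, stderr string) {
	stdin = filepath.Join(root, id+"-"+fdNames[0])
	stdout = filepath.Join(root, id+"-"+fdNames[1])
	stderr = filepath.Join(root, id+"-"+fdNames[2])
	return
}

So a process with id "hello" rooted at /tmp/proc would use /tmp/proc/hello-stdin, /tmp/proc/hello-stdout and /tmp/proc/hello-stderr.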

+ 56 - 0
libcontainerd/remote_daemon_unix.go

@@ -0,0 +1,56 @@
+// +build linux solaris
+
+package libcontainerd
+
+import (
+	"os"
+	"path/filepath"
+	"syscall"
+	"time"
+
+	"github.com/docker/docker/pkg/system"
+)
+
+const (
+	sockFile      = "docker-containerd.sock"
+	debugSockFile = "docker-containerd-debug.sock"
+)
+
+func (r *remote) setDefaults() {
+	if r.GRPC.Address == "" {
+		r.GRPC.Address = filepath.Join(r.stateDir, sockFile)
+	}
+	if r.Debug.Address == "" {
+		r.Debug.Address = filepath.Join(r.stateDir, debugSockFile)
+	}
+	if r.Debug.Level == "" {
+		r.Debug.Level = "info"
+	}
+	if r.OOMScore == 0 {
+		r.OOMScore = -999
+	}
+	if r.snapshotter == "" {
+		r.snapshotter = "overlay"
+	}
+}
+
+func (r *remote) stopDaemon() {
+	// Ask the daemon to quit
+	syscall.Kill(r.daemonPid, syscall.SIGTERM)
+	// Wait up to 15secs for it to stop
+	for i := time.Duration(0); i < shutdownTimeout; i += time.Second {
+		if !system.IsProcessAlive(r.daemonPid) {
+			break
+		}
+		time.Sleep(time.Second)
+	}
+
+	if system.IsProcessAlive(r.daemonPid) {
+		r.logger.WithField("pid", r.daemonPid).Warn("daemon didn't stop within 15 secs, killing it")
+		syscall.Kill(r.daemonPid, syscall.SIGKILL)
+	}
+}
+
+func (r *remote) platformCleanup() {
+	os.Remove(filepath.Join(r.stateDir, sockFile))
+}
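stopDaemon escalates in two steps: send SIGTERM, poll for exit during the grace period, then SIGKILL if the process is still alive. The same escalation as a standalone Unix helper, with isAlive standing in for system.IsProcessAlive:

package sketch

import (
	"syscall"
	"time"
)

// stopProcess asks pid to exit with SIGTERM and force-kills it once the
// grace period expires, mirroring stopDaemon above.
func stopProcess(pid int, grace time.Duration, isAlive func(int) bool) {
	_ = syscall.Kill(pid, syscall.SIGTERM)

	deadline := time.Now().Add(grace)
	for time.Now().Before(deadline) {
		if !isAlive(pid) {
			return
		}
		time.Sleep(time.Second)
	}

	if isAlive(pid) {
		_ = syscall.Kill(pid, syscall.SIGKILL)
	}
}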

+ 50 - 0
libcontainerd/remote_daemon_windows.go

@@ -0,0 +1,50 @@
+// +build remote_daemon
+
+package libcontainerd
+
+import (
+	"os"
+)
+
+const (
+	grpcPipeName  = `\\.\pipe\docker-containerd-containerd`
+	debugPipeName = `\\.\pipe\docker-containerd-debug`
+)
+
+func (r *remote) setDefaults() {
+	if r.GRPC.Address == "" {
+		r.GRPC.Address = grpcPipeName
+	}
+	if r.Debug.Address == "" {
+		r.Debug.Address = debugPipeName
+	}
+	if r.Debug.Level == "" {
+		r.Debug.Level = "info"
+	}
+	if r.snapshotter == "" {
+		r.snapshotter = "naive" // TODO(mlaventure): switch to "windows" once implemented
+	}
+}
+
+func (r *remote) stopDaemon() {
+	p, err := os.FindProcess(r.daemonPid)
+	if err != nil {
+		r.logger.WithField("pid", r.daemonPid).Warn("could not find daemon process")
+		return
+	}
+
+	if err = p.Kill(); err != nil {
+		r.logger.WithError(err).WithField("pid", r.daemonPid).Warn("could not kill daemon process")
+		return
+	}
+
+	_, err = p.Wait()
+	if err != nil {
+		r.logger.WithError(err).WithField("pid", r.daemonPid).Warn("failed to wait for daemon process")
+		return
+	}
+}
+
+func (r *remote) platformCleanup() {
+	// Nothing to do
+}

+ 59 - 0
libcontainerd/remote_local.go

@@ -0,0 +1,59 @@
+// +build windows
+
+package libcontainerd
+
+import (
+	"sync"
+
+	"github.com/sirupsen/logrus"
+)
+
+type remote struct {
+	sync.RWMutex
+
+	logger  *logrus.Entry
+	clients []*client
+
+	// Options
+	rootDir  string
+	stateDir string
+}
+
+// New creates a fresh instance of libcontainerd remote.
+func New(rootDir, stateDir string, options ...RemoteOption) (Remote, error) {
+	return &remote{
+		logger:   logrus.WithField("module", "libcontainerd"),
+		rootDir:  rootDir,
+		stateDir: stateDir,
+	}, nil
+}
+
+type client struct {
+	sync.Mutex
+
+	rootDir    string
+	stateDir   string
+	backend    Backend
+	logger     *logrus.Entry
+	eventQ     queue
+	containers map[string]*container
+}
+
+func (r *remote) NewClient(ns string, b Backend) (Client, error) {
+	c := &client{
+		rootDir:    r.rootDir,
+		stateDir:   r.stateDir,
+		backend:    b,
+		logger:     r.logger.WithField("namespace", ns),
+		containers: make(map[string]*container),
+	}
+	r.Lock()
+	r.clients = append(r.clients, c)
+	r.Unlock()
+
+	return c, nil
+}
+
+func (r *remote) Cleanup() {
+	// Nothing to do
+}

+ 0 - 565
libcontainerd/remote_unix.go

@@ -1,565 +0,0 @@
-// +build linux solaris
-
-package libcontainerd
-
-import (
-	"fmt"
-	"io"
-	"io/ioutil"
-	"log"
-	"net"
-	"os"
-	"os/exec"
-	"path/filepath"
-	goruntime "runtime"
-	"strconv"
-	"strings"
-	"sync"
-	"time"
-
-	containerd "github.com/containerd/containerd/api/grpc/types"
-	"github.com/docker/docker/pkg/locker"
-	"github.com/docker/docker/pkg/system"
-	"github.com/golang/protobuf/ptypes"
-	"github.com/golang/protobuf/ptypes/timestamp"
-	"github.com/sirupsen/logrus"
-	"golang.org/x/net/context"
-	"golang.org/x/sys/unix"
-	"google.golang.org/grpc"
-	"google.golang.org/grpc/grpclog"
-	"google.golang.org/grpc/health/grpc_health_v1"
-	"google.golang.org/grpc/transport"
-)
-
-const (
-	maxConnectionRetryCount      = 3
-	containerdHealthCheckTimeout = 3 * time.Second
-	containerdShutdownTimeout    = 15 * time.Second
-	containerdBinary             = "docker-containerd"
-	containerdPidFilename        = "docker-containerd.pid"
-	containerdSockFilename       = "docker-containerd.sock"
-	containerdStateDir           = "containerd"
-	eventTimestampFilename       = "event.ts"
-)
-
-type remote struct {
-	sync.RWMutex
-	apiClient            containerd.APIClient
-	daemonPid            int
-	stateDir             string
-	rpcAddr              string
-	startDaemon          bool
-	closedManually       bool
-	debugLog             bool
-	rpcConn              *grpc.ClientConn
-	clients              []*client
-	eventTsPath          string
-	runtime              string
-	runtimeArgs          []string
-	daemonWaitCh         chan struct{}
-	liveRestore          bool
-	oomScore             int
-	restoreFromTimestamp *timestamp.Timestamp
-}
-
-// New creates a fresh instance of libcontainerd remote.
-func New(stateDir string, options ...RemoteOption) (_ Remote, err error) {
-	defer func() {
-		if err != nil {
-			err = fmt.Errorf("Failed to connect to containerd. Please make sure containerd is installed in your PATH or you have specified the correct address. Got error: %v", err)
-		}
-	}()
-	r := &remote{
-		stateDir:    stateDir,
-		daemonPid:   -1,
-		eventTsPath: filepath.Join(stateDir, eventTimestampFilename),
-	}
-	for _, option := range options {
-		if err := option.Apply(r); err != nil {
-			return nil, err
-		}
-	}
-
-	if err := system.MkdirAll(stateDir, 0700, ""); err != nil {
-		return nil, err
-	}
-
-	if r.rpcAddr == "" {
-		r.rpcAddr = filepath.Join(stateDir, containerdSockFilename)
-	}
-
-	if r.startDaemon {
-		if err := r.runContainerdDaemon(); err != nil {
-			return nil, err
-		}
-	}
-
-	// don't output the grpc reconnect logging
-	grpclog.SetLogger(log.New(ioutil.Discard, "", log.LstdFlags))
-	dialOpts := []grpc.DialOption{
-		grpc.WithInsecure(),
-		grpc.WithBackoffMaxDelay(2 * time.Second),
-		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
-			return net.DialTimeout("unix", addr, timeout)
-		}),
-	}
-	conn, err := grpc.Dial(r.rpcAddr, dialOpts...)
-	if err != nil {
-		return nil, fmt.Errorf("error connecting to containerd: %v", err)
-	}
-
-	r.rpcConn = conn
-	r.apiClient = containerd.NewAPIClient(conn)
-
-	// Get the timestamp to restore from
-	t := r.getLastEventTimestamp()
-	tsp, err := ptypes.TimestampProto(t)
-	if err != nil {
-		logrus.Errorf("libcontainerd: failed to convert timestamp: %q", err)
-	}
-	r.restoreFromTimestamp = tsp
-
-	go r.handleConnectionChange()
-
-	if err := r.startEventsMonitor(); err != nil {
-		return nil, err
-	}
-
-	return r, nil
-}
-
-func (r *remote) UpdateOptions(options ...RemoteOption) error {
-	for _, option := range options {
-		if err := option.Apply(r); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func (r *remote) handleConnectionChange() {
-	var transientFailureCount = 0
-
-	ticker := time.NewTicker(500 * time.Millisecond)
-	defer ticker.Stop()
-	healthClient := grpc_health_v1.NewHealthClient(r.rpcConn)
-
-	for {
-		<-ticker.C
-		ctx, cancel := context.WithTimeout(context.Background(), containerdHealthCheckTimeout)
-		_, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{})
-		cancel()
-		if err == nil {
-			continue
-		}
-
-		logrus.Debugf("libcontainerd: containerd health check returned error: %v", err)
-
-		if r.daemonPid != -1 {
-			if r.closedManually {
-				// Well, we asked for it to stop, just return
-				return
-			}
-			// all other errors are transient
-			// Reset state to be notified of next failure
-			transientFailureCount++
-			if transientFailureCount >= maxConnectionRetryCount {
-				transientFailureCount = 0
-				if system.IsProcessAlive(r.daemonPid) {
-					system.KillProcess(r.daemonPid)
-				}
-				<-r.daemonWaitCh
-				if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error
-					logrus.Errorf("libcontainerd: error restarting containerd: %v", err)
-				}
-				continue
-			}
-		}
-	}
-}
-
-func (r *remote) Cleanup() {
-	if r.daemonPid == -1 {
-		return
-	}
-	r.closedManually = true
-	r.rpcConn.Close()
-	// Ask the daemon to quit
-	unix.Kill(r.daemonPid, unix.SIGTERM)
-
-	// Wait up to 15secs for it to stop
-	for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second {
-		if !system.IsProcessAlive(r.daemonPid) {
-			break
-		}
-		time.Sleep(time.Second)
-	}
-
-	if system.IsProcessAlive(r.daemonPid) {
-		logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid)
-		unix.Kill(r.daemonPid, unix.SIGKILL)
-	}
-
-	// cleanup some files
-	os.Remove(filepath.Join(r.stateDir, containerdPidFilename))
-	os.Remove(filepath.Join(r.stateDir, containerdSockFilename))
-}
-
-func (r *remote) Client(b Backend) (Client, error) {
-	c := &client{
-		clientCommon: clientCommon{
-			backend:    b,
-			containers: make(map[string]*container),
-			locker:     locker.New(),
-		},
-		remote:        r,
-		exitNotifiers: make(map[string]*exitNotifier),
-		liveRestore:   r.liveRestore,
-	}
-
-	r.Lock()
-	r.clients = append(r.clients, c)
-	r.Unlock()
-	return c, nil
-}
-
-func (r *remote) updateEventTimestamp(t time.Time) {
-	f, err := os.OpenFile(r.eventTsPath, unix.O_CREAT|unix.O_WRONLY|unix.O_TRUNC, 0600)
-	if err != nil {
-		logrus.Warnf("libcontainerd: failed to open event timestamp file: %v", err)
-		return
-	}
-	defer f.Close()
-
-	b, err := t.MarshalText()
-	if err != nil {
-		logrus.Warnf("libcontainerd: failed to encode timestamp: %v", err)
-		return
-	}
-
-	n, err := f.Write(b)
-	if err != nil || n != len(b) {
-		logrus.Warnf("libcontainerd: failed to update event timestamp file: %v", err)
-		f.Truncate(0)
-		return
-	}
-}
-
-func (r *remote) getLastEventTimestamp() time.Time {
-	t := time.Now()
-
-	fi, err := os.Stat(r.eventTsPath)
-	if os.IsNotExist(err) || fi.Size() == 0 {
-		return t
-	}
-
-	f, err := os.Open(r.eventTsPath)
-	if err != nil {
-		logrus.Warnf("libcontainerd: Unable to access last event ts: %v", err)
-		return t
-	}
-	defer f.Close()
-
-	b := make([]byte, fi.Size())
-	n, err := f.Read(b)
-	if err != nil || n != len(b) {
-		logrus.Warnf("libcontainerd: Unable to read last event ts: %v", err)
-		return t
-	}
-
-	t.UnmarshalText(b)
-
-	return t
-}
-
-func (r *remote) startEventsMonitor() error {
-	// First, get past events
-	t := r.getLastEventTimestamp()
-	tsp, err := ptypes.TimestampProto(t)
-	if err != nil {
-		logrus.Errorf("libcontainerd: failed to convert timestamp: %q", err)
-	}
-	er := &containerd.EventsRequest{
-		Timestamp: tsp,
-	}
-
-	var events containerd.API_EventsClient
-	for {
-		events, err = r.apiClient.Events(context.Background(), er, grpc.FailFast(false))
-		if err == nil {
-			break
-		}
-		logrus.Warnf("libcontainerd: failed to get events from containerd: %q", err)
-
-		if r.closedManually {
-			// ignore error if grpc remote connection is closed manually
-			return nil
-		}
-
-		<-time.After(100 * time.Millisecond)
-	}
-
-	go r.handleEventStream(events)
-	return nil
-}
-
-func (r *remote) handleEventStream(events containerd.API_EventsClient) {
-	for {
-		e, err := events.Recv()
-		if err != nil {
-			if grpc.ErrorDesc(err) == transport.ErrConnClosing.Desc &&
-				r.closedManually {
-				// ignore error if grpc remote connection is closed manually
-				return
-			}
-			logrus.Errorf("libcontainerd: failed to receive event from containerd: %v", err)
-			go r.startEventsMonitor()
-			return
-		}
-
-		logrus.Debugf("libcontainerd: received containerd event: %#v", e)
-
-		var container *container
-		var c *client
-		r.RLock()
-		for _, c = range r.clients {
-			container, err = c.getContainer(e.Id)
-			if err == nil {
-				break
-			}
-		}
-		r.RUnlock()
-		if container == nil {
-			logrus.Warnf("libcontainerd: unknown container %s", e.Id)
-			continue
-		}
-
-		if err := container.handleEvent(e); err != nil {
-			logrus.Errorf("libcontainerd: error processing state change for %s: %v", e.Id, err)
-		}
-
-		tsp, err := ptypes.Timestamp(e.Timestamp)
-		if err != nil {
-			logrus.Errorf("libcontainerd: failed to convert event timestamp: %q", err)
-			continue
-		}
-
-		r.updateEventTimestamp(tsp)
-	}
-}
-
-func (r *remote) runContainerdDaemon() error {
-	pidFilename := filepath.Join(r.stateDir, containerdPidFilename)
-	f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600)
-	if err != nil {
-		return err
-	}
-	defer f.Close()
-
-	// File exists, check if the daemon is alive
-	b := make([]byte, 8)
-	n, err := f.Read(b)
-	if err != nil && err != io.EOF {
-		return err
-	}
-
-	if n > 0 {
-		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
-		if err != nil {
-			return err
-		}
-		if system.IsProcessAlive(int(pid)) {
-			logrus.Infof("libcontainerd: previous instance of containerd still alive (%d)", pid)
-			r.daemonPid = int(pid)
-			return nil
-		}
-	}
-
-	// rewind the file
-	_, err = f.Seek(0, os.SEEK_SET)
-	if err != nil {
-		return err
-	}
-
-	// Truncate it
-	err = f.Truncate(0)
-	if err != nil {
-		return err
-	}
-
-	// Start a new instance
-	args := []string{
-		"-l", fmt.Sprintf("unix://%s", r.rpcAddr),
-		"--metrics-interval=0",
-		"--start-timeout", "2m",
-		"--state-dir", filepath.Join(r.stateDir, containerdStateDir),
-	}
-	if goruntime.GOOS == "solaris" {
-		args = append(args, "--shim", "containerd-shim", "--runtime", "runc")
-	} else {
-		args = append(args, "--shim", "docker-containerd-shim")
-		if r.runtime != "" {
-			args = append(args, "--runtime")
-			args = append(args, r.runtime)
-		}
-	}
-	if r.debugLog {
-		args = append(args, "--debug")
-	}
-	if len(r.runtimeArgs) > 0 {
-		for _, v := range r.runtimeArgs {
-			args = append(args, "--runtime-args")
-			args = append(args, v)
-		}
-		logrus.Debugf("libcontainerd: runContainerdDaemon: runtimeArgs: %s", args)
-	}
-
-	cmd := exec.Command(containerdBinary, args...)
-	// redirect containerd logs to docker logs
-	cmd.Stdout = os.Stdout
-	cmd.Stderr = os.Stderr
-	cmd.SysProcAttr = setSysProcAttr(true)
-	cmd.Env = nil
-	// clear the NOTIFY_SOCKET from the env when starting containerd
-	for _, e := range os.Environ() {
-		if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
-			cmd.Env = append(cmd.Env, e)
-		}
-	}
-	if err := cmd.Start(); err != nil {
-		return err
-	}
-
-	// unless strictly necessary, do not add anything in between here
-	// as the reaper goroutine below needs to kick in as soon as possible
-	// and any "return" from code paths added here will defeat the reaper
-	// process.
-
-	r.daemonWaitCh = make(chan struct{})
-	go func() {
-		cmd.Wait()
-		close(r.daemonWaitCh)
-	}() // Reap our child when needed
-
-	logrus.Infof("libcontainerd: new containerd process, pid: %d", cmd.Process.Pid)
-	if err := setOOMScore(cmd.Process.Pid, r.oomScore); err != nil {
-		system.KillProcess(cmd.Process.Pid)
-		return err
-	}
-	if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil {
-		system.KillProcess(cmd.Process.Pid)
-		return err
-	}
-
-	r.daemonPid = cmd.Process.Pid
-	return nil
-}
-
-// WithRemoteAddr sets the external containerd socket to connect to.
-func WithRemoteAddr(addr string) RemoteOption {
-	return rpcAddr(addr)
-}
-
-type rpcAddr string
-
-func (a rpcAddr) Apply(r Remote) error {
-	if remote, ok := r.(*remote); ok {
-		remote.rpcAddr = string(a)
-		return nil
-	}
-	return fmt.Errorf("WithRemoteAddr option not supported for this remote")
-}
-
-// WithRuntimePath sets the path of the runtime to be used as the
-// default by containerd
-func WithRuntimePath(rt string) RemoteOption {
-	return runtimePath(rt)
-}
-
-type runtimePath string
-
-func (rt runtimePath) Apply(r Remote) error {
-	if remote, ok := r.(*remote); ok {
-		remote.runtime = string(rt)
-		return nil
-	}
-	return fmt.Errorf("WithRuntime option not supported for this remote")
-}
-
-// WithRuntimeArgs sets the list of runtime args passed to containerd
-func WithRuntimeArgs(args []string) RemoteOption {
-	return runtimeArgs(args)
-}
-
-type runtimeArgs []string
-
-func (rt runtimeArgs) Apply(r Remote) error {
-	if remote, ok := r.(*remote); ok {
-		remote.runtimeArgs = rt
-		return nil
-	}
-	return fmt.Errorf("WithRuntimeArgs option not supported for this remote")
-}
-
-// WithStartDaemon defines if libcontainerd should also run containerd daemon.
-func WithStartDaemon(start bool) RemoteOption {
-	return startDaemon(start)
-}
-
-type startDaemon bool
-
-func (s startDaemon) Apply(r Remote) error {
-	if remote, ok := r.(*remote); ok {
-		remote.startDaemon = bool(s)
-		return nil
-	}
-	return fmt.Errorf("WithStartDaemon option not supported for this remote")
-}
-
-// WithDebugLog defines if containerd debug logs will be enabled for daemon.
-func WithDebugLog(debug bool) RemoteOption {
-	return debugLog(debug)
-}
-
-type debugLog bool
-
-func (d debugLog) Apply(r Remote) error {
-	if remote, ok := r.(*remote); ok {
-		remote.debugLog = bool(d)
-		return nil
-	}
-	return fmt.Errorf("WithDebugLog option not supported for this remote")
-}
-
-// WithLiveRestore defines if containers are stopped on shutdown or restored.
-func WithLiveRestore(v bool) RemoteOption {
-	return liveRestore(v)
-}
-
-type liveRestore bool
-
-func (l liveRestore) Apply(r Remote) error {
-	if remote, ok := r.(*remote); ok {
-		remote.liveRestore = bool(l)
-		for _, c := range remote.clients {
-			c.liveRestore = bool(l)
-		}
-		return nil
-	}
-	return fmt.Errorf("WithLiveRestore option not supported for this remote")
-}
-
-// WithOOMScore defines the oom_score_adj to set for the containerd process.
-func WithOOMScore(score int) RemoteOption {
-	return oomScore(score)
-}
-
-type oomScore int
-
-func (o oomScore) Apply(r Remote) error {
-	if remote, ok := r.(*remote); ok {
-		remote.oomScore = int(o)
-		return nil
-	}
-	return fmt.Errorf("WithOOMScore option not supported for this remote")
-}
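
For context, the RemoteOption values deleted above follow Go's functional-options pattern: each option is a small named type whose Apply method mutates the concrete *remote. Below is a minimal, hypothetical usage sketch; the New(stateDir string, options ...RemoteOption) constructor matches the Windows stub shown in the next hunk, while the state-dir path and the option values are assumptions for illustration only.

package main

import (
	"log"

	"github.com/docker/docker/libcontainerd"
)

func main() {
	// Hypothetical caller of the legacy API, combining the options removed above.
	// Only the options themselves appear in this diff; the path is an assumption.
	remote, err := libcontainerd.New("/var/run/docker/libcontainerd",
		libcontainerd.WithDebugLog(true),
		libcontainerd.WithOOMScore(-500),
		libcontainerd.WithLiveRestore(true),
	)
	if err != nil {
		log.Fatal(err)
	}
	// Cleanup stops the containerd process if libcontainerd started it.
	defer remote.Cleanup()
}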

+ 0 - 36
libcontainerd/remote_windows.go

@@ -1,36 +0,0 @@
-package libcontainerd
-
-import "github.com/docker/docker/pkg/locker"
-
-type remote struct {
-}
-
-func (r *remote) Client(b Backend) (Client, error) {
-	c := &client{
-		clientCommon: clientCommon{
-			backend:    b,
-			containers: make(map[string]*container),
-			locker:     locker.New(),
-		},
-	}
-	return c, nil
-}
-
-// Cleanup is a no-op on Windows. It is here to implement the interface.
-func (r *remote) Cleanup() {
-}
-
-func (r *remote) UpdateOptions(opts ...RemoteOption) error {
-	return nil
-}
-
-// New creates a fresh instance of libcontainerd remote. On Windows,
-// this is not used as there is no remote containerd process.
-func New(_ string, _ ...RemoteOption) (Remote, error) {
-	return &remote{}, nil
-}
-
-// WithLiveRestore is a noop on windows.
-func WithLiveRestore(v bool) RemoteOption {
-	return nil
-}

+ 87 - 39
libcontainerd/types.go

@@ -1,64 +1,110 @@
 package libcontainerd
 
 import (
+	"context"
 	"io"
+	"time"
 
-	containerd "github.com/containerd/containerd/api/grpc/types"
+	"github.com/containerd/containerd"
 	"github.com/opencontainers/runtime-spec/specs-go"
-	"golang.org/x/net/context"
 )
 
-// State constants used in state change reporting.
+// EventType represents a possible event from libcontainerd
+type EventType string
+
+// Event constants used when reporting events
+const (
+	EventUnknown     EventType = "unknown"
+	EventExit        EventType = "exit"
+	EventOOM         EventType = "oom"
+	EventCreate      EventType = "create"
+	EventStart       EventType = "start"
+	EventExecAdded   EventType = "exec-added"
+	EventExecStarted EventType = "exec-started"
+	EventPaused      EventType = "paused"
+	EventResumed     EventType = "resumed"
+)
+
+// Status represents the current status of a container
+type Status string
+
+// Possible container statuses
 const (
-	StateStart       = "start-container"
-	StatePause       = "pause"
-	StateResume      = "resume"
-	StateExit        = "exit"
-	StateRestore     = "restore"
-	StateExitProcess = "exit-process"
-	StateOOM         = "oom" // fake state
+	// Running indicates the process is currently executing
+	StatusRunning Status = "running"
+	// Created indicates the process has been created within containerd but the
+	// user-defined process has not been started
+	StatusCreated Status = "created"
+	// Stopped indicates that the process has run and exited
+	StatusStopped Status = "stopped"
+	// Paused indicates that the process is currently paused
+	StatusPaused Status = "paused"
+	// Pausing indicates that the process is currently switching from a
+	// running state into a paused state
+	StatusPausing Status = "pausing"
+	// Unknown indicates that we could not determine the status from the runtime
+	StatusUnknown Status = "unknown"
 )
 
-// CommonStateInfo contains the state info common to all platforms.
-type CommonStateInfo struct { // FIXME: event?
-	State     string
-	Pid       uint32
-	ExitCode  uint32
-	ProcessID string
+// Remote on Linux defines the access point to the containerd gRPC API.
+// Remote on Windows is largely an unimplemented interface as there is
+// no remote containerd.
+type Remote interface {
+	// NewClient returns a new Client instance connected with the given Backend.
+	NewClient(namespace string, backend Backend) (Client, error)
+	// Cleanup stops containerd if it was started by libcontainerd.
+	// Note this is not used on Windows as there is no remote containerd.
+	Cleanup()
+}
+
+// RemoteOption allows configuring parameters of remotes.
+// This is unused on Windows.
+type RemoteOption interface {
+	Apply(Remote) error
+}
+
+// EventInfo contains the details of an event reported by libcontainerd
+type EventInfo struct {
+	ContainerID string
+	ProcessID   string
+	Pid         uint32
+	ExitCode    uint32
+	ExitedAt    time.Time
+	OOMKilled   bool
+	// Windows-only field
+	UpdatePending bool
 }
 
 // Backend defines callbacks that the client of the library needs to implement.
 type Backend interface {
-	StateChanged(containerID string, state StateInfo) error
+	ProcessEvent(containerID string, event EventType, ei EventInfo) error
 }
 
 // Client provides access to containerd features.
 type Client interface {
-	GetServerVersion(ctx context.Context) (*ServerVersion, error)
-	Create(containerID string, checkpoint string, checkpointDir string, spec specs.Spec, attachStdio StdioCallback, options ...CreateOption) error
-	Signal(containerID string, sig int) error
-	SignalProcess(containerID string, processFriendlyName string, sig int) error
-	AddProcess(ctx context.Context, containerID, processFriendlyName string, process Process, attachStdio StdioCallback) (int, error)
-	Resize(containerID, processFriendlyName string, width, height int) error
-	Pause(containerID string) error
-	Resume(containerID string) error
-	Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error
-	Stats(containerID string) (*Stats, error)
-	GetPidsForContainer(containerID string) ([]int, error)
-	Summary(containerID string) ([]Summary, error)
-	UpdateResources(containerID string, resources Resources) error
-	CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error
-	DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error
-	ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error)
-}
+	Restore(ctx context.Context, containerID string, attachStdio StdioCallback) (alive bool, pid int, err error)
+
+	Create(ctx context.Context, containerID string, spec *specs.Spec, runtimeOptions interface{}) error
+	Start(ctx context.Context, containerID, checkpointDir string, withStdin bool, attachStdio StdioCallback) (pid int, err error)
+	SignalProcess(ctx context.Context, containerID, processID string, signal int) error
+	Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error)
+	ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error
+	CloseStdin(ctx context.Context, containerID, processID string) error
+	Pause(ctx context.Context, containerID string) error
+	Resume(ctx context.Context, containerID string) error
+	Stats(ctx context.Context, containerID string) (*Stats, error)
+	ListPids(ctx context.Context, containerID string) ([]uint32, error)
+	Summary(ctx context.Context, containerID string) ([]Summary, error)
+	DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error)
+	Delete(ctx context.Context, containerID string) error
+	Status(ctx context.Context, containerID string) (Status, error)
 
-// CreateOption allows to configure parameters of container creation.
-type CreateOption interface {
-	Apply(interface{}) error
+	UpdateResources(ctx context.Context, containerID string, resources *Resources) error
+	CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error
 }
 
 // StdioCallback is called to connect a container or process stdio.
-type StdioCallback func(IOPipe) error
+type StdioCallback func(*IOPipe) (containerd.IO, error)
 
 // IOPipe contains the stdio streams.
 type IOPipe struct {
@@ -66,10 +112,12 @@ type IOPipe struct {
 	Stdout   io.ReadCloser
 	Stderr   io.ReadCloser
 	Terminal bool // Whether stderr is connected on Windows
+
+	cancel context.CancelFunc
+	config containerd.IOConfig
 }
 
 // ServerVersion contains version information as retrieved from the
 // server
 type ServerVersion struct {
-	containerd.GetServerVersionResponse
 }
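
To make the new types.go surface concrete, here is a minimal, hypothetical Backend implementation: ProcessEvent replaces the old StateChanged callback and receives the typed EventType and EventInfo defined above. The loggingBackend name and its print-only behaviour are invented for this sketch; the real daemon updates container state in this callback.

package main

import (
	"fmt"

	"github.com/docker/docker/libcontainerd"
)

// loggingBackend is a toy Backend that only prints the events delivered by
// libcontainerd.
type loggingBackend struct{}

func (b *loggingBackend) ProcessEvent(containerID string, event libcontainerd.EventType, ei libcontainerd.EventInfo) error {
	switch event {
	case libcontainerd.EventExit:
		fmt.Printf("container %s: process %q exited with code %d at %s\n",
			containerID, ei.ProcessID, ei.ExitCode, ei.ExitedAt)
	case libcontainerd.EventOOM:
		fmt.Printf("container %s: OOM (killed=%v)\n", containerID, ei.OOMKilled)
	default:
		fmt.Printf("container %s: event %q (pid %d)\n", containerID, event, ei.Pid)
	}
	return nil
}

func main() {
	// Compile-time check that loggingBackend satisfies the Backend interface.
	var _ libcontainerd.Backend = (*loggingBackend)(nil)
}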

Some files were not shown because too many files have changed in this diff