Browse code

Replace execdrivers with containerd implementation

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
Signed-off-by: Kenfe-Mickael Laventure <mickael.laventure@gmail.com>
Signed-off-by: Anusha Ragunathan <anusha@docker.com>
Tonis Tiigi 9 years ago
parent
commit
9c4570a958
89 changed files with 5692 additions and 1248 deletions
  1. 18 0
      Dockerfile
  2. 18 0
      Dockerfile.aarch64
  3. 18 0
      Dockerfile.armhf
  4. 18 0
      Dockerfile.gccgo
  5. 18 0
      Dockerfile.ppc64le
  6. 18 0
      Dockerfile.s390x
  7. 18 0
      Dockerfile.simple
  8. 0 11
      api/client/run.go
  9. 2 0
      api/server/router/container/exec.go
  10. 32 29
      container/container.go
  11. 23 38
      container/container_unix.go
  12. 12 11
      container/memory_store.go
  13. 4 340
      container/monitor.go
  14. 12 0
      container/mounts_unix.go
  15. 9 7
      container/state.go
  16. 3 5
      container/state_test.go
  17. 1 3
      container/state_unix.go
  18. 30 0
      daemon/apparmor_default.go
  19. 6 0
      daemon/apparmor_default_unsupported.go
  20. 131 0
      daemon/caps/utils_unix.go
  21. 1 1
      daemon/config.go
  22. 5 2
      daemon/config_unix.go
  23. 3 4
      daemon/container_operations.go
  24. 56 235
      daemon/container_operations_unix.go
  25. 89 75
      daemon/daemon.go
  26. 60 2
      daemon/daemon_linux.go
  27. 65 35
      daemon/daemon_linux_test.go
  28. 163 26
      daemon/daemon_unix.go
  29. 0 3
      daemon/delete.go
  30. 32 107
      daemon/exec.go
  31. 14 63
      daemon/exec/exec.go
  32. 26 0
      daemon/exec_linux.go
  33. 0 21
      daemon/exec_unix.go
  34. 0 1
      daemon/info.go
  35. 5 5
      daemon/inspect_unix.go
  36. 4 0
      daemon/kill.go
  37. 143 0
      daemon/monitor.go
  38. 14 0
      daemon/monitor_linux.go
  39. 652 0
      daemon/oci_linux.go
  40. 2 3
      daemon/pause.go
  41. 8 5
      daemon/resize.go
  42. 1600 0
      daemon/seccomp_default_linux.go
  43. 12 0
      daemon/seccomp_disabled.go
  44. 100 0
      daemon/seccomp_linux.go
  45. 28 27
      daemon/start.go
  46. 2 6
      daemon/stats.go
  47. 5 4
      daemon/stats_collector_unix.go
  48. 0 84
      daemon/stats_linux.go
  49. 0 14
      daemon/stats_windows.go
  50. 2 1
      daemon/top_unix.go
  51. 1 3
      daemon/unpause.go
  52. 1 1
      daemon/update.go
  53. 25 0
      daemon/update_linux.go
  54. 1 2
      daemon/volumes.go
  55. 9 10
      daemon/volumes_unix.go
  56. 3 2
      distribution/xfer/download_test.go
  57. 9 2
      docker/daemon.go
  58. 13 2
      docker/daemon_unix.go
  59. 28 0
      integration-cli/daemon.go
  60. 150 0
      integration-cli/docker_cli_daemon_experimental_test.go
  61. 13 1
      integration-cli/docker_cli_daemon_test.go
  62. 0 52
      integration-cli/docker_cli_exec_test.go
  63. 0 1
      integration-cli/docker_cli_info_test.go
  64. 9 4
      integration-cli/docker_cli_run_test.go
  65. 1 0
      layer/layer.go
  66. 12 0
      layer/layer_store.go
  67. 58 0
      libcontainerd/client.go
  68. 394 0
      libcontainerd/client_linux.go
  69. 83 0
      libcontainerd/client_liverestore_linux.go
  70. 39 0
      libcontainerd/client_shutdownrestore_linux.go
  71. 38 0
      libcontainerd/container.go
  72. 166 0
      libcontainerd/container_linux.go
  73. 31 0
      libcontainerd/pausemonitor_linux.go
  74. 18 0
      libcontainerd/process.go
  75. 107 0
      libcontainerd/process_linux.go
  76. 29 0
      libcontainerd/queue_linux.go
  77. 18 0
      libcontainerd/remote.go
  78. 401 0
      libcontainerd/remote_linux.go
  79. 59 0
      libcontainerd/types.go
  80. 44 0
      libcontainerd/types_linux.go
  81. 41 0
      libcontainerd/utils_linux.go
  82. 214 0
      oci/defaults_linux.go
  83. 6 0
      pkg/system/syscall_unix.go
  84. 24 0
      pkg/system/syscall_windows.go
  85. 118 0
      restartmanager/restartmanager.go
  86. 3 0
      restartmanager/restartmanager_test.go
  87. 2 0
      runconfig/streams.go
  88. 22 0
      utils/process_unix.go
  89. 20 0
      utils/process_windows.go

+ 18 - 0
Dockerfile

@@ -249,6 +249,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.aarch64

@@ -186,6 +186,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/tomlv github.com/BurntSushi/toml/cmd/tomlv \
 	&& rm -rf "$GOPATH"
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.armhf

@@ -205,6 +205,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.gccgo

@@ -73,6 +73,24 @@ VOLUME /var/lib/docker
 WORKDIR /go/src/github.com/docker/docker
 ENV DOCKER_BUILDTAGS apparmor seccomp selinux
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.ppc64le

@@ -197,6 +197,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.s390x

@@ -176,6 +176,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.simple

@@ -29,6 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 		aufs-tools \
 	&& rm -rf /var/lib/apt/lists/*
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+	&& git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 ENV AUTO_GOPATH 1
 WORKDIR /usr/src/docker
 COPY . /usr/src/docker

+ 0 - 11
api/client/run.go

@@ -14,7 +14,6 @@ import (
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/pkg/promise"
 	"github.com/docker/docker/pkg/signal"
-	"github.com/docker/docker/pkg/stringid"
 	runconfigopts "github.com/docker/docker/runconfig/opts"
 	"github.com/docker/engine-api/types"
 	"github.com/docker/libnetwork/resolvconf/dns"
@@ -256,16 +255,6 @@ func (cli *DockerCli) CmdRun(args ...string) error {
 
 	// Attached mode
 	if *flAutoRemove {
-		// Warn user if they detached us
-		js, err := cli.client.ContainerInspect(context.Background(), createResponse.ID)
-		if err != nil {
-			return runStartContainerErr(err)
-		}
-		if js.State.Running == true || js.State.Paused == true {
-			fmt.Fprintf(cli.out, "Detached from %s, awaiting its termination in order to uphold \"--rm\".\n",
-				stringid.TruncateID(createResponse.ID))
-		}
-
 		// Autoremove: wait for the container to finish, retrieve
 		// the exit code and remove the container
 		if status, err = cli.client.ContainerWait(context.Background(), createResponse.ID); err != nil {

+ 2 - 0
api/server/router/container/exec.go

@@ -112,7 +112,9 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res
 		if execStartCheck.Detach {
 			return err
 		}
+		stdout.Write([]byte(err.Error()))
 		logrus.Errorf("Error running exec in container: %v\n", err)
+		return err
 	}
 	return nil
 }

+ 32 - 29
container/container.go

@@ -17,7 +17,6 @@ import (
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/daemon/logger"
 	"github.com/docker/docker/daemon/logger/jsonfilelog"
 	"github.com/docker/docker/daemon/network"
@@ -27,6 +26,7 @@ import (
 	"github.com/docker/docker/pkg/promise"
 	"github.com/docker/docker/pkg/signal"
 	"github.com/docker/docker/pkg/symlink"
+	"github.com/docker/docker/restartmanager"
 	"github.com/docker/docker/runconfig"
 	runconfigopts "github.com/docker/docker/runconfig/opts"
 	"github.com/docker/docker/volume"
@@ -74,13 +74,12 @@ type CommonContainer struct {
 	HasBeenManuallyStopped bool // used for unless-stopped restart policy
 	MountPoints            map[string]*volume.MountPoint
 	HostConfig             *containertypes.HostConfig `json:"-"` // do not serialize the host config in the json, otherwise we'll make the container unportable
-	Command                *execdriver.Command        `json:"-"`
-	monitor                *containerMonitor
-	ExecCommands           *exec.Store `json:"-"`
+	ExecCommands           *exec.Store                `json:"-"`
 	// logDriver for closing
-	LogDriver     logger.Logger  `json:"-"`
-	LogCopier     *logger.Copier `json:"-"`
-	attachContext *attachContext
+	LogDriver      logger.Logger  `json:"-"`
+	LogCopier      *logger.Copier `json:"-"`
+	restartManager restartmanager.RestartManager
+	attachContext  *attachContext
 }
 
 // NewBaseContainer creates a new container with its
@@ -276,19 +275,9 @@ func (container *Container) GetRootResourcePath(path string) (string, error) {
 // ExitOnNext signals to the monitor that it should not restart the container
 // after we send the kill signal.
 func (container *Container) ExitOnNext() {
-	container.monitor.ExitOnNext()
-}
-
-// Resize changes the TTY of the process running inside the container
-// to the given height and width. The container must be running.
-func (container *Container) Resize(h, w int) error {
-	if container.Command.ProcessConfig.Terminal == nil {
-		return fmt.Errorf("Container %s does not have a terminal ready", container.ID)
-	}
-	if err := container.Command.ProcessConfig.Terminal.Resize(h, w); err != nil {
-		return err
+	if container.restartManager != nil {
+		container.restartManager.Cancel()
 	}
-	return nil
 }
 
 // HostConfigPath returns the path to the container's JSON hostconfig
@@ -897,19 +886,33 @@ func (container *Container) BuildCreateEndpointOptions(n libnetwork.Network, epC
 
 // UpdateMonitor updates monitor configure for running container
 func (container *Container) UpdateMonitor(restartPolicy containertypes.RestartPolicy) {
-	monitor := container.monitor
-	// No need to update monitor if container hasn't got one
-	// monitor will be generated correctly according to container
-	if monitor == nil {
-		return
+	type policySetter interface {
+		SetPolicy(containertypes.RestartPolicy)
+	}
+
+	if rm, ok := container.RestartManager(false).(policySetter); ok {
+		rm.SetPolicy(restartPolicy)
+	}
+}
+
+// FullHostname returns hostname and optional domain appended to it.
+func (container *Container) FullHostname() string {
+	fullHostname := container.Config.Hostname
	if container.Config.Domainname != "" {
+		fullHostname = fmt.Sprintf("%s.%s", fullHostname, container.Config.Domainname)
 	}
+	return fullHostname
+}
 
-	monitor.mux.Lock()
-	// to check whether restart policy has changed.
-	if restartPolicy.Name != "" && !monitor.restartPolicy.IsSame(&restartPolicy) {
-		monitor.restartPolicy = restartPolicy
+// RestartManager returns the current restartmanager instace connected to container.
+func (container *Container) RestartManager(reset bool) restartmanager.RestartManager {
+	if reset {
+		container.RestartCount = 0
+	}
+	if container.restartManager == nil {
+		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy)
 	}
-	monitor.mux.Unlock()
+	return container.restartManager
 }
 
 type attachContext struct {

+ 23 - 38
container/container_unix.go

@@ -11,7 +11,6 @@ import (
 	"syscall"
 
 	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/chrootarchive"
 	"github.com/docker/docker/pkg/symlink"
 	"github.com/docker/docker/pkg/system"
@@ -39,6 +38,15 @@ type Container struct {
 	NoNewPrivileges bool
 }
 
+// ExitStatus provides exit reasons for a container.
+type ExitStatus struct {
+	// The exit code with which the container exited.
+	ExitCode int
+
+	// Whether the container encountered an OOM.
+	OOMKilled bool
+}
+
 // CreateDaemonEnvironment returns the list of all environment variables given the list of
 // environment variables related to links.
 // Sets PATH, HOSTNAME and if container.Config.Tty is set: TERM.
@@ -57,7 +65,6 @@ func (container *Container) CreateDaemonEnvironment(linkedEnv []string) []string
 	// we need to replace the 'env' keys where they match and append anything
 	// else.
 	env = utils.ReplaceOrAppendEnvValues(env, container.Config.Env)
-
 	return env
 }
 
@@ -103,8 +110,8 @@ func appendNetworkMounts(container *Container, volumeMounts []volume.MountPoint)
 }
 
 // NetworkMounts returns the list of network mounts.
-func (container *Container) NetworkMounts() []execdriver.Mount {
-	var mounts []execdriver.Mount
+func (container *Container) NetworkMounts() []Mount {
+	var mounts []Mount
 	shared := container.HostConfig.NetworkMode.IsContainer()
 	if container.ResolvConfPath != "" {
 		if _, err := os.Stat(container.ResolvConfPath); err != nil {
@@ -115,7 +122,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 			if m, exists := container.MountPoints["/etc/resolv.conf"]; exists {
 				writable = m.RW
 			}
-			mounts = append(mounts, execdriver.Mount{
+			mounts = append(mounts, Mount{
 				Source:      container.ResolvConfPath,
 				Destination: "/etc/resolv.conf",
 				Writable:    writable,
@@ -132,7 +139,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 			if m, exists := container.MountPoints["/etc/hostname"]; exists {
 				writable = m.RW
 			}
-			mounts = append(mounts, execdriver.Mount{
+			mounts = append(mounts, Mount{
 				Source:      container.HostnamePath,
 				Destination: "/etc/hostname",
 				Writable:    writable,
@@ -149,7 +156,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 			if m, exists := container.MountPoints["/etc/hosts"]; exists {
 				writable = m.RW
 			}
-			mounts = append(mounts, execdriver.Mount{
+			mounts = append(mounts, Mount{
 				Source:      container.HostsPath,
 				Destination: "/etc/hosts",
 				Writable:    writable,
@@ -224,37 +231,26 @@ func (container *Container) UnmountIpcMounts(unmount func(pth string) error) {
 }
 
 // IpcMounts returns the list of IPC mounts
-func (container *Container) IpcMounts() []execdriver.Mount {
-	var mounts []execdriver.Mount
+func (container *Container) IpcMounts() []Mount {
+	var mounts []Mount
 
 	if !container.HasMountFor("/dev/shm") {
 		label.SetFileLabel(container.ShmPath, container.MountLabel)
-		mounts = append(mounts, execdriver.Mount{
+		mounts = append(mounts, Mount{
 			Source:      container.ShmPath,
 			Destination: "/dev/shm",
 			Writable:    true,
 			Propagation: volume.DefaultPropagationMode,
 		})
 	}
-	return mounts
-}
 
-func updateCommand(c *execdriver.Command, resources containertypes.Resources) {
-	c.Resources.BlkioWeight = resources.BlkioWeight
-	c.Resources.CPUShares = resources.CPUShares
-	c.Resources.CPUPeriod = resources.CPUPeriod
-	c.Resources.CPUQuota = resources.CPUQuota
-	c.Resources.CpusetCpus = resources.CpusetCpus
-	c.Resources.CpusetMems = resources.CpusetMems
-	c.Resources.Memory = resources.Memory
-	c.Resources.MemorySwap = resources.MemorySwap
-	c.Resources.MemoryReservation = resources.MemoryReservation
-	c.Resources.KernelMemory = resources.KernelMemory
+	return mounts
 }
 
 // UpdateContainer updates configuration of a container.
 func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfig) error {
 	container.Lock()
+	defer container.Unlock()
 
 	// update resources of container
 	resources := hostConfig.Resources
@@ -294,19 +290,8 @@ func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfi
 	if hostConfig.RestartPolicy.Name != "" {
 		container.HostConfig.RestartPolicy = hostConfig.RestartPolicy
 	}
-	container.Unlock()
-
-	// If container is not running, update hostConfig struct is enough,
-	// resources will be updated when the container is started again.
-	// If container is running (including paused), we need to update
-	// the command so we can update configs to the real world.
-	if container.IsRunning() {
-		container.Lock()
-		updateCommand(container.Command, *cResources)
-		container.Unlock()
-	}
 
-	if err := container.ToDiskLocking(); err != nil {
+	if err := container.ToDisk(); err != nil {
 		logrus.Errorf("Error saving updated container: %v", err)
 		return err
 	}
@@ -400,10 +385,10 @@ func copyOwnership(source, destination string) error {
 }
 
 // TmpfsMounts returns the list of tmpfs mounts
-func (container *Container) TmpfsMounts() []execdriver.Mount {
-	var mounts []execdriver.Mount
+func (container *Container) TmpfsMounts() []Mount {
+	var mounts []Mount
 	for dest, data := range container.HostConfig.Tmpfs {
-		mounts = append(mounts, execdriver.Mount{
+		mounts = append(mounts, Mount{
 			Source:      "tmpfs",
 			Destination: dest,
 			Data:        data,

+ 12 - 11
container/memory_store.go

@@ -5,7 +5,7 @@ import "sync"
 // memoryStore implements a Store in memory.
 type memoryStore struct {
 	s map[string]*Container
-	sync.Mutex
+	sync.RWMutex
 }
 
 // NewMemoryStore initializes a new memory store.
@@ -25,9 +25,9 @@ func (c *memoryStore) Add(id string, cont *Container) {
 
 // Get returns a container from the store by id.
 func (c *memoryStore) Get(id string) *Container {
-	c.Lock()
+	c.RLock()
 	res := c.s[id]
-	c.Unlock()
+	c.RUnlock()
 	return res
 }
 
@@ -42,26 +42,26 @@ func (c *memoryStore) Delete(id string) {
 // The containers are ordered by creation date.
 func (c *memoryStore) List() []*Container {
 	containers := new(History)
-	c.Lock()
+	c.RLock()
 	for _, cont := range c.s {
 		containers.Add(cont)
 	}
-	c.Unlock()
+	c.RUnlock()
 	containers.sort()
 	return *containers
 }
 
 // Size returns the number of containers in the store.
 func (c *memoryStore) Size() int {
-	c.Lock()
-	defer c.Unlock()
+	c.RLock()
+	defer c.RUnlock()
 	return len(c.s)
 }
 
 // First returns the first container found in the store by a given filter.
 func (c *memoryStore) First(filter StoreFilter) *Container {
-	c.Lock()
-	defer c.Unlock()
+	c.RLock()
+	defer c.RUnlock()
 	for _, cont := range c.s {
 		if filter(cont) {
 			return cont
@@ -72,9 +72,10 @@ func (c *memoryStore) First(filter StoreFilter) *Container {
 
 // ApplyAll calls the reducer function with every container in the store.
 // This operation is asyncronous in the memory store.
+// NOTE: Modifications to the store MUST NOT be done by the StoreReducer.
 func (c *memoryStore) ApplyAll(apply StoreReducer) {
-	c.Lock()
-	defer c.Unlock()
+	c.RLock()
+	defer c.RUnlock()
 
 	wg := new(sync.WaitGroup)
 	for _, cont := range c.s {

+ 4 - 340
container/monitor.go

@@ -1,24 +1,13 @@
 package container
 
 import (
-	"fmt"
-	"io"
-	"os/exec"
-	"strings"
-	"sync"
-	"syscall"
 	"time"
 
 	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/pkg/promise"
-	"github.com/docker/docker/pkg/stringid"
-	"github.com/docker/engine-api/types/container"
 )
 
 const (
-	defaultTimeIncrement = 100
-	loggerCloseTimeout   = 10 * time.Second
+	loggerCloseTimeout = 10 * time.Second
 )
 
 // supervisor defines the interface that a supervisor must implement
@@ -30,311 +19,13 @@ type supervisor interface {
 	// StartLogging starts the logging driver for the container
 	StartLogging(*Container) error
 	// Run starts a container
-	Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (execdriver.ExitStatus, error)
+	Run(c *Container) error
 	// IsShuttingDown tells whether the supervisor is shutting down or not
 	IsShuttingDown() bool
 }
 
-// containerMonitor monitors the execution of a container's main process.
-// If a restart policy is specified for the container the monitor will ensure that the
-// process is restarted based on the rules of the policy.  When the container is finally stopped
-// the monitor will reset and cleanup any of the container resources such as networking allocations
-// and the rootfs
-type containerMonitor struct {
-	mux sync.Mutex
-
-	// supervisor keeps track of the container and the events it generates
-	supervisor supervisor
-
-	// container is the container being monitored
-	container *Container
-
-	// restartPolicy is the current policy being applied to the container monitor
-	restartPolicy container.RestartPolicy
-
-	// failureCount is the number of times the container has failed to
-	// start in a row
-	failureCount int
-
-	// shouldStop signals the monitor that the next time the container exits it is
-	// either because docker or the user asked for the container to be stopped
-	shouldStop bool
-
-	// startSignal is a channel that is closes after the container initially starts
-	startSignal chan struct{}
-
-	// stopChan is used to signal to the monitor whenever there is a wait for the
-	// next restart so that the timeIncrement is not honored and the user is not
-	// left waiting for nothing to happen during this time
-	stopChan chan struct{}
-
-	// timeIncrement is the amount of time to wait between restarts
-	// this is in milliseconds
-	timeIncrement int
-
-	// lastStartTime is the time which the monitor last exec'd the container's process
-	lastStartTime time.Time
-}
-
-// StartMonitor initializes a containerMonitor for this container with the provided supervisor and restart policy
-// and starts the container's process.
-func (container *Container) StartMonitor(s supervisor) error {
-	container.monitor = &containerMonitor{
-		supervisor:    s,
-		container:     container,
-		restartPolicy: container.HostConfig.RestartPolicy,
-		timeIncrement: defaultTimeIncrement,
-		stopChan:      make(chan struct{}),
-		startSignal:   make(chan struct{}),
-	}
-
-	return container.monitor.wait()
-}
-
-// wait starts the container and wait until
-// we either receive an error from the initial start of the container's
-// process or until the process is running in the container
-func (m *containerMonitor) wait() error {
-	select {
-	case <-m.startSignal:
-	case err := <-promise.Go(m.start):
-		return err
-	}
-
-	return nil
-}
-
-// Stop signals to the container monitor that it should stop monitoring the container
-// for exits the next time the process dies
-func (m *containerMonitor) ExitOnNext() {
-	m.mux.Lock()
-
-	// we need to protect having a double close of the channel when stop is called
-	// twice or else we will get a panic
-	if !m.shouldStop {
-		m.shouldStop = true
-		close(m.stopChan)
-	}
-
-	m.mux.Unlock()
-}
-
-// Close closes the container's resources such as networking allocations and
-// unmounts the container's root filesystem
-func (m *containerMonitor) Close() error {
-	// Cleanup networking and mounts
-	m.supervisor.Cleanup(m.container)
-
-	if err := m.container.ToDisk(); err != nil {
-		logrus.Errorf("Error dumping container %s state to disk: %s", m.container.ID, err)
-
-		return err
-	}
-
-	return nil
-}
-
-// Start starts the containers process and monitors it according to the restart policy
-func (m *containerMonitor) start() error {
-	var (
-		err        error
-		exitStatus execdriver.ExitStatus
-		// this variable indicates where we in execution flow:
-		// before Run or after
-		afterRun bool
-	)
-
-	// ensure that when the monitor finally exits we release the networking and unmount the rootfs
-	defer func() {
-		if afterRun {
-			m.container.Lock()
-			defer m.container.Unlock()
-			m.container.SetStopped(&exitStatus)
-		}
-		m.Close()
-	}()
-	// reset stopped flag
-	if m.container.HasBeenManuallyStopped {
-		m.container.HasBeenManuallyStopped = false
-	}
-
-	// reset the restart count
-	m.container.RestartCount = -1
-
-	for {
-		m.container.RestartCount++
-
-		if err := m.supervisor.StartLogging(m.container); err != nil {
-			m.resetContainer(false)
-
-			return err
-		}
-
-		pipes := execdriver.NewPipes(m.container.Stdin(), m.container.Stdout(), m.container.Stderr(), m.container.Config.OpenStdin)
-
-		m.logEvent("start")
-
-		m.lastStartTime = time.Now()
-
-		if exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback); err != nil {
-			// if we receive an internal error from the initial start of a container then lets
-			// return it instead of entering the restart loop
-			// set to 127 for container cmd not found/does not exist)
-			if strings.Contains(err.Error(), "executable file not found") ||
-				strings.Contains(err.Error(), "no such file or directory") ||
-				strings.Contains(err.Error(), "system cannot find the file specified") {
-				if m.container.RestartCount == 0 {
-					m.container.ExitCode = 127
-					m.resetContainer(false)
-					return fmt.Errorf("Container command not found or does not exist.")
-				}
-			}
-			// set to 126 for container cmd can't be invoked errors
-			if strings.Contains(err.Error(), syscall.EACCES.Error()) {
-				if m.container.RestartCount == 0 {
-					m.container.ExitCode = 126
-					m.resetContainer(false)
-					return fmt.Errorf("Container command could not be invoked.")
-				}
-			}
-
-			if m.container.RestartCount == 0 {
-				m.container.ExitCode = -1
-				m.resetContainer(false)
-
-				return fmt.Errorf("Cannot start container %s: %v", m.container.ID, err)
-			}
-
-			logrus.Errorf("Error running container: %s", err)
-		}
-
-		// here container.Lock is already lost
-		afterRun = true
-
-		m.resetMonitor(err == nil && exitStatus.ExitCode == 0)
-
-		if m.shouldRestart(exitStatus.ExitCode) {
-			m.container.SetRestartingLocking(&exitStatus)
-			m.logEvent("die")
-			m.resetContainer(true)
-
-			// sleep with a small time increment between each restart to help avoid issues cased by quickly
-			// restarting the container because of some types of errors ( networking cut out, etc... )
-			m.waitForNextRestart()
-
-			// we need to check this before reentering the loop because the waitForNextRestart could have
-			// been terminated by a request from a user
-			if m.shouldStop {
-				return err
-			}
-			continue
-		}
-
-		m.logEvent("die")
-		m.resetContainer(true)
-		return err
-	}
-}
-
-// resetMonitor resets the stateful fields on the containerMonitor based on the
-// previous runs success or failure.  Regardless of success, if the container had
-// an execution time of more than 10s then reset the timer back to the default
-func (m *containerMonitor) resetMonitor(successful bool) {
-	executionTime := time.Now().Sub(m.lastStartTime).Seconds()
-
-	if executionTime > 10 {
-		m.timeIncrement = defaultTimeIncrement
-	} else {
-		// otherwise we need to increment the amount of time we wait before restarting
-		// the process.  We will build up by multiplying the increment by 2
-		m.timeIncrement *= 2
-	}
-
-	// the container exited successfully so we need to reset the failure counter
-	if successful {
-		m.failureCount = 0
-	} else {
-		m.failureCount++
-	}
-}
-
-// waitForNextRestart waits with the default time increment to restart the container unless
-// a user or docker asks for the container to be stopped
-func (m *containerMonitor) waitForNextRestart() {
-	select {
-	case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond):
-	case <-m.stopChan:
-	}
-}
-
-// shouldRestart checks the restart policy and applies the rules to determine if
-// the container's process should be restarted
-func (m *containerMonitor) shouldRestart(exitCode int) bool {
-	m.mux.Lock()
-	defer m.mux.Unlock()
-
-	// do not restart if the user or docker has requested that this container be stopped
-	if m.shouldStop {
-		m.container.HasBeenManuallyStopped = !m.supervisor.IsShuttingDown()
-		return false
-	}
-
-	switch {
-	case m.restartPolicy.IsAlways(), m.restartPolicy.IsUnlessStopped():
-		return true
-	case m.restartPolicy.IsOnFailure():
-		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
-		if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount > max {
-			logrus.Debugf("stopping restart of container %s because maximum failure could of %d has been reached",
-				stringid.TruncateID(m.container.ID), max)
-			return false
-		}
-
-		return exitCode != 0
-	}
-
-	return false
-}
-
-// callback ensures that the container's state is properly updated after we
-// received ack from the execution drivers
-func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
-	go func() {
-		for range chOOM {
-			m.logEvent("oom")
-		}
-	}()
-
-	if processConfig.Tty {
-		// The callback is called after the process start()
-		// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
-		// which we close here.
-		if c, ok := processConfig.Stdout.(io.Closer); ok {
-			c.Close()
-		}
-	}
-
-	m.container.SetRunning(pid)
-
-	// signal that the process has started
-	// close channel only if not closed
-	select {
-	case <-m.startSignal:
-	default:
-		close(m.startSignal)
-	}
-
-	if err := m.container.ToDiskLocking(); err != nil {
-		logrus.Errorf("Error saving container to disk: %v", err)
-	}
-	return nil
-}
-
-// resetContainer resets the container's IO and ensures that the command is able to be executed again
-// by copying the data into a new struct
-// if lock is true, then container locked during reset
-func (m *containerMonitor) resetContainer(lock bool) {
-	container := m.container
+// Reset puts a container into a state where it can be restarted again.
+func (container *Container) Reset(lock bool) {
 	if lock {
 	if lock {
 		container.Lock()
 		container.Lock()
 		defer container.Unlock()
 		defer container.Unlock()
@@ -344,12 +35,6 @@ func (m *containerMonitor) resetContainer(lock bool) {
 		logrus.Errorf("%s: %s", container.ID, err)
 		logrus.Errorf("%s: %s", container.ID, err)
 	}
 	}
 
 
-	if container.Command != nil && container.Command.ProcessConfig.Terminal != nil {
-		if err := container.Command.ProcessConfig.Terminal.Close(); err != nil {
-			logrus.Errorf("%s: Error closing terminal: %s", container.ID, err)
-		}
-	}
-
 	// Re-create a brand new stdin pipe once the container exited
 	// Re-create a brand new stdin pipe once the container exited
 	if container.Config.OpenStdin {
 	if container.Config.OpenStdin {
 		container.NewInputPipes()
 		container.NewInputPipes()
@@ -365,9 +50,6 @@ func (m *containerMonitor) resetContainer(lock bool) {
 			select {
 			select {
 			case <-time.After(loggerCloseTimeout):
 			case <-time.After(loggerCloseTimeout):
 				logrus.Warnf("Logger didn't exit in time: logs may be truncated")
 				logrus.Warnf("Logger didn't exit in time: logs may be truncated")
-				container.LogCopier.Close()
-				// always waits for the LogCopier to finished before closing
-				<-exit
 			case <-exit:
 			case <-exit:
 			}
 			}
 		}
 		}
@@ -375,22 +57,4 @@ func (m *containerMonitor) resetContainer(lock bool) {
 		container.LogCopier = nil
 		container.LogCopier = nil
 		container.LogDriver = nil
 		container.LogDriver = nil
 	}
 	}
-
-	c := container.Command.ProcessConfig.Cmd
-
-	container.Command.ProcessConfig.Cmd = exec.Cmd{
-		Stdin:       c.Stdin,
-		Stdout:      c.Stdout,
-		Stderr:      c.Stderr,
-		Path:        c.Path,
-		Env:         c.Env,
-		ExtraFiles:  c.ExtraFiles,
-		Args:        c.Args,
-		Dir:         c.Dir,
-		SysProcAttr: c.SysProcAttr,
-	}
-}
-
-func (m *containerMonitor) logEvent(action string) {
-	m.supervisor.LogContainerEvent(m.container, action)
 }
 }

+ 12 - 0
container/mounts_unix.go

@@ -0,0 +1,12 @@
+// +build !windows
+
+package container
+
+// Mount contains information for a mount operation.
+type Mount struct {
+	Source      string `json:"source"`
+	Destination string `json:"destination"`
+	Writable    bool   `json:"writable"`
+	Data        string `json:"data"`
+	Propagation string `json:"mountpropagation"`
+}

+ 9 - 7
container/state.go

@@ -5,7 +5,6 @@ import (
 	"sync"
 	"sync"
 	"time"
 	"time"
 
 
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/go-units"
 	"github.com/docker/go-units"
 )
 )
 
 
@@ -179,28 +178,31 @@ func (s *State) getExitCode() int {
 }
 }
 
 
 // SetRunning sets the state of the container to "running".
 // SetRunning sets the state of the container to "running".
-func (s *State) SetRunning(pid int) {
+func (s *State) SetRunning(pid int, initial bool) {
 	s.Error = ""
 	s.Error = ""
 	s.Running = true
 	s.Running = true
 	s.Paused = false
 	s.Paused = false
 	s.Restarting = false
 	s.Restarting = false
 	s.ExitCode = 0
 	s.ExitCode = 0
 	s.Pid = pid
 	s.Pid = pid
-	s.StartedAt = time.Now().UTC()
+	if initial {
+		s.StartedAt = time.Now().UTC()
+	}
 	close(s.waitChan) // fire waiters for start
 	close(s.waitChan) // fire waiters for start
 	s.waitChan = make(chan struct{})
 	s.waitChan = make(chan struct{})
 }
 }
 
 
 // SetStoppedLocking locks the container state is sets it to "stopped".
 // SetStoppedLocking locks the container state is sets it to "stopped".
-func (s *State) SetStoppedLocking(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetStoppedLocking(exitStatus *ExitStatus) {
 	s.Lock()
 	s.Lock()
 	s.SetStopped(exitStatus)
 	s.SetStopped(exitStatus)
 	s.Unlock()
 	s.Unlock()
 }
 }
 
 
 // SetStopped sets the container state to "stopped" without locking.
 // SetStopped sets the container state to "stopped" without locking.
-func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetStopped(exitStatus *ExitStatus) {
 	s.Running = false
 	s.Running = false
+	s.Paused = false
 	s.Restarting = false
 	s.Restarting = false
 	s.Pid = 0
 	s.Pid = 0
 	s.FinishedAt = time.Now().UTC()
 	s.FinishedAt = time.Now().UTC()
@@ -211,7 +213,7 @@ func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
 
 
 // SetRestartingLocking is when docker handles the auto restart of containers when they are
 // SetRestartingLocking is when docker handles the auto restart of containers when they are
 // in the middle of a stop and being restarted again
 // in the middle of a stop and being restarted again
-func (s *State) SetRestartingLocking(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetRestartingLocking(exitStatus *ExitStatus) {
 	s.Lock()
 	s.Lock()
 	s.SetRestarting(exitStatus)
 	s.SetRestarting(exitStatus)
 	s.Unlock()
 	s.Unlock()
@@ -219,7 +221,7 @@ func (s *State) SetRestartingLocking(exitStatus *execdriver.ExitStatus) {
 
 
 // SetRestarting sets the container state to "restarting".
 // SetRestarting sets the container state to "restarting".
 // It also sets the container PID to 0.
 // It also sets the container PID to 0.
-func (s *State) SetRestarting(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetRestarting(exitStatus *ExitStatus) {
 	// we should consider the container running when it is restarting because of
 	// we should consider the container running when it is restarting because of
 	// all the checks in docker around rm/stop/etc
 	// all the checks in docker around rm/stop/etc
 	s.Running = true
 	s.Running = true

+ 3 - 5
container/state_test.go

@@ -4,8 +4,6 @@ import (
 	"sync/atomic"
 	"sync/atomic"
 	"testing"
 	"testing"
 	"time"
 	"time"
-
-	"github.com/docker/docker/daemon/execdriver"
 )
 )
 
 
 func TestStateRunStop(t *testing.T) {
 func TestStateRunStop(t *testing.T) {
@@ -19,7 +17,7 @@ func TestStateRunStop(t *testing.T) {
 			close(started)
 			close(started)
 		}()
 		}()
 		s.Lock()
 		s.Lock()
-		s.SetRunning(i + 100)
+		s.SetRunning(i+100, false)
 		s.Unlock()
 		s.Unlock()
 
 
 		if !s.IsRunning() {
 		if !s.IsRunning() {
@@ -52,7 +50,7 @@ func TestStateRunStop(t *testing.T) {
 			atomic.StoreInt64(&exit, int64(exitCode))
 			atomic.StoreInt64(&exit, int64(exitCode))
 			close(stopped)
 			close(stopped)
 		}()
 		}()
-		s.SetStoppedLocking(&execdriver.ExitStatus{ExitCode: i})
+		s.SetStoppedLocking(&ExitStatus{ExitCode: i})
 		if s.IsRunning() {
 		if s.IsRunning() {
 			t.Fatal("State is running")
 			t.Fatal("State is running")
 		}
 		}
@@ -93,7 +91,7 @@ func TestStateTimeoutWait(t *testing.T) {
 	}
 	}
 
 
 	s.Lock()
 	s.Lock()
-	s.SetRunning(49)
+	s.SetRunning(49, false)
 	s.Unlock()
 	s.Unlock()
 
 
 	stopped := make(chan struct{})
 	stopped := make(chan struct{})

+ 1 - 3
container/state_unix.go

@@ -2,11 +2,9 @@
 
 
 package container
 package container
 
 
-import "github.com/docker/docker/daemon/execdriver"
-
 // setFromExitStatus is a platform specific helper function to set the state
 // setFromExitStatus is a platform specific helper function to set the state
 // based on the ExitStatus structure.
 // based on the ExitStatus structure.
-func (s *State) setFromExitStatus(exitStatus *execdriver.ExitStatus) {
+func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
 	s.ExitCode = exitStatus.ExitCode
 	s.ExitCode = exitStatus.ExitCode
 	s.OOMKilled = exitStatus.OOMKilled
 	s.OOMKilled = exitStatus.OOMKilled
 }
 }

+ 30 - 0
daemon/apparmor_default.go

@@ -0,0 +1,30 @@
+// +build linux
+
+package daemon
+
+import (
+	"github.com/Sirupsen/logrus"
+	aaprofile "github.com/docker/docker/profiles/apparmor"
+	"github.com/opencontainers/runc/libcontainer/apparmor"
+)
+
+// Define constants for native driver
+const (
+	defaultApparmorProfile = "docker-default"
+)
+
+func installDefaultAppArmorProfile() {
+	if apparmor.IsEnabled() {
+		if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil {
+			apparmorProfiles := []string{defaultApparmorProfile}
+
+			// Allow daemon to run if loading failed, but are active
+			// (possibly through another run, manually, or via system startup)
+			for _, policy := range apparmorProfiles {
+				if err := aaprofile.IsLoaded(policy); err != nil {
+					logrus.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
+				}
+			}
+		}
+	}
+}

+ 6 - 0
daemon/apparmor_default_unsupported.go

@@ -0,0 +1,6 @@
+// +build !linux
+
+package daemon
+
+func installDefaultAppArmorProfile() {
+}

+ 131 - 0
daemon/caps/utils_unix.go

@@ -0,0 +1,131 @@
+// +build !windows
+
+package caps
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/docker/docker/pkg/stringutils"
+	"github.com/syndtr/gocapability/capability"
+)
+
+var capabilityList Capabilities
+
+func init() {
+	last := capability.CAP_LAST_CAP
+	// hack for RHEL6 which has no /proc/sys/kernel/cap_last_cap
+	if last == capability.Cap(63) {
+		last = capability.CAP_BLOCK_SUSPEND
+	}
+	for _, cap := range capability.List() {
+		if cap > last {
+			continue
+		}
+		capabilityList = append(capabilityList,
+			&CapabilityMapping{
+				Key:   "CAP_" + strings.ToUpper(cap.String()),
+				Value: cap,
+			},
+		)
+	}
+}
+
+type (
+	// CapabilityMapping maps linux capability name to its value of capability.Cap type
+	// Capabilities is one of the security systems in Linux Security Module (LSM)
+	// framework provided by the kernel.
+	// For more details on capabilities, see http://man7.org/linux/man-pages/man7/capabilities.7.html
+	CapabilityMapping struct {
+		Key   string         `json:"key,omitempty"`
+		Value capability.Cap `json:"value,omitempty"`
+	}
+	// Capabilities contains all CapabilityMapping
+	Capabilities []*CapabilityMapping
+)
+
+// String returns <key> of CapabilityMapping
+func (c *CapabilityMapping) String() string {
+	return c.Key
+}
+
+// GetCapability returns CapabilityMapping which contains specific key
+func GetCapability(key string) *CapabilityMapping {
+	for _, capp := range capabilityList {
+		if capp.Key == key {
+			cpy := *capp
+			return &cpy
+		}
+	}
+	return nil
+}
+
+// GetAllCapabilities returns all of the capabilities
+func GetAllCapabilities() []string {
+	output := make([]string, len(capabilityList))
+	for i, capability := range capabilityList {
+		output[i] = capability.String()
+	}
+	return output
+}
+
+// TweakCapabilities can tweak capabilities by adding or dropping capabilities
+// based on the basics capabilities.
+func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
+	var (
+		newCaps []string
+		allCaps = GetAllCapabilities()
+	)
+
+	// FIXME(tonistiigi): docker format is without CAP_ prefix, oci is with prefix
+	// Currently they are mixed in here. We should do conversion in one place.
+
+	// look for invalid cap in the drop list
+	for _, cap := range drops {
+		if strings.ToLower(cap) == "all" {
+			continue
+		}
+
+		if !stringutils.InSlice(allCaps, "CAP_"+cap) {
+			return nil, fmt.Errorf("Unknown capability drop: %q", cap)
+		}
+	}
+
+	// handle --cap-add=all
+	if stringutils.InSlice(adds, "all") {
+		basics = allCaps
+	}
+
+	if !stringutils.InSlice(drops, "all") {
+		for _, cap := range basics {
+			// skip `all` already handled above
+			if strings.ToLower(cap) == "all" {
+				continue
+			}
+
+			// if we don't drop `all`, add back all the non-dropped caps
+			if !stringutils.InSlice(drops, cap[4:]) {
+				newCaps = append(newCaps, strings.ToUpper(cap))
+			}
+		}
+	}
+
+	for _, cap := range adds {
+		// skip `all` already handled above
+		if strings.ToLower(cap) == "all" {
+			continue
+		}
+
+		cap = "CAP_" + cap
+
+		if !stringutils.InSlice(allCaps, cap) {
+			return nil, fmt.Errorf("Unknown capability to add: %q", cap)
+		}
+
+		// add cap if not already in the list
+		if !stringutils.InSlice(newCaps, cap) {
+			newCaps = append(newCaps, strings.ToUpper(cap))
+		}
+	}
+	return newCaps, nil
+}

+ 1 - 1
daemon/config.go

@@ -115,7 +115,7 @@ func (config *Config) InstallCommonFlags(cmd *flag.FlagSet, usageFn func(string)
 	cmd.Var(opts.NewNamedListOptsRef("exec-opts", &config.ExecOptions, nil), []string{"-exec-opt"}, usageFn("Set exec driver options"))
 	cmd.Var(opts.NewNamedListOptsRef("exec-opts", &config.ExecOptions, nil), []string{"-exec-opt"}, usageFn("Set exec driver options"))
 	cmd.StringVar(&config.Pidfile, []string{"p", "-pidfile"}, defaultPidFile, usageFn("Path to use for daemon PID file"))
 	cmd.StringVar(&config.Pidfile, []string{"p", "-pidfile"}, defaultPidFile, usageFn("Path to use for daemon PID file"))
 	cmd.StringVar(&config.Root, []string{"g", "-graph"}, defaultGraph, usageFn("Root of the Docker runtime"))
 	cmd.StringVar(&config.Root, []string{"g", "-graph"}, defaultGraph, usageFn("Root of the Docker runtime"))
-	cmd.StringVar(&config.ExecRoot, []string{"-exec-root"}, "/var/run/docker", usageFn("Root of the Docker execdriver"))
+	cmd.StringVar(&config.ExecRoot, []string{"-exec-root"}, defaultExecRoot, usageFn("Root of the Docker execdriver"))
 	cmd.BoolVar(&config.AutoRestart, []string{"#r", "#-restart"}, true, usageFn("--restart on the daemon has been deprecated in favor of --restart policies on docker run"))
 	cmd.BoolVar(&config.AutoRestart, []string{"#r", "#-restart"}, true, usageFn("--restart on the daemon has been deprecated in favor of --restart policies on docker run"))
 	cmd.StringVar(&config.GraphDriver, []string{"s", "-storage-driver"}, "", usageFn("Storage driver to use"))
 	cmd.StringVar(&config.GraphDriver, []string{"s", "-storage-driver"}, "", usageFn("Storage driver to use"))
 	cmd.IntVar(&config.Mtu, []string{"#mtu", "-mtu"}, 0, usageFn("Set the containers network MTU"))
 	cmd.IntVar(&config.Mtu, []string{"#mtu", "-mtu"}, 0, usageFn("Set the containers network MTU"))

+ 5 - 2
daemon/config_unix.go

@@ -12,8 +12,9 @@ import (
 )
 )
 
 
 var (
 var (
-	defaultPidFile = "/var/run/docker.pid"
-	defaultGraph   = "/var/lib/docker"
+	defaultPidFile  = "/var/run/docker.pid"
+	defaultGraph    = "/var/lib/docker"
+	defaultExecRoot = "/var/run/docker"
 )
 )
 
 
 // Config defines the configuration of a docker daemon.
 // Config defines the configuration of a docker daemon.
@@ -30,6 +31,7 @@ type Config struct {
 	RemappedRoot         string                   `json:"userns-remap,omitempty"`
 	RemappedRoot         string                   `json:"userns-remap,omitempty"`
 	CgroupParent         string                   `json:"cgroup-parent,omitempty"`
 	CgroupParent         string                   `json:"cgroup-parent,omitempty"`
 	Ulimits              map[string]*units.Ulimit `json:"default-ulimits,omitempty"`
 	Ulimits              map[string]*units.Ulimit `json:"default-ulimits,omitempty"`
+	ContainerdAddr       string                   `json:"containerd,omitempty"`
 }
 }
 
 
 // bridgeConfig stores all the bridge driver specific
 // bridgeConfig stores all the bridge driver specific
@@ -80,6 +82,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin
 	cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API"))
 	cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API"))
 	cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers"))
 	cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers"))
 	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
 	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
+	cmd.StringVar(&config.ContainerdAddr, []string{"-containerd"}, "", usageFn("Path to containerD socket"))
 
 
 	config.attachExperimentalFlags(cmd, usageFn)
 	config.attachExperimentalFlags(cmd, usageFn)
 }
 }

+ 3 - 4
daemon/container_operations.go

@@ -48,11 +48,10 @@ func (daemon *Daemon) buildSandboxOptions(container *container.Container, n libn
 		sboxOptions = append(sboxOptions, libnetwork.OptionUseDefaultSandbox())
 		sboxOptions = append(sboxOptions, libnetwork.OptionUseDefaultSandbox())
 		sboxOptions = append(sboxOptions, libnetwork.OptionOriginHostsPath("/etc/hosts"))
 		sboxOptions = append(sboxOptions, libnetwork.OptionOriginHostsPath("/etc/hosts"))
 		sboxOptions = append(sboxOptions, libnetwork.OptionOriginResolvConfPath("/etc/resolv.conf"))
 		sboxOptions = append(sboxOptions, libnetwork.OptionOriginResolvConfPath("/etc/resolv.conf"))
-	} else if daemon.execDriver.SupportsHooks() {
-		// OptionUseExternalKey is mandatory for userns support.
-		// But optional for non-userns support
-		sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey())
 	}
 	}
+	// OptionUseExternalKey is mandatory for userns support.
+	// But optional for non-userns support
+	sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey())
 
 
 	container.HostsPath, err = container.GetRootResourcePath("hosts")
 	container.HostsPath, err = container.GetRootResourcePath("hosts")
 	if err != nil {
 	if err != nil {

+ 56 - 235
daemon/container_operations_unix.go

@@ -13,7 +13,6 @@ import (
 
 
 	"github.com/Sirupsen/logrus"
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/daemon/links"
 	"github.com/docker/docker/daemon/links"
 	"github.com/docker/docker/pkg/fileutils"
 	"github.com/docker/docker/pkg/fileutils"
 	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/idtools"
@@ -22,13 +21,16 @@ import (
 	"github.com/docker/docker/runconfig"
 	"github.com/docker/docker/runconfig"
 	containertypes "github.com/docker/engine-api/types/container"
 	containertypes "github.com/docker/engine-api/types/container"
 	networktypes "github.com/docker/engine-api/types/network"
 	networktypes "github.com/docker/engine-api/types/network"
-	"github.com/docker/go-units"
 	"github.com/docker/libnetwork"
 	"github.com/docker/libnetwork"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/devices"
 	"github.com/opencontainers/runc/libcontainer/devices"
 	"github.com/opencontainers/runc/libcontainer/label"
 	"github.com/opencontainers/runc/libcontainer/label"
+	"github.com/opencontainers/specs/specs-go"
 )
 )
 
 
+func u32Ptr(i int64) *uint32     { u := uint32(i); return &u }
+func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
+
 func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
 func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
 	var env []string
 	var env []string
 	children := daemon.children(container)
 	children := daemon.children(container)
@@ -64,220 +66,6 @@ func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]s
 	return env, nil
 	return env, nil
 }
 }
 
 
-func (daemon *Daemon) populateCommand(c *container.Container, env []string) error {
-	var en *execdriver.Network
-	if !c.Config.NetworkDisabled {
-		en = &execdriver.Network{}
-		if !daemon.execDriver.SupportsHooks() || c.HostConfig.NetworkMode.IsHost() {
-			en.NamespacePath = c.NetworkSettings.SandboxKey
-		}
-
-		if c.HostConfig.NetworkMode.IsContainer() {
-			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
-			if err != nil {
-				return err
-			}
-			en.ContainerID = nc.ID
-		}
-	}
-
-	ipc := &execdriver.Ipc{}
-	var err error
-	c.ShmPath, err = c.ShmResourcePath()
-	if err != nil {
-		return err
-	}
-
-	if c.HostConfig.IpcMode.IsContainer() {
-		ic, err := daemon.getIpcContainer(c)
-		if err != nil {
-			return err
-		}
-		ipc.ContainerID = ic.ID
-		c.ShmPath = ic.ShmPath
-	} else {
-		ipc.HostIpc = c.HostConfig.IpcMode.IsHost()
-		if ipc.HostIpc {
-			if _, err := os.Stat("/dev/shm"); err != nil {
-				return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
-			}
-			c.ShmPath = "/dev/shm"
-		}
-	}
-
-	pid := &execdriver.Pid{}
-	pid.HostPid = c.HostConfig.PidMode.IsHost()
-
-	uts := &execdriver.UTS{
-		HostUTS: c.HostConfig.UTSMode.IsHost(),
-	}
-
-	// Build lists of devices allowed and created within the container.
-	var userSpecifiedDevices []*configs.Device
-	for _, deviceMapping := range c.HostConfig.Devices {
-		devs, err := getDevicesFromPath(deviceMapping)
-		if err != nil {
-			return err
-		}
-
-		userSpecifiedDevices = append(userSpecifiedDevices, devs...)
-	}
-
-	allowedDevices := mergeDevices(configs.DefaultAllowedDevices, userSpecifiedDevices)
-
-	autoCreatedDevices := mergeDevices(configs.DefaultAutoCreatedDevices, userSpecifiedDevices)
-
-	var rlimits []*units.Rlimit
-	ulimits := c.HostConfig.Ulimits
-
-	// Merge ulimits with daemon defaults
-	ulIdx := make(map[string]*units.Ulimit)
-	for _, ul := range ulimits {
-		ulIdx[ul.Name] = ul
-	}
-	for name, ul := range daemon.configStore.Ulimits {
-		if _, exists := ulIdx[name]; !exists {
-			ulimits = append(ulimits, ul)
-		}
-	}
-
-	weightDevices, err := getBlkioWeightDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	readBpsDevice, err := getBlkioReadBpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	writeBpsDevice, err := getBlkioWriteBpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	readIOpsDevice, err := getBlkioReadIOpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	writeIOpsDevice, err := getBlkioWriteIOpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	for _, limit := range ulimits {
-		rl, err := limit.GetRlimit()
-		if err != nil {
-			return err
-		}
-		rlimits = append(rlimits, rl)
-	}
-
-	resources := &execdriver.Resources{
-		CommonResources: execdriver.CommonResources{
-			Memory:            c.HostConfig.Memory,
-			MemoryReservation: c.HostConfig.MemoryReservation,
-			CPUShares:         c.HostConfig.CPUShares,
-			BlkioWeight:       c.HostConfig.BlkioWeight,
-		},
-		MemorySwap:                   c.HostConfig.MemorySwap,
-		KernelMemory:                 c.HostConfig.KernelMemory,
-		CpusetCpus:                   c.HostConfig.CpusetCpus,
-		CpusetMems:                   c.HostConfig.CpusetMems,
-		CPUPeriod:                    c.HostConfig.CPUPeriod,
-		CPUQuota:                     c.HostConfig.CPUQuota,
-		Rlimits:                      rlimits,
-		BlkioWeightDevice:            weightDevices,
-		BlkioThrottleReadBpsDevice:   readBpsDevice,
-		BlkioThrottleWriteBpsDevice:  writeBpsDevice,
-		BlkioThrottleReadIOpsDevice:  readIOpsDevice,
-		BlkioThrottleWriteIOpsDevice: writeIOpsDevice,
-		PidsLimit:                    c.HostConfig.PidsLimit,
-		MemorySwappiness:             -1,
-	}
-
-	if c.HostConfig.OomKillDisable != nil {
-		resources.OomKillDisable = *c.HostConfig.OomKillDisable
-	}
-	if c.HostConfig.MemorySwappiness != nil {
-		resources.MemorySwappiness = *c.HostConfig.MemorySwappiness
-	}
-
-	processConfig := execdriver.ProcessConfig{
-		CommonProcessConfig: execdriver.CommonProcessConfig{
-			Entrypoint: c.Path,
-			Arguments:  c.Args,
-			Tty:        c.Config.Tty,
-		},
-		Privileged: c.HostConfig.Privileged,
-		User:       c.Config.User,
-	}
-
-	processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
-	processConfig.Env = env
-
-	remappedRoot := &execdriver.User{}
-	if c.HostConfig.UsernsMode.IsPrivate() {
-		rootUID, rootGID := daemon.GetRemappedUIDGID()
-		if rootUID != 0 {
-			remappedRoot.UID = rootUID
-			remappedRoot.GID = rootGID
-		}
-	}
-
-	uidMap, gidMap := daemon.GetUIDGIDMaps()
-
-	if !daemon.seccompEnabled {
-		if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" {
-			return fmt.Errorf("Seccomp is not enabled in your kernel, cannot run a custom seccomp profile.")
-		}
-		logrus.Warn("Seccomp is not enabled in your kernel, running container without default profile.")
-		c.SeccompProfile = "unconfined"
-	}
-
-	defaultCgroupParent := "/docker"
-	if daemon.configStore.CgroupParent != "" {
-		defaultCgroupParent = daemon.configStore.CgroupParent
-	} else if daemon.usingSystemd() {
-		defaultCgroupParent = "system.slice"
-	}
-	c.Command = &execdriver.Command{
-		CommonCommand: execdriver.CommonCommand{
-			ID:            c.ID,
-			MountLabel:    c.GetMountLabel(),
-			Network:       en,
-			ProcessConfig: processConfig,
-			ProcessLabel:  c.GetProcessLabel(),
-			Rootfs:        c.BaseFS,
-			Resources:     resources,
-			WorkingDir:    c.Config.WorkingDir,
-		},
-		AllowedDevices:     allowedDevices,
-		AppArmorProfile:    c.AppArmorProfile,
-		AutoCreatedDevices: autoCreatedDevices,
-		CapAdd:             c.HostConfig.CapAdd,
-		CapDrop:            c.HostConfig.CapDrop,
-		CgroupParent:       defaultCgroupParent,
-		GIDMapping:         gidMap,
-		GroupAdd:           c.HostConfig.GroupAdd,
-		Ipc:                ipc,
-		OomScoreAdj:        c.HostConfig.OomScoreAdj,
-		Pid:                pid,
-		ReadonlyRootfs:     c.HostConfig.ReadonlyRootfs,
-		RemappedRoot:       remappedRoot,
-		SeccompProfile:     c.SeccompProfile,
-		UIDMapping:         uidMap,
-		UTS:                uts,
-		NoNewPrivileges:    c.NoNewPrivileges,
-	}
-	if c.HostConfig.CgroupParent != "" {
-		c.Command.CgroupParent = c.HostConfig.CgroupParent
-	}
-
-	return nil
-}
-
 // getSize returns the real size & virtual size of the container.
 // getSize returns the real size & virtual size of the container.
 func (daemon *Daemon) getSize(container *container.Container) (int64, int64) {
 func (daemon *Daemon) getSize(container *container.Container) (int64, int64) {
 	var (
 	var (
@@ -395,28 +183,49 @@ func (daemon *Daemon) getIpcContainer(container *container.Container) (*containe
 }
 }
 
 
 func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
 func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
-	rootUID, rootGID := daemon.GetRemappedUIDGID()
-	if !c.HasMountFor("/dev/shm") {
-		shmPath, err := c.ShmResourcePath()
+	var err error
+
+	c.ShmPath, err = c.ShmResourcePath()
+	if err != nil {
+		return err
+	}
+
+	if c.HostConfig.IpcMode.IsContainer() {
+		ic, err := daemon.getIpcContainer(c)
 		if err != nil {
 		if err != nil {
 			return err
 			return err
 		}
 		}
-
-		if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
-			return err
+		c.ShmPath = ic.ShmPath
+	} else if c.HostConfig.IpcMode.IsHost() {
+		if _, err := os.Stat("/dev/shm"); err != nil {
+			return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
 		}
 		}
+		c.ShmPath = "/dev/shm"
+	} else {
+		rootUID, rootGID := daemon.GetRemappedUIDGID()
+		if !c.HasMountFor("/dev/shm") {
+			shmPath, err := c.ShmResourcePath()
+			if err != nil {
+				return err
+			}
 
 
-		shmSize := container.DefaultSHMSize
-		if c.HostConfig.ShmSize != 0 {
-			shmSize = c.HostConfig.ShmSize
-		}
-		shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
-		if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
-			return fmt.Errorf("mounting shm tmpfs: %s", err)
-		}
-		if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
-			return err
+			if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
+				return err
+			}
+
+			shmSize := container.DefaultSHMSize
+			if c.HostConfig.ShmSize != 0 {
+				shmSize = c.HostConfig.ShmSize
+			}
+			shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
+			if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
+				return fmt.Errorf("mounting shm tmpfs: %s", err)
+			}
+			if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
+				return err
+			}
 		}
 		}
+
 	}
 	}
 
 
 	return nil
 	return nil
@@ -474,7 +283,19 @@ func killProcessDirectly(container *container.Container) error {
 	return nil
 	return nil
 }
 }
 
 
-func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*configs.Device, err error) {
+func specDevice(d *configs.Device) specs.Device {
+	return specs.Device{
+		Type:     string(d.Type),
+		Path:     d.Path,
+		Major:    d.Major,
+		Minor:    d.Minor,
+		FileMode: fmPtr(int64(d.FileMode)),
+		UID:      u32Ptr(int64(d.Uid)),
+		GID:      u32Ptr(int64(d.Gid)),
+	}
+}
+
+func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []specs.Device, err error) {
 	resolvedPathOnHost := deviceMapping.PathOnHost
 	resolvedPathOnHost := deviceMapping.PathOnHost
 
 
 	// check if it is a symbolic link
 	// check if it is a symbolic link
@@ -488,7 +309,7 @@ func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*con
 	// if there was no error, return the device
 	// if there was no error, return the device
 	if err == nil {
 	if err == nil {
 		device.Path = deviceMapping.PathInContainer
 		device.Path = deviceMapping.PathInContainer
-		return append(devs, device), nil
+		return append(devs, specDevice(device)), nil
 	}
 	}
 
 
 	// if the device is not a device node
 	// if the device is not a device node
@@ -508,7 +329,7 @@ func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*con
 
 
 				// add the device to userSpecified devices
 				// add the device to userSpecified devices
 				childDevice.Path = strings.Replace(dpath, resolvedPathOnHost, deviceMapping.PathInContainer, 1)
 				childDevice.Path = strings.Replace(dpath, resolvedPathOnHost, deviceMapping.PathInContainer, 1)
-				devs = append(devs, childDevice)
+				devs = append(devs, specDevice(childDevice))
 
 
 				return nil
 				return nil
 			})
 			})

+ 89 - 75
daemon/daemon.go

@@ -20,13 +20,12 @@ import (
 	"time"
 	"time"
 
 
 	"github.com/Sirupsen/logrus"
 	"github.com/Sirupsen/logrus"
+	containerd "github.com/docker/containerd/api/grpc/types"
 	"github.com/docker/docker/api"
 	"github.com/docker/docker/api"
 	"github.com/docker/docker/builder"
 	"github.com/docker/docker/builder"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/daemon/events"
 	"github.com/docker/docker/daemon/events"
 	"github.com/docker/docker/daemon/exec"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/daemon/execdriver/execdrivers"
 	"github.com/docker/docker/errors"
 	"github.com/docker/docker/errors"
 	"github.com/docker/engine-api/types"
 	"github.com/docker/engine-api/types"
 	containertypes "github.com/docker/engine-api/types/container"
 	containertypes "github.com/docker/engine-api/types/container"
@@ -46,12 +45,12 @@ import (
 	"github.com/docker/docker/image"
 	"github.com/docker/docker/image"
 	"github.com/docker/docker/image/tarexport"
 	"github.com/docker/docker/image/tarexport"
 	"github.com/docker/docker/layer"
 	"github.com/docker/docker/layer"
+	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/migrate/v1"
 	"github.com/docker/docker/migrate/v1"
 	"github.com/docker/docker/pkg/archive"
 	"github.com/docker/docker/pkg/archive"
 	"github.com/docker/docker/pkg/fileutils"
 	"github.com/docker/docker/pkg/fileutils"
 	"github.com/docker/docker/pkg/graphdb"
 	"github.com/docker/docker/pkg/graphdb"
 	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/idtools"
-	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/namesgenerator"
 	"github.com/docker/docker/pkg/namesgenerator"
 	"github.com/docker/docker/pkg/progress"
 	"github.com/docker/docker/pkg/progress"
 	"github.com/docker/docker/pkg/registrar"
 	"github.com/docker/docker/pkg/registrar"
@@ -115,7 +114,6 @@ type Daemon struct {
 	trustKey                  libtrust.PrivateKey
 	trustKey                  libtrust.PrivateKey
 	idIndex                   *truncindex.TruncIndex
 	idIndex                   *truncindex.TruncIndex
 	configStore               *Config
 	configStore               *Config
-	execDriver                execdriver.Driver
 	statsCollector            *statsCollector
 	statsCollector            *statsCollector
 	defaultLogConfig          containertypes.LogConfig
 	defaultLogConfig          containertypes.LogConfig
 	RegistryService           *registry.Service
 	RegistryService           *registry.Service
@@ -132,6 +130,8 @@ type Daemon struct {
 	imageStore                image.Store
 	imageStore                image.Store
 	nameIndex                 *registrar.Registrar
 	nameIndex                 *registrar.Registrar
 	linkIndex                 *linkIndex
 	linkIndex                 *linkIndex
+	containerd                libcontainerd.Client
+	defaultIsolation          containertypes.Isolation // Default isolation mode on Windows
 }
 }
 
 
 // GetContainer looks for a container using the provided information, which could be
 // GetContainer looks for a container using the provided information, which could be
@@ -220,36 +220,16 @@ func (daemon *Daemon) registerName(container *container.Container) error {
 }
 }
 
 
 // Register makes a container object usable by the daemon as <container.ID>
 // Register makes a container object usable by the daemon as <container.ID>
-func (daemon *Daemon) Register(container *container.Container) error {
+func (daemon *Daemon) Register(c *container.Container) error {
 	// Attach to stdout and stderr
 	// Attach to stdout and stderr
-	if container.Config.OpenStdin {
-		container.NewInputPipes()
+	if c.Config.OpenStdin {
+		c.NewInputPipes()
 	} else {
 	} else {
-		container.NewNopInputPipe()
+		c.NewNopInputPipe()
 	}
 	}
 
 
-	daemon.containers.Add(container.ID, container)
-	daemon.idIndex.Add(container.ID)
-
-	if container.IsRunning() {
-		logrus.Debugf("killing old running container %s", container.ID)
-		// Set exit code to 128 + SIGKILL (9) to properly represent unsuccessful exit
-		container.SetStoppedLocking(&execdriver.ExitStatus{ExitCode: 137})
-		// use the current driver and ensure that the container is dead x.x
-		cmd := &execdriver.Command{
-			CommonCommand: execdriver.CommonCommand{
-				ID: container.ID,
-			},
-		}
-		daemon.execDriver.Terminate(cmd)
-
-		container.UnmountIpcMounts(mount.Unmount)
-
-		daemon.Unmount(container)
-		if err := container.ToDiskLocking(); err != nil {
-			logrus.Errorf("Error saving stopped state to disk: %v", err)
-		}
-	}
+	daemon.containers.Add(c.ID, c)
+	daemon.idIndex.Add(c.ID)
 
 
 	return nil
 	return nil
 }
 }
@@ -307,17 +287,38 @@ func (daemon *Daemon) restore() error {
 			logrus.Errorf("Failed to register container %s: %s", c.ID, err)
 			logrus.Errorf("Failed to register container %s: %s", c.ID, err)
 			continue
 			continue
 		}
 		}
+	}
+	var wg sync.WaitGroup
+	var mapLock sync.Mutex
+	for _, c := range containers {
+		wg.Add(1)
+		go func(c *container.Container) {
+			defer wg.Done()
+			if c.IsRunning() || c.IsPaused() {
+				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
+					logrus.Errorf("Failed to restore with containerd: %q", err)
+					return
+				}
+			}
+			// fixme: only if not running
+			// get list of containers we need to restart
+			if daemon.configStore.AutoRestart && !c.IsRunning() && !c.IsPaused() && c.ShouldRestart() {
+				mapLock.Lock()
+				restartContainers[c] = make(chan struct{})
+				mapLock.Unlock()
+			} else if !c.IsRunning() && !c.IsPaused() {
+				if mountid, err := daemon.layerStore.GetMountID(c.ID); err == nil {
+					daemon.cleanupMountsByID(mountid)
+				}
+			}
 
 
-		// get list of containers we need to restart
-		if daemon.configStore.AutoRestart && c.ShouldRestart() {
-			restartContainers[c] = make(chan struct{})
-		}
-
-		// if c.hostConfig.Links is nil (not just empty), then it is using the old sqlite links and needs to be migrated
-		if c.HostConfig != nil && c.HostConfig.Links == nil {
-			migrateLegacyLinks = true
-		}
+			// if c.hostConfig.Links is nil (not just empty), then it is using the old sqlite links and needs to be migrated
+			if c.HostConfig != nil && c.HostConfig.Links == nil {
+				migrateLegacyLinks = true
+			}
+		}(c)
 	}
 	}
+	wg.Wait()
 
 
 	// migrate any legacy links from sqlite
 	// migrate any legacy links from sqlite
 	linkdbFile := filepath.Join(daemon.root, "linkgraph.db")
 	linkdbFile := filepath.Join(daemon.root, "linkgraph.db")
@@ -599,7 +600,7 @@ func (daemon *Daemon) registerLink(parent, child *container.Container, alias str
 
 
 // NewDaemon sets up everything for the daemon to be able to service
 // NewDaemon sets up everything for the daemon to be able to service
 // requests from the webserver.
 // requests from the webserver.
-func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemon, err error) {
+func NewDaemon(config *Config, registryService *registry.Service, containerdRemote libcontainerd.Remote) (daemon *Daemon, err error) {
 	setDefaultMtu(config)
 	setDefaultMtu(config)
 
 
 	// Ensure we have compatible and valid configuration options
 	// Ensure we have compatible and valid configuration options
@@ -659,7 +660,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 	}
 	}
 	os.Setenv("TMPDIR", realTmp)
 	os.Setenv("TMPDIR", realTmp)
 
 
-	d := &Daemon{}
+	d := &Daemon{configStore: config}
 	// Ensure the daemon is properly shutdown if there is a failure during
 	// Ensure the daemon is properly shutdown if there is a failure during
 	// initialization
 	// initialization
 	defer func() {
 	defer func() {
@@ -670,6 +671,11 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 		}
 		}
 	}()
 	}()
 
 
+	// Set the default isolation mode (only applicable on Windows)
+	if err := d.setDefaultIsolation(); err != nil {
+		return nil, fmt.Errorf("error setting default isolation mode: %v", err)
+	}
+
 	// Verify logging driver type
 	// Verify logging driver type
 	if config.LogConfig.Type != "none" {
 	if config.LogConfig.Type != "none" {
 		if _, err := logger.GetLogDriver(config.LogConfig.Type); err != nil {
 		if _, err := logger.GetLogDriver(config.LogConfig.Type); err != nil {
@@ -682,6 +688,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 		logrus.Warnf("Failed to configure golang's threads limit: %v", err)
 		logrus.Warnf("Failed to configure golang's threads limit: %v", err)
 	}
 	}
 
 
+	installDefaultAppArmorProfile()
 	daemonRepo := filepath.Join(config.Root, "containers")
 	daemonRepo := filepath.Join(config.Root, "containers")
 	if err := idtools.MkdirAllAs(daemonRepo, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
 	if err := idtools.MkdirAllAs(daemonRepo, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
 		return nil, err
 		return nil, err
@@ -781,11 +788,6 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 		return nil, fmt.Errorf("Devices cgroup isn't mounted")
 		return nil, fmt.Errorf("Devices cgroup isn't mounted")
 	}
 	}
 
 
-	ed, err := execdrivers.NewDriver(config.ExecOptions, config.ExecRoot, config.Root, sysInfo)
-	if err != nil {
-		return nil, err
-	}
-
 	d.ID = trustKey.PublicKey().KeyID()
 	d.ID = trustKey.PublicKey().KeyID()
 	d.repository = daemonRepo
 	d.repository = daemonRepo
 	d.containers = container.NewMemoryStore()
 	d.containers = container.NewMemoryStore()
@@ -794,8 +796,6 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 	d.distributionMetadataStore = distributionMetadataStore
 	d.distributionMetadataStore = distributionMetadataStore
 	d.trustKey = trustKey
 	d.trustKey = trustKey
 	d.idIndex = truncindex.NewTruncIndex([]string{})
 	d.idIndex = truncindex.NewTruncIndex([]string{})
-	d.configStore = config
-	d.execDriver = ed
 	d.statsCollector = d.newStatsCollector(1 * time.Second)
 	d.statsCollector = d.newStatsCollector(1 * time.Second)
 	d.defaultLogConfig = containertypes.LogConfig{
 	d.defaultLogConfig = containertypes.LogConfig{
 		Type:   config.LogConfig.Type,
 		Type:   config.LogConfig.Type,
@@ -812,10 +812,12 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 	d.nameIndex = registrar.NewRegistrar()
 	d.nameIndex = registrar.NewRegistrar()
 	d.linkIndex = newLinkIndex()
 	d.linkIndex = newLinkIndex()
 
 
-	if err := d.cleanupMounts(); err != nil {
+	go d.execCommandGC()
+
+	d.containerd, err = containerdRemote.Client(d)
+	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	go d.execCommandGC()
 
 
 	if err := d.restore(); err != nil {
 	if err := d.restore(); err != nil {
 		return nil, err
 		return nil, err
@@ -877,6 +879,9 @@ func (daemon *Daemon) Shutdown() error {
 				logrus.Errorf("Stop container error: %v", err)
 				logrus.Errorf("Stop container error: %v", err)
 				return
 				return
 			}
 			}
+			if mountid, err := daemon.layerStore.GetMountID(c.ID); err == nil {
+				daemon.cleanupMountsByID(mountid)
+			}
 			logrus.Debugf("container stopped %s", c.ID)
 			logrus.Debugf("container stopped %s", c.ID)
 		})
 		})
 	}
 	}
@@ -923,29 +928,16 @@ func (daemon *Daemon) Mount(container *container.Container) error {
 }
 }
 
 
 // Unmount unsets the container base filesystem
 // Unmount unsets the container base filesystem
-func (daemon *Daemon) Unmount(container *container.Container) {
+func (daemon *Daemon) Unmount(container *container.Container) error {
 	if err := container.RWLayer.Unmount(); err != nil {
 	if err := container.RWLayer.Unmount(); err != nil {
 		logrus.Errorf("Error unmounting container %s: %s", container.ID, err)
 		logrus.Errorf("Error unmounting container %s: %s", container.ID, err)
+		return err
 	}
 	}
-}
-
-// Run uses the execution driver to run a given container
-func (daemon *Daemon) Run(c *container.Container, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (execdriver.ExitStatus, error) {
-	hooks := execdriver.Hooks{
-		Start: startCallback,
-	}
-	hooks.PreStart = append(hooks.PreStart, func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
-		return daemon.setNetworkNamespaceKey(c.ID, pid)
-	})
-	return daemon.execDriver.Run(c.Command, pipes, hooks)
+	return nil
 }
 }
 
 
 func (daemon *Daemon) kill(c *container.Container, sig int) error {
 func (daemon *Daemon) kill(c *container.Container, sig int) error {
-	return daemon.execDriver.Kill(c.Command, sig)
-}
-
-func (daemon *Daemon) stats(c *container.Container) (*execdriver.ResourceStats, error) {
-	return daemon.execDriver.Stats(c.ID)
+	return daemon.containerd.Signal(c.ID, sig)
 }
 }
 
 
 func (daemon *Daemon) subscribeToContainerStats(c *container.Container) chan interface{} {
 func (daemon *Daemon) subscribeToContainerStats(c *container.Container) chan interface{} {
@@ -1322,12 +1314,6 @@ func (daemon *Daemon) GraphDriverName() string {
 	return daemon.layerStore.DriverName()
 	return daemon.layerStore.DriverName()
 }
 }
 
 
-// ExecutionDriver returns the currently used driver for creating and
-// starting execs in a container.
-func (daemon *Daemon) ExecutionDriver() execdriver.Driver {
-	return daemon.execDriver
-}
-
 // GetUIDGIDMaps returns the current daemon's user namespace settings
 // GetUIDGIDMaps returns the current daemon's user namespace settings
 // for the full uid and gid maps which will be applied to containers
 // for the full uid and gid maps which will be applied to containers
 // started in this instance.
 // started in this instance.
@@ -1536,7 +1522,7 @@ func (daemon *Daemon) IsShuttingDown() bool {
 }
 }
 
 
 // GetContainerStats collects all the stats published by a container
 // GetContainerStats collects all the stats published by a container
-func (daemon *Daemon) GetContainerStats(container *container.Container) (*execdriver.ResourceStats, error) {
+func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) {
 	stats, err := daemon.stats(container)
 	stats, err := daemon.stats(container)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
@@ -1547,7 +1533,22 @@ func (daemon *Daemon) GetContainerStats(container *container.Container) (*execdr
 	if nwStats, err = daemon.getNetworkStats(container); err != nil {
 	if nwStats, err = daemon.getNetworkStats(container); err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
-	stats.Interfaces = nwStats
+
+	stats.Networks = make(map[string]types.NetworkStats)
+	for _, iface := range nwStats {
+		// For API Version >= 1.21, the original data of network will
+		// be returned.
+		stats.Networks[iface.Name] = types.NetworkStats{
+			RxBytes:   iface.RxBytes,
+			RxPackets: iface.RxPackets,
+			RxErrors:  iface.RxErrors,
+			RxDropped: iface.RxDropped,
+			TxBytes:   iface.TxBytes,
+			TxPackets: iface.TxPackets,
+			TxErrors:  iface.TxErrors,
+			TxDropped: iface.TxDropped,
+		}
+	}
 
 
 	return stats, nil
 	return stats, nil
 }
 }
@@ -1735,3 +1736,16 @@ func (daemon *Daemon) networkOptions(dconfig *Config) ([]nwconfig.Option, error)
 	options = append(options, driverOptions(dconfig)...)
 	options = append(options, driverOptions(dconfig)...)
 	return options, nil
 	return options, nil
 }
 }
+
+func copyBlkioEntry(entries []*containerd.BlkioStatsEntry) []types.BlkioStatEntry {
+	out := make([]types.BlkioStatEntry, len(entries))
+	for i, re := range entries {
+		out[i] = types.BlkioStatEntry{
+			Major: re.Major,
+			Minor: re.Minor,
+			Op:    re.Op,
+			Value: re.Value,
+		}
+	}
+	return out
+}

+ 60 - 2
daemon/daemon_linux.go

@@ -12,6 +12,64 @@ import (
 	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/mount"
 )
 )
 
 
+func (daemon *Daemon) cleanupMountsByID(id string) error {
+	logrus.Debugf("Cleaning up old mountid %s: start.", id)
+	f, err := os.Open("/proc/self/mountinfo")
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	return daemon.cleanupMountsFromReaderByID(f, id, mount.Unmount)
+}
+
+func (daemon *Daemon) cleanupMountsFromReaderByID(reader io.Reader, id string, unmount func(target string) error) error {
+	if daemon.root == "" {
+		return nil
+	}
+	var errors []string
+	mountRoot := ""
+	shmSuffix := "/" + id + "/shm"
+	mergedSuffix := "/" + id + "/merged"
+	sc := bufio.NewScanner(reader)
+	for sc.Scan() {
+		line := sc.Text()
+		fields := strings.Fields(line)
+		if strings.HasPrefix(fields[4], daemon.root) {
+			logrus.Debugf("Mount base: %v", fields[4])
+			mnt := fields[4]
+			if strings.HasSuffix(mnt, shmSuffix) || strings.HasSuffix(mnt, mergedSuffix) {
+				logrus.Debugf("Unmounting %v", mnt)
+				if err := unmount(mnt); err != nil {
+					logrus.Error(err)
+					errors = append(errors, err.Error())
+				}
+			} else if mountBase := filepath.Base(mnt); mountBase == id {
+				mountRoot = mnt
+			}
+		}
+	}
+
+	if mountRoot != "" {
+		logrus.Debugf("Unmounting %v", mountRoot)
+		if err := unmount(mountRoot); err != nil {
+			logrus.Error(err)
+			errors = append(errors, err.Error())
+		}
+	}
+
+	if err := sc.Err(); err != nil {
+		return err
+	}
+
+	if len(errors) > 0 {
+		return fmt.Errorf("Error cleaningup mounts:\n%v", strings.Join(errors, "\n"))
+	}
+
+	logrus.Debugf("Cleaning up old container shm/mqueue/rootfs mounts: done.")
+	return nil
+}
+
 // cleanupMounts umounts shm/mqueue mounts for old containers
 // cleanupMounts umounts shm/mqueue mounts for old containers
 func (daemon *Daemon) cleanupMounts() error {
 func (daemon *Daemon) cleanupMounts() error {
 	logrus.Debugf("Cleaning up old container shm/mqueue/rootfs mounts: start.")
 	logrus.Debugf("Cleaning up old container shm/mqueue/rootfs mounts: start.")
@@ -25,7 +83,7 @@ func (daemon *Daemon) cleanupMounts() error {
 }
 }
 
 
 func (daemon *Daemon) cleanupMountsFromReader(reader io.Reader, unmount func(target string) error) error {
 func (daemon *Daemon) cleanupMountsFromReader(reader io.Reader, unmount func(target string) error) error {
-	if daemon.repository == "" {
+	if daemon.root == "" {
 		return nil
 		return nil
 	}
 	}
 	sc := bufio.NewScanner(reader)
 	sc := bufio.NewScanner(reader)
@@ -37,7 +95,7 @@ func (daemon *Daemon) cleanupMountsFromReader(reader io.Reader, unmount func(tar
 			logrus.Debugf("Mount base: %v", fields[4])
 			logrus.Debugf("Mount base: %v", fields[4])
 			mnt := fields[4]
 			mnt := fields[4]
 			mountBase := filepath.Base(mnt)
 			mountBase := filepath.Base(mnt)
-			if mountBase == "mqueue" || mountBase == "shm" || mountBase == "merged" {
+			if mountBase == "shm" || mountBase == "merged" {
 				logrus.Debugf("Unmounting %v", mnt)
 				logrus.Debugf("Unmounting %v", mnt)
 				if err := unmount(mnt); err != nil {
 				if err := unmount(mnt); err != nil {
 					logrus.Error(err)
 					logrus.Error(err)

+ 65 - 35
daemon/daemon_linux_test.go

@@ -7,53 +7,83 @@ import (
 	"testing"
 	"testing"
 )
 )
 
 
// mountsFixture is a captured /proc/self/mountinfo snapshot from a
// development host using the aufs graph driver. It is shared by the
// cleanup-mount tests below; the entries they key on are the container
// shm mount (mount id 116) and the per-container aufs rootfs mounts
// (mount ids 115 and 120).
const mountsFixture = `142 78 0:38 / / rw,relatime - aufs none rw,si=573b861da0b3a05b,dio
143 142 0:60 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw
144 142 0:67 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755
145 144 0:78 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666
146 144 0:49 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw
147 142 0:84 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
148 147 0:86 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755
149 148 0:22 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset
150 148 0:25 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpu rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu
151 148 0:27 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuacct
152 148 0:28 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
153 148 0:29 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
154 148 0:30 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
155 148 0:31 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
156 148 0:32 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event
157 148 0:33 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,hugetlb
158 148 0:35 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup systemd rw,name=systemd
159 142 8:4 /home/mlaventure/gopath /home/mlaventure/gopath rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
160 142 8:4 /var/lib/docker/volumes/9a428b651ee4c538130143cad8d87f603a4bf31b928afe7ff3ecd65480692b35/_data /var/lib/docker rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
164 142 8:4 /home/mlaventure/gopath/src/github.com/docker/docker /go/src/github.com/docker/docker rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
165 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/resolv.conf /etc/resolv.conf rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
166 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/hostname /etc/hostname rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
167 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/hosts /etc/hosts rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
168 144 0:39 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
169 144 0:12 /14 /dev/console rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
83 147 0:10 / /sys/kernel/security rw,relatime - securityfs none rw
89 142 0:87 / /tmp rw,relatime - tmpfs none rw
97 142 0:60 / /run/docker/netns/default rw,nosuid,nodev,noexec,relatime - proc proc rw
100 160 8:4 /var/lib/docker/volumes/9a428b651ee4c538130143cad8d87f603a4bf31b928afe7ff3ecd65480692b35/_data/aufs /var/lib/docker/aufs rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
115 100 0:102 / /var/lib/docker/aufs/mnt/0ecda1c63e5b58b3d89ff380bf646c95cc980252cf0b52466d43619aec7c8432 rw,relatime - aufs none rw,si=573b861dbc01905b,dio
116 160 0:107 / /var/lib/docker/containers/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
118 142 0:102 / /run/docker/libcontainerd/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/rootfs rw,relatime - aufs none rw,si=573b861dbc01905b,dio
242 142 0:60 / /run/docker/netns/c3664df2a0f7 rw,nosuid,nodev,noexec,relatime - proc proc rw
120 100 0:122 / /var/lib/docker/aufs/mnt/03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d rw,relatime - aufs none rw,si=573b861eb147805b,dio
171 142 0:122 / /run/docker/libcontainerd/e406ff6f3e18516d50e03dbca4de54767a69a403a6f7ec1edc2762812824521e/rootfs rw,relatime - aufs none rw,si=573b861eb147805b,dio
310 142 0:60 / /run/docker/netns/71a18572176b rw,nosuid,nodev,noexec,relatime - proc proc rw
`
+
 func TestCleanupMounts(t *testing.T) {
 func TestCleanupMounts(t *testing.T) {
-	fixture := `230 138 0:60 / / rw,relatime - overlay overlay rw,lowerdir=/var/lib/docker/overlay/0ef9f93d5d365c1385b09d54bbee6afff3d92002c16f22eccb6e1549b2ff97d8/root,upperdir=/var/lib/docker/overlay/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/upper,workdir=/var/lib/docker/overlay/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/work
-231 230 0:56 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw
-232 230 0:57 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755
-233 232 0:58 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666
-234 232 0:59 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
-235 232 0:55 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw
-236 230 0:61 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
-237 236 0:62 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw
-238 237 0:21 /system.slice/docker.service /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
-239 237 0:23 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event
-240 237 0:24 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset,clone_children
-241 237 0:25 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
-242 237 0:26 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
-243 237 0:27 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu,cpuacct
-244 237 0:28 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
-245 237 0:29 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls,net_prio
-246 237 0:30 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,hugetlb
-247 237 0:31 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
-248 230 253:1 /var/lib/docker/volumes/510cc41ac68c48bd4eac932e3e09711673876287abf1b185312cfbfe6261a111/_data /var/lib/docker rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered
-250 230 253:1 /var/lib/docker/containers/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/hostname /etc/hostname rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered
-251 230 253:1 /var/lib/docker/containers/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/hosts /etc/hosts rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered
-252 232 0:13 /1 /dev/console rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
-139 236 0:11 / /sys/kernel/security rw,relatime - securityfs none rw
-140 230 0:54 / /tmp rw,relatime - tmpfs none rw
-145 230 0:3 / /run/docker/netns/default rw - nsfs nsfs rw
-130 140 0:45 / /tmp/docker_recursive_mount_test312125472/tmpfs rw,relatime - tmpfs tmpfs rw
-131 230 0:3 / /run/docker/netns/47903e2e6701 rw - nsfs nsfs rw
-133 230 0:55 / /go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/47903e2e67014246eba27607809d5f5c2437c3bf84c2986393448f84093cc40b/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw`
+	d := &Daemon{
+		root: "/var/lib/docker/",
+	}
+
+	expected := "/var/lib/docker/containers/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/shm"
+	var unmounted int
+	unmount := func(target string) error {
+		if target == expected {
+			unmounted++
+		}
+		return nil
+	}
 
 
+	d.cleanupMountsFromReader(strings.NewReader(mountsFixture), unmount)
+
+	if unmounted != 1 {
+		t.Fatalf("Expected to unmount the shm (and the shm only)")
+	}
+}
+
+func TestCleanupMountsByID(t *testing.T) {
 	d := &Daemon{
 	d := &Daemon{
-		repository: "/go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/",
+		root: "/var/lib/docker/",
 	}
 	}
 
 
-	expected := "/go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/47903e2e67014246eba27607809d5f5c2437c3bf84c2986393448f84093cc40b/mqueue"
-	var unmounted bool
+	expected := "/var/lib/docker/aufs/mnt/03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d"
+	var unmounted int
 	unmount := func(target string) error {
 	unmount := func(target string) error {
 		if target == expected {
 		if target == expected {
-			unmounted = true
+			unmounted++
 		}
 		}
 		return nil
 		return nil
 	}
 	}
 
 
-	d.cleanupMountsFromReader(strings.NewReader(fixture), unmount)
+	d.cleanupMountsFromReaderByID(strings.NewReader(mountsFixture), "03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d", unmount)
 
 
-	if !unmounted {
-		t.Fatalf("Expected to unmount the mqueue")
+	if unmounted != 1 {
+		t.Fatalf("Expected to unmount the auf root (and that only)")
 	}
 	}
 }
 }
 
 

+ 163 - 26
daemon/daemon_unix.go

@@ -13,6 +13,7 @@ import (
 	"strconv"
 	"strconv"
 	"strings"
 	"strings"
 	"syscall"
 	"syscall"
+	"time"
 
 
 	"github.com/Sirupsen/logrus"
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/container"
@@ -25,6 +26,7 @@ import (
 	"github.com/docker/docker/reference"
 	"github.com/docker/docker/reference"
 	"github.com/docker/docker/runconfig"
 	"github.com/docker/docker/runconfig"
 	runconfigopts "github.com/docker/docker/runconfig/opts"
 	runconfigopts "github.com/docker/docker/runconfig/opts"
+	"github.com/docker/engine-api/types"
 	pblkiodev "github.com/docker/engine-api/types/blkiodev"
 	pblkiodev "github.com/docker/engine-api/types/blkiodev"
 	containertypes "github.com/docker/engine-api/types/container"
 	containertypes "github.com/docker/engine-api/types/container"
 	"github.com/docker/libnetwork"
 	"github.com/docker/libnetwork"
@@ -33,10 +35,10 @@ import (
 	"github.com/docker/libnetwork/ipamutils"
 	"github.com/docker/libnetwork/ipamutils"
 	"github.com/docker/libnetwork/netlabel"
 	"github.com/docker/libnetwork/netlabel"
 	"github.com/docker/libnetwork/options"
 	"github.com/docker/libnetwork/options"
-	"github.com/docker/libnetwork/types"
-	blkiodev "github.com/opencontainers/runc/libcontainer/configs"
+	lntypes "github.com/docker/libnetwork/types"
 	"github.com/opencontainers/runc/libcontainer/label"
 	"github.com/opencontainers/runc/libcontainer/label"
 	"github.com/opencontainers/runc/libcontainer/user"
 	"github.com/opencontainers/runc/libcontainer/user"
+	"github.com/opencontainers/specs/specs-go"
 )
 )
 
 
 const (
 const (
@@ -51,16 +53,81 @@ const (
 	defaultRemappedID  string = "dockremap"
 	defaultRemappedID  string = "dockremap"
 )
 )
 
 
-func getBlkioWeightDevices(config *containertypes.HostConfig) ([]*blkiodev.WeightDevice, error) {
+func getMemoryResources(config containertypes.Resources) *specs.Memory {
+	memory := specs.Memory{}
+
+	if config.Memory > 0 {
+		limit := uint64(config.Memory)
+		memory.Limit = &limit
+	}
+
+	if config.MemoryReservation > 0 {
+		reservation := uint64(config.MemoryReservation)
+		memory.Reservation = &reservation
+	}
+
+	if config.MemorySwap != 0 {
+		swap := uint64(config.MemorySwap)
+		memory.Swap = &swap
+	}
+
+	if config.MemorySwappiness != nil {
+		swappiness := uint64(*config.MemorySwappiness)
+		memory.Swappiness = &swappiness
+	}
+
+	if config.KernelMemory != 0 {
+		kernelMemory := uint64(config.KernelMemory)
+		memory.Kernel = &kernelMemory
+	}
+
+	return &memory
+}
+
+func getCPUResources(config containertypes.Resources) *specs.CPU {
+	cpu := specs.CPU{}
+
+	if config.CPUShares != 0 {
+		shares := uint64(config.CPUShares)
+		cpu.Shares = &shares
+	}
+
+	if config.CpusetCpus != "" {
+		cpuset := config.CpusetCpus
+		cpu.Cpus = &cpuset
+	}
+
+	if config.CpusetMems != "" {
+		cpuset := config.CpusetMems
+		cpu.Mems = &cpuset
+	}
+
+	if config.CPUPeriod != 0 {
+		period := uint64(config.CPUPeriod)
+		cpu.Period = &period
+	}
+
+	if config.CPUQuota != 0 {
+		quota := uint64(config.CPUQuota)
+		cpu.Quota = &quota
+	}
+
+	return &cpu
+}
+
+func getBlkioWeightDevices(config containertypes.Resources) ([]specs.WeightDevice, error) {
 	var stat syscall.Stat_t
 	var stat syscall.Stat_t
-	var blkioWeightDevices []*blkiodev.WeightDevice
+	var blkioWeightDevices []specs.WeightDevice
 
 
 	for _, weightDevice := range config.BlkioWeightDevice {
 	for _, weightDevice := range config.BlkioWeightDevice {
 		if err := syscall.Stat(weightDevice.Path, &stat); err != nil {
 		if err := syscall.Stat(weightDevice.Path, &stat); err != nil {
 			return nil, err
 			return nil, err
 		}
 		}
-		weightDevice := blkiodev.NewWeightDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), weightDevice.Weight, 0)
-		blkioWeightDevices = append(blkioWeightDevices, weightDevice)
+		weight := weightDevice.Weight
+		d := specs.WeightDevice{Weight: &weight}
+		d.Major = int64(stat.Rdev / 256)
+		d.Major = int64(stat.Rdev % 256)
+		blkioWeightDevices = append(blkioWeightDevices, d)
 	}
 	}
 
 
 	return blkioWeightDevices, nil
 	return blkioWeightDevices, nil
@@ -99,61 +166,73 @@ func parseSecurityOpt(container *container.Container, config *containertypes.Hos
 	return err
 	return err
 }
 }
 
 
-func getBlkioReadIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
-	var blkioReadIOpsDevice []*blkiodev.ThrottleDevice
+func getBlkioReadIOpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
+	var blkioReadIOpsDevice []specs.ThrottleDevice
 	var stat syscall.Stat_t
 	var stat syscall.Stat_t
 
 
 	for _, iopsDevice := range config.BlkioDeviceReadIOps {
 	for _, iopsDevice := range config.BlkioDeviceReadIOps {
 		if err := syscall.Stat(iopsDevice.Path, &stat); err != nil {
 		if err := syscall.Stat(iopsDevice.Path, &stat); err != nil {
 			return nil, err
 			return nil, err
 		}
 		}
-		readIOpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), iopsDevice.Rate)
-		blkioReadIOpsDevice = append(blkioReadIOpsDevice, readIOpsDevice)
+		rate := iopsDevice.Rate
+		d := specs.ThrottleDevice{Rate: &rate}
+		d.Major = int64(stat.Rdev / 256)
+		d.Major = int64(stat.Rdev % 256)
+		blkioReadIOpsDevice = append(blkioReadIOpsDevice, d)
 	}
 	}
 
 
 	return blkioReadIOpsDevice, nil
 	return blkioReadIOpsDevice, nil
 }
 }
 
 
-func getBlkioWriteIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
-	var blkioWriteIOpsDevice []*blkiodev.ThrottleDevice
+func getBlkioWriteIOpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
+	var blkioWriteIOpsDevice []specs.ThrottleDevice
 	var stat syscall.Stat_t
 	var stat syscall.Stat_t
 
 
 	for _, iopsDevice := range config.BlkioDeviceWriteIOps {
 	for _, iopsDevice := range config.BlkioDeviceWriteIOps {
 		if err := syscall.Stat(iopsDevice.Path, &stat); err != nil {
 		if err := syscall.Stat(iopsDevice.Path, &stat); err != nil {
 			return nil, err
 			return nil, err
 		}
 		}
-		writeIOpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), iopsDevice.Rate)
-		blkioWriteIOpsDevice = append(blkioWriteIOpsDevice, writeIOpsDevice)
+		rate := iopsDevice.Rate
+		d := specs.ThrottleDevice{Rate: &rate}
+		d.Major = int64(stat.Rdev / 256)
+		d.Major = int64(stat.Rdev % 256)
+		blkioWriteIOpsDevice = append(blkioWriteIOpsDevice, d)
 	}
 	}
 
 
 	return blkioWriteIOpsDevice, nil
 	return blkioWriteIOpsDevice, nil
 }
 }
 
 
-func getBlkioReadBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
-	var blkioReadBpsDevice []*blkiodev.ThrottleDevice
+func getBlkioReadBpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
+	var blkioReadBpsDevice []specs.ThrottleDevice
 	var stat syscall.Stat_t
 	var stat syscall.Stat_t
 
 
 	for _, bpsDevice := range config.BlkioDeviceReadBps {
 	for _, bpsDevice := range config.BlkioDeviceReadBps {
 		if err := syscall.Stat(bpsDevice.Path, &stat); err != nil {
 		if err := syscall.Stat(bpsDevice.Path, &stat); err != nil {
 			return nil, err
 			return nil, err
 		}
 		}
-		readBpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), bpsDevice.Rate)
-		blkioReadBpsDevice = append(blkioReadBpsDevice, readBpsDevice)
+		rate := bpsDevice.Rate
+		d := specs.ThrottleDevice{Rate: &rate}
+		d.Major = int64(stat.Rdev / 256)
+		d.Major = int64(stat.Rdev % 256)
+		blkioReadBpsDevice = append(blkioReadBpsDevice, d)
 	}
 	}
 
 
 	return blkioReadBpsDevice, nil
 	return blkioReadBpsDevice, nil
 }
 }
 
 
-func getBlkioWriteBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
-	var blkioWriteBpsDevice []*blkiodev.ThrottleDevice
+func getBlkioWriteBpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
+	var blkioWriteBpsDevice []specs.ThrottleDevice
 	var stat syscall.Stat_t
 	var stat syscall.Stat_t
 
 
 	for _, bpsDevice := range config.BlkioDeviceWriteBps {
 	for _, bpsDevice := range config.BlkioDeviceWriteBps {
 		if err := syscall.Stat(bpsDevice.Path, &stat); err != nil {
 		if err := syscall.Stat(bpsDevice.Path, &stat); err != nil {
 			return nil, err
 			return nil, err
 		}
 		}
-		writeBpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), bpsDevice.Rate)
-		blkioWriteBpsDevice = append(blkioWriteBpsDevice, writeBpsDevice)
+		rate := bpsDevice.Rate
+		d := specs.ThrottleDevice{Rate: &rate}
+		d.Major = int64(stat.Rdev / 256)
+		d.Major = int64(stat.Rdev % 256)
+		blkioWriteBpsDevice = append(blkioWriteBpsDevice, d)
 	}
 	}
 
 
 	return blkioWriteBpsDevice, nil
 	return blkioWriteBpsDevice, nil
@@ -594,8 +673,8 @@ func initBridgeDriver(controller libnetwork.NetworkController, config *Config) e
 
 
 	nw, nw6List, err := ipamutils.ElectInterfaceAddresses(bridgeName)
 	nw, nw6List, err := ipamutils.ElectInterfaceAddresses(bridgeName)
 	if err == nil {
 	if err == nil {
-		ipamV4Conf.PreferredPool = types.GetIPNetCanonical(nw).String()
-		hip, _ := types.GetHostPartIP(nw.IP, nw.Mask)
+		ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
+		hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
 		if hip.IsGlobalUnicast() {
 		if hip.IsGlobalUnicast() {
 			ipamV4Conf.Gateway = nw.IP.String()
 			ipamV4Conf.Gateway = nw.IP.String()
 		}
 		}
@@ -947,11 +1026,69 @@ func (daemon *Daemon) conditionalMountOnStart(container *container.Container) er
 
 
 // conditionalUnmountOnCleanup is a platform specific helper function called
 // conditionalUnmountOnCleanup is a platform specific helper function called
 // during the cleanup of a container to unmount.
 // during the cleanup of a container to unmount.
-func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) {
-	daemon.Unmount(container)
+func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
+	return daemon.Unmount(container)
 }
 }
 
 
 func restoreCustomImage(is image.Store, ls layer.Store, rs reference.Store) error {
 func restoreCustomImage(is image.Store, ls layer.Store, rs reference.Store) error {
 	// Unix has no custom images to register
 	// Unix has no custom images to register
 	return nil
 	return nil
 }
 }
+
+func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
+	if !c.IsRunning() {
+		return nil, errNotRunning{c.ID}
+	}
+	stats, err := daemon.containerd.Stats(c.ID)
+	if err != nil {
+		return nil, err
+	}
+	s := &types.StatsJSON{}
+	cgs := stats.CgroupStats
+	if cgs != nil {
+		s.BlkioStats = types.BlkioStats{
+			IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
+			IoServicedRecursive:     copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
+			IoQueuedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
+			IoServiceTimeRecursive:  copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
+			IoWaitTimeRecursive:     copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
+			IoMergedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
+			IoTimeRecursive:         copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
+			SectorsRecursive:        copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
+		}
+		cpu := cgs.CpuStats
+		s.CPUStats = types.CPUStats{
+			CPUUsage: types.CPUUsage{
+				TotalUsage:        cpu.CpuUsage.TotalUsage,
+				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
+				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
+				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
+			},
+			ThrottlingData: types.ThrottlingData{
+				Periods:          cpu.ThrottlingData.Periods,
+				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
+				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
+			},
+		}
+		mem := cgs.MemoryStats.Usage
+		s.MemoryStats = types.MemoryStats{
+			Usage:    mem.Usage,
+			MaxUsage: mem.MaxUsage,
+			Stats:    cgs.MemoryStats.Stats,
+			Failcnt:  mem.Failcnt,
+		}
+		if cgs.PidsStats != nil {
+			s.PidsStats = types.PidsStats{
+				Current: cgs.PidsStats.Current,
+			}
+		}
+	}
+	s.Read = time.Unix(int64(stats.Timestamp), 0)
+	return s, nil
+}
+
// setDefaultIsolation determines the default isolation mode for the
// daemon to run in. Isolation modes only exist on Windows, so this
// Unix implementation is a no-op that always succeeds.
func (daemon *Daemon) setDefaultIsolation() error {
	return nil
}

+ 0 - 3
daemon/delete.go

@@ -129,9 +129,6 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
 		return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", daemon.GraphDriverName(), container.ID, err)
 		return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", daemon.GraphDriverName(), container.ID, err)
 	}
 	}
 
 
-	if err = daemon.execDriver.Clean(container.ID); err != nil {
-		return fmt.Errorf("Unable to remove execdriver data for %s: %s", container.ID, err)
-	}
 	return nil
 	return nil
 }
 }
 
 

+ 32 - 107
daemon/exec.go

@@ -11,10 +11,9 @@ import (
 	"github.com/Sirupsen/logrus"
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/daemon/exec"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/errors"
 	"github.com/docker/docker/errors"
+	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/pkg/pools"
 	"github.com/docker/docker/pkg/pools"
-	"github.com/docker/docker/pkg/promise"
 	"github.com/docker/docker/pkg/term"
 	"github.com/docker/docker/pkg/term"
 	"github.com/docker/engine-api/types"
 	"github.com/docker/engine-api/types"
 	"github.com/docker/engine-api/types/strslice"
 	"github.com/docker/engine-api/types/strslice"
@@ -106,33 +105,31 @@ func (d *Daemon) ContainerExecCreate(config *types.ExecConfig) (string, error) {
 		}
 		}
 	}
 	}
 
 
-	processConfig := &execdriver.ProcessConfig{
-		CommonProcessConfig: execdriver.CommonProcessConfig{
-			Tty:        config.Tty,
-			Entrypoint: entrypoint,
-			Arguments:  args,
-		},
-	}
-	setPlatformSpecificExecProcessConfig(config, container, processConfig)
-
 	execConfig := exec.NewConfig()
 	execConfig := exec.NewConfig()
 	execConfig.OpenStdin = config.AttachStdin
 	execConfig.OpenStdin = config.AttachStdin
 	execConfig.OpenStdout = config.AttachStdout
 	execConfig.OpenStdout = config.AttachStdout
 	execConfig.OpenStderr = config.AttachStderr
 	execConfig.OpenStderr = config.AttachStderr
-	execConfig.ProcessConfig = processConfig
 	execConfig.ContainerID = container.ID
 	execConfig.ContainerID = container.ID
 	execConfig.DetachKeys = keys
 	execConfig.DetachKeys = keys
+	execConfig.Entrypoint = entrypoint
+	execConfig.Args = args
+	execConfig.Tty = config.Tty
+	execConfig.Privileged = config.Privileged
+	execConfig.User = config.User
+	if len(execConfig.User) == 0 {
+		execConfig.User = container.Config.User
+	}
 
 
 	d.registerExecCommand(container, execConfig)
 	d.registerExecCommand(container, execConfig)
 
 
-	d.LogContainerEvent(container, "exec_create: "+execConfig.ProcessConfig.Entrypoint+" "+strings.Join(execConfig.ProcessConfig.Arguments, " "))
+	d.LogContainerEvent(container, "exec_create: "+execConfig.Entrypoint+" "+strings.Join(execConfig.Args, " "))
 
 
 	return execConfig.ID, nil
 	return execConfig.ID, nil
 }
 }
 
 
 // ContainerExecStart starts a previously set up exec instance. The
 // ContainerExecStart starts a previously set up exec instance. The
 // std streams are set up.
 // std streams are set up.
-func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) error {
+func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) (err error) {
 	var (
 	var (
 		cStdin           io.ReadCloser
 		cStdin           io.ReadCloser
 		cStdout, cStderr io.Writer
 		cStdout, cStderr io.Writer
@@ -155,11 +152,18 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.
 		return fmt.Errorf("Error: Exec command %s is already running", ec.ID)
 		return fmt.Errorf("Error: Exec command %s is already running", ec.ID)
 	}
 	}
 	ec.Running = true
 	ec.Running = true
+	defer func() {
+		if err != nil {
+			ec.Running = false
+			exitCode := 126
+			ec.ExitCode = &exitCode
+		}
+	}()
 	ec.Unlock()
 	ec.Unlock()
 
 
 	c := d.containers.Get(ec.ContainerID)
 	c := d.containers.Get(ec.ContainerID)
 	logrus.Debugf("starting exec command %s in container %s", ec.ID, c.ID)
 	logrus.Debugf("starting exec command %s in container %s", ec.ID, c.ID)
-	d.LogContainerEvent(c, "exec_start: "+ec.ProcessConfig.Entrypoint+" "+strings.Join(ec.ProcessConfig.Arguments, " "))
+	d.LogContainerEvent(c, "exec_start: "+ec.Entrypoint+" "+strings.Join(ec.Args, " "))
 
 
 	if ec.OpenStdin && stdin != nil {
 	if ec.OpenStdin && stdin != nil {
 		r, w := io.Pipe()
 		r, w := io.Pipe()
@@ -183,56 +187,26 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.
 		ec.NewNopInputPipe()
 		ec.NewNopInputPipe()
 	}
 	}
 
 
-	attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.ProcessConfig.Tty, cStdin, cStdout, cStderr, ec.DetachKeys)
-
-	execErr := make(chan error)
-
-	// Note, the ExecConfig data will be removed when the container
-	// itself is deleted.  This allows us to query it (for things like
-	// the exitStatus) even after the cmd is done running.
-
-	go func() {
-		execErr <- d.containerExec(c, ec)
-	}()
+	p := libcontainerd.Process{
+		Args:     append([]string{ec.Entrypoint}, ec.Args...),
+		Terminal: ec.Tty,
+	}
 
 
-	select {
-	case err := <-attachErr:
-		if err != nil {
-			return fmt.Errorf("attach failed with error: %v", err)
-		}
+	if err := execSetPlatformOpt(c, ec, &p); err != nil {
 		return nil
 		return nil
-	case err := <-execErr:
-		if aErr := <-attachErr; aErr != nil && err == nil {
-			return fmt.Errorf("attach failed with error: %v", aErr)
-		}
-		if err == nil {
-			return nil
-		}
-
-		// Maybe the container stopped while we were trying to exec
-		if !c.IsRunning() {
-			return fmt.Errorf("container stopped while running exec: %s", c.ID)
-		}
-		return fmt.Errorf("Cannot run exec command %s in container %s: %s", ec.ID, c.ID, err)
 	}
 	}
-}
 
 
-// Exec calls the underlying exec driver to run
-func (d *Daemon) Exec(c *container.Container, execConfig *exec.Config, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (int, error) {
-	hooks := execdriver.Hooks{
-		Start: startCallback,
-	}
-	exitStatus, err := d.execDriver.Exec(c.Command, execConfig.ProcessConfig, pipes, hooks)
+	attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.Tty, cStdin, cStdout, cStderr, ec.DetachKeys)
 
 
-	// On err, make sure we don't leave ExitCode at zero
-	if err != nil && exitStatus == 0 {
-		exitStatus = 128
+	if err := d.containerd.AddProcess(c.ID, name, p); err != nil {
+		return err
 	}
 	}
 
 
-	execConfig.ExitCode = &exitStatus
-	execConfig.Running = false
-
-	return exitStatus, err
+	err = <-attachErr
+	if err != nil {
+		return fmt.Errorf("attach failed with error: %v", err)
+	}
+	return nil
 }
 }
 
 
 // execCommandGC runs a ticker to clean up the daemon references
 // execCommandGC runs a ticker to clean up the daemon references
@@ -270,52 +244,3 @@ func (d *Daemon) containerExecIds() map[string]struct{} {
 	}
 	}
 	return ids
 	return ids
 }
 }
-
-func (d *Daemon) containerExec(container *container.Container, ec *exec.Config) error {
-	container.Lock()
-	defer container.Unlock()
-
-	callback := func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
-		if processConfig.Tty {
-			// The callback is called after the process Start()
-			// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
-			// which we close here.
-			if c, ok := processConfig.Stdout.(io.Closer); ok {
-				c.Close()
-			}
-		}
-		ec.Close()
-		return nil
-	}
-
-	// We use a callback here instead of a goroutine and an chan for
-	// synchronization purposes
-	cErr := promise.Go(func() error { return d.monitorExec(container, ec, callback) })
-	return ec.Wait(cErr)
-}
-
-func (d *Daemon) monitorExec(container *container.Container, execConfig *exec.Config, callback execdriver.DriverCallback) error {
-	pipes := execdriver.NewPipes(execConfig.Stdin(), execConfig.Stdout(), execConfig.Stderr(), execConfig.OpenStdin)
-	exitCode, err := d.Exec(container, execConfig, pipes, callback)
-	if err != nil {
-		logrus.Errorf("Error running command in existing container %s: %s", container.ID, err)
-	}
-	logrus.Debugf("Exec task in container %s exited with code %d", container.ID, exitCode)
-
-	if err := execConfig.CloseStreams(); err != nil {
-		logrus.Errorf("%s: %s", container.ID, err)
-	}
-
-	if execConfig.ProcessConfig.Terminal != nil {
-		if err := execConfig.WaitResize(); err != nil {
-			logrus.Errorf("Error waiting for resize: %v", err)
-		}
-		if err := execConfig.ProcessConfig.Terminal.Close(); err != nil {
-			logrus.Errorf("Error closing terminal while running in container %s: %s", container.ID, err)
-		}
-	}
-	// remove the exec command from the container's store only and not the
-	// daemon's store so that the exec command can be inspected.
-	container.ExecCommands.Delete(execConfig.ID)
-	return err
-}

+ 14 - 63
daemon/exec/exec.go

@@ -1,11 +1,8 @@
 package exec
 package exec
 
 
 import (
 import (
-	"fmt"
 	"sync"
 	"sync"
-	"time"
 
 
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/stringid"
 	"github.com/docker/docker/pkg/stringid"
 	"github.com/docker/docker/runconfig"
 	"github.com/docker/docker/runconfig"
 )
 )
@@ -16,22 +13,20 @@ import (
 type Config struct {
 type Config struct {
 	sync.Mutex
 	sync.Mutex
 	*runconfig.StreamConfig
 	*runconfig.StreamConfig
-	ID            string
-	Running       bool
-	ExitCode      *int
-	ProcessConfig *execdriver.ProcessConfig
-	OpenStdin     bool
-	OpenStderr    bool
-	OpenStdout    bool
-	CanRemove     bool
-	ContainerID   string
-	DetachKeys    []byte
-
-	// waitStart will be closed immediately after the exec is really started.
-	waitStart chan struct{}
-
-	// waitResize will be closed after Resize is finished.
-	waitResize chan struct{}
+	ID          string
+	Running     bool
+	ExitCode    *int
+	OpenStdin   bool
+	OpenStderr  bool
+	OpenStdout  bool
+	CanRemove   bool
+	ContainerID string
+	DetachKeys  []byte
+	Entrypoint  string
+	Args        []string
+	Tty         bool
+	Privileged  bool
+	User        string
 }
 }
 
 
 // NewConfig initializes the a new exec configuration
 // NewConfig initializes the a new exec configuration
@@ -39,8 +34,6 @@ func NewConfig() *Config {
 	return &Config{
 	return &Config{
 		ID:           stringid.GenerateNonCryptoID(),
 		ID:           stringid.GenerateNonCryptoID(),
 		StreamConfig: runconfig.NewStreamConfig(),
 		StreamConfig: runconfig.NewStreamConfig(),
-		waitStart:    make(chan struct{}),
-		waitResize:   make(chan struct{}),
 	}
 	}
 }
 }
 
 
@@ -98,45 +91,3 @@ func (e *Store) List() []string {
 	e.RUnlock()
 	e.RUnlock()
 	return IDs
 	return IDs
 }
 }
-
-// Wait waits until the exec process finishes or there is an error in the error channel.
-func (c *Config) Wait(cErr chan error) error {
-	// Exec should not return until the process is actually running
-	select {
-	case <-c.waitStart:
-	case err := <-cErr:
-		return err
-	}
-	return nil
-}
-
-// WaitResize waits until terminal resize finishes or time out.
-func (c *Config) WaitResize() error {
-	select {
-	case <-c.waitResize:
-	case <-time.After(time.Second):
-		return fmt.Errorf("Terminal resize for exec %s time out.", c.ID)
-	}
-	return nil
-}
-
-// Close closes the wait channel for the progress.
-func (c *Config) Close() {
-	close(c.waitStart)
-}
-
-// CloseResize closes the wait channel for resizing terminal.
-func (c *Config) CloseResize() {
-	close(c.waitResize)
-}
-
-// Resize changes the size of the terminal for the exec process.
-func (c *Config) Resize(h, w int) error {
-	defer c.CloseResize()
-	select {
-	case <-c.waitStart:
-	case <-time.After(time.Second):
-		return fmt.Errorf("Exec %s is not running, so it can not be resized.", c.ID)
-	}
-	return c.ProcessConfig.Terminal.Resize(h, w)
-}

+ 26 - 0
daemon/exec_linux.go

@@ -0,0 +1,26 @@
+package daemon
+
+import (
+	"github.com/docker/docker/container"
+	"github.com/docker/docker/daemon/caps"
+	"github.com/docker/docker/daemon/exec"
+	"github.com/docker/docker/libcontainerd"
+)
+
+func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
+	if len(ec.User) > 0 {
+		uid, gid, additionalGids, err := getUser(c, ec.User)
+		if err != nil {
+			return err
+		}
+		p.User = &libcontainerd.User{
+			UID:            uid,
+			GID:            gid,
+			AdditionalGids: additionalGids,
+		}
+	}
+	if ec.Privileged {
+		p.Capabilities = caps.GetAllCapabilities()
+	}
+	return nil
+}

+ 0 - 21
daemon/exec_unix.go

@@ -1,21 +0,0 @@
-// +build linux freebsd
-
-package daemon
-
-import (
-	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/engine-api/types"
-)
-
-// setPlatformSpecificExecProcessConfig sets platform-specific fields in the
-// ProcessConfig structure.
-func setPlatformSpecificExecProcessConfig(config *types.ExecConfig, container *container.Container, pc *execdriver.ProcessConfig) {
-	user := config.User
-	if len(user) == 0 {
-		user = container.Config.User
-	}
-
-	pc.User = user
-	pc.Privileged = config.Privileged
-}

+ 0 - 1
daemon/info.go

@@ -84,7 +84,6 @@ func (daemon *Daemon) SystemInfo() (*types.Info, error) {
 		NFd:                fileutils.GetTotalUsedFds(),
 		NFd:                fileutils.GetTotalUsedFds(),
 		NGoroutines:        runtime.NumGoroutine(),
 		NGoroutines:        runtime.NumGoroutine(),
 		SystemTime:         time.Now().Format(time.RFC3339Nano),
 		SystemTime:         time.Now().Format(time.RFC3339Nano),
-		ExecutionDriver:    daemon.ExecutionDriver().Name(),
 		LoggingDriver:      daemon.defaultLogConfig.Type,
 		LoggingDriver:      daemon.defaultLogConfig.Type,
 		CgroupDriver:       daemon.getCgroupDriver(),
 		CgroupDriver:       daemon.getCgroupDriver(),
 		NEventsListener:    daemon.EventsService.SubscribersCount(),
 		NEventsListener:    daemon.EventsService.SubscribersCount(),

+ 5 - 5
daemon/inspect_unix.go

@@ -82,10 +82,10 @@ func addMountPoints(container *container.Container) []types.MountPoint {
 
 
 func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig {
 func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig {
 	return &backend.ExecProcessConfig{
 	return &backend.ExecProcessConfig{
-		Tty:        e.ProcessConfig.Tty,
-		Entrypoint: e.ProcessConfig.Entrypoint,
-		Arguments:  e.ProcessConfig.Arguments,
-		Privileged: &e.ProcessConfig.Privileged,
-		User:       e.ProcessConfig.User,
+		Tty:        e.Tty,
+		Entrypoint: e.Entrypoint,
+		Arguments:  e.Args,
+		Privileged: &e.Privileged,
+		User:       e.User,
 	}
 	}
 }
 }

+ 4 - 0
daemon/kill.go

@@ -69,6 +69,10 @@ func (daemon *Daemon) killWithSignal(container *container.Container, sig int) er
 
 
 	container.ExitOnNext()
 	container.ExitOnNext()
 
 
+	if !daemon.IsShuttingDown() {
+		container.HasBeenManuallyStopped = true
+	}
+
 	// if the container is currently restarting we do not need to send the signal
 	// if the container is currently restarting we do not need to send the signal
 	// to the process.  Telling the monitor that it should exit on it's next event
 	// to the process.  Telling the monitor that it should exit on it's next event
 	// loop is enough
 	// loop is enough

+ 143 - 0
daemon/monitor.go

@@ -0,0 +1,143 @@
+package daemon
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"runtime"
+	"strconv"
+
+	"github.com/Sirupsen/logrus"
+	"github.com/docker/docker/libcontainerd"
+	"github.com/docker/docker/runconfig"
+)
+
+// StateChanged updates daemon state changes from containerd
+func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
+	c := daemon.containers.Get(id)
+	if c == nil {
+		return fmt.Errorf("no such container: %s", id)
+	}
+
+	switch e.State {
+	case libcontainerd.StateOOM:
+		// StateOOM is Linux specific and should never be hit on Windows
+		if runtime.GOOS == "windows" {
+			return errors.New("Received StateOOM from libcontainerd on Windows. This should never happen.")
+		}
+		daemon.LogContainerEvent(c, "oom")
+	case libcontainerd.StateExit:
+		c.Lock()
+		defer c.Unlock()
+		c.Wait()
+		c.Reset(false)
+		c.SetStopped(platformConstructExitStatus(e))
+		attributes := map[string]string{
+			"exitCode": strconv.Itoa(int(e.ExitCode)),
+		}
+		daemon.LogContainerEventWithAttributes(c, "die", attributes)
+		daemon.Cleanup(c)
+		// FIXME: here is race condition between two RUN instructions in Dockerfile
+		// because they share same runconfig and change image. Must be fixed
+		// in builder/builder.go
+		return c.ToDisk()
+	case libcontainerd.StateRestart:
+		c.Lock()
+		defer c.Unlock()
+		c.Reset(false)
+		c.RestartCount++
+		c.SetRestarting(platformConstructExitStatus(e))
+		attributes := map[string]string{
+			"exitCode": strconv.Itoa(int(e.ExitCode)),
+		}
+		daemon.LogContainerEventWithAttributes(c, "die", attributes)
+		return c.ToDisk()
+	case libcontainerd.StateExitProcess:
+		c.Lock()
+		defer c.Unlock()
+		if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil {
+			ec := int(e.ExitCode)
+			execConfig.ExitCode = &ec
+			execConfig.Running = false
+			execConfig.Wait()
+			if err := execConfig.CloseStreams(); err != nil {
+				logrus.Errorf("%s: %s", c.ID, err)
+			}
+
+			// remove the exec command from the container's store only and not the
+			// daemon's store so that the exec command can be inspected.
+			c.ExecCommands.Delete(execConfig.ID)
+		} else {
+			logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e)
+		}
+	case libcontainerd.StateStart, libcontainerd.StateRestore:
+		c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
+		c.HasBeenManuallyStopped = false
+		if err := c.ToDisk(); err != nil {
+			c.Reset(false)
+			return err
+		}
+	case libcontainerd.StatePause:
+		c.Paused = true
+		daemon.LogContainerEvent(c, "pause")
+	case libcontainerd.StateResume:
+		c.Paused = false
+		daemon.LogContainerEvent(c, "unpause")
+	}
+
+	return nil
+}
+
+// AttachStreams is called by libcontainerd to connect the stdio.
+func (daemon *Daemon) AttachStreams(id string, iop libcontainerd.IOPipe) error {
+	var s *runconfig.StreamConfig
+	c := daemon.containers.Get(id)
+	if c == nil {
+		ec, err := daemon.getExecConfig(id)
+		if err != nil {
+			return fmt.Errorf("no such exec/container: %s", id)
+		}
+		s = ec.StreamConfig
+	} else {
+		s = c.StreamConfig
+		if err := daemon.StartLogging(c); err != nil {
+			c.Reset(false)
+			return err
+		}
+	}
+
+	if stdin := s.Stdin(); stdin != nil {
+		if iop.Stdin != nil {
+			go func() {
+				io.Copy(iop.Stdin, stdin)
+				iop.Stdin.Close()
+			}()
+		}
+	} else {
+		if c != nil && !c.Config.Tty {
+			// tty is enabled, so dont close containerd's iopipe stdin.
+			if iop.Stdin != nil {
+				iop.Stdin.Close()
+			}
+		}
+	}
+
+	copy := func(w io.Writer, r io.Reader) {
+		s.Add(1)
+		go func() {
+			if _, err := io.Copy(w, r); err != nil {
+				logrus.Errorf("%v stream copy error: %v", id, err)
+			}
+			s.Done()
+		}()
+	}
+
+	if iop.Stdout != nil {
+		copy(s.Stdout(), iop.Stdout)
+	}
+	if iop.Stderr != nil {
+		copy(s.Stderr(), iop.Stderr)
+	}
+
+	return nil
+}

+ 14 - 0
daemon/monitor_linux.go

@@ -0,0 +1,14 @@
+package daemon
+
+import (
+	"github.com/docker/docker/container"
+	"github.com/docker/docker/libcontainerd"
+)
+
+// platformConstructExitStatus returns a platform specific exit status structure
+func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
+	return &container.ExitStatus{
+		ExitCode:  int(e.ExitCode),
+		OOMKilled: e.OOMKilled,
+	}
+}

+ 652 - 0
daemon/oci_linux.go

@@ -0,0 +1,652 @@
+package daemon
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/docker/docker/container"
+	"github.com/docker/docker/daemon/caps"
+	"github.com/docker/docker/libcontainerd"
+	"github.com/docker/docker/oci"
+	"github.com/docker/docker/pkg/idtools"
+	"github.com/docker/docker/pkg/mount"
+	"github.com/docker/docker/pkg/stringutils"
+	"github.com/docker/docker/pkg/symlink"
+	"github.com/docker/docker/volume"
+	containertypes "github.com/docker/engine-api/types/container"
+	"github.com/opencontainers/runc/libcontainer/apparmor"
+	"github.com/opencontainers/runc/libcontainer/devices"
+	"github.com/opencontainers/runc/libcontainer/user"
+	"github.com/opencontainers/specs/specs-go"
+)
+
+func setResources(s *specs.Spec, r containertypes.Resources) error {
+	weightDevices, err := getBlkioWeightDevices(r)
+	if err != nil {
+		return err
+	}
+	readBpsDevice, err := getBlkioReadBpsDevices(r)
+	if err != nil {
+		return err
+	}
+	writeBpsDevice, err := getBlkioWriteBpsDevices(r)
+	if err != nil {
+		return err
+	}
+	readIOpsDevice, err := getBlkioReadIOpsDevices(r)
+	if err != nil {
+		return err
+	}
+	writeIOpsDevice, err := getBlkioWriteIOpsDevices(r)
+	if err != nil {
+		return err
+	}
+
+	memoryRes := getMemoryResources(r)
+	cpuRes := getCPUResources(r)
+	blkioWeight := r.BlkioWeight
+
+	specResources := &specs.Resources{
+		Memory: memoryRes,
+		CPU:    cpuRes,
+		BlockIO: &specs.BlockIO{
+			Weight:                  &blkioWeight,
+			WeightDevice:            weightDevices,
+			ThrottleReadBpsDevice:   readBpsDevice,
+			ThrottleWriteBpsDevice:  writeBpsDevice,
+			ThrottleReadIOPSDevice:  readIOpsDevice,
+			ThrottleWriteIOPSDevice: writeIOpsDevice,
+		},
+		DisableOOMKiller: r.OomKillDisable,
+		Pids: &specs.Pids{
+			Limit: &r.PidsLimit,
+		},
+	}
+
+	if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
+		specResources.Devices = s.Linux.Resources.Devices
+	}
+
+	s.Linux.Resources = specResources
+	return nil
+}
+
+func setDevices(s *specs.Spec, c *container.Container) error {
+	// Build lists of devices allowed and created within the container.
+	var devs []specs.Device
+	if c.HostConfig.Privileged {
+		hostDevices, err := devices.HostDevices()
+		if err != nil {
+			return err
+		}
+		for _, d := range hostDevices {
+			devs = append(devs, specDevice(d))
+		}
+	} else {
+		for _, deviceMapping := range c.HostConfig.Devices {
+			d, err := getDevicesFromPath(deviceMapping)
+			if err != nil {
+				return err
+			}
+
+			devs = append(devs, d...)
+		}
+	}
+
+	s.Linux.Devices = append(s.Linux.Devices, devs...)
+	return nil
+}
+
+func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
+	var rlimits []specs.Rlimit
+
+	ulimits := c.HostConfig.Ulimits
+	// Merge ulimits with daemon defaults
+	ulIdx := make(map[string]struct{})
+	for _, ul := range ulimits {
+		ulIdx[ul.Name] = struct{}{}
+	}
+	for name, ul := range daemon.configStore.Ulimits {
+		if _, exists := ulIdx[name]; !exists {
+			ulimits = append(ulimits, ul)
+		}
+	}
+
+	for _, ul := range ulimits {
+		rlimits = append(rlimits, specs.Rlimit{
+			Type: "RLIMIT_" + strings.ToUpper(ul.Name),
+			Soft: uint64(ul.Soft),
+			Hard: uint64(ul.Hard),
+		})
+	}
+
+	s.Process.Rlimits = rlimits
+	return nil
+}
+
+func setUser(s *specs.Spec, c *container.Container) error {
+	uid, gid, additionalGids, err := getUser(c, c.Config.User)
+	if err != nil {
+		return err
+	}
+	s.Process.User.UID = uid
+	s.Process.User.GID = gid
+	s.Process.User.AdditionalGids = additionalGids
+	return nil
+}
+
+func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
+	fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS)
+	if err != nil {
+		return nil, err
+	}
+	return os.Open(fp)
+}
+
+func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
+	passwdPath, err := user.GetPasswdPath()
+	if err != nil {
+		return 0, 0, nil, err
+	}
+	groupPath, err := user.GetGroupPath()
+	if err != nil {
+		return 0, 0, nil, err
+	}
+	passwdFile, err := readUserFile(c, passwdPath)
+	if err == nil {
+		defer passwdFile.Close()
+	}
+	groupFile, err := readUserFile(c, groupPath)
+	if err == nil {
+		defer groupFile.Close()
+	}
+
+	execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
+	if err != nil {
+		return 0, 0, nil, err
+	}
+
+	// todo: fix this double read by a change to libcontainer/user pkg
+	groupFile, err = readUserFile(c, groupPath)
+	if err == nil {
+		defer groupFile.Close()
+	}
+	var addGroups []int
+	if len(c.HostConfig.GroupAdd) > 0 {
+		addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
+		if err != nil {
+			return 0, 0, nil, err
+		}
+	}
+	uid := uint32(execUser.Uid)
+	gid := uint32(execUser.Gid)
+	sgids := append(execUser.Sgids, addGroups...)
+	var additionalGids []uint32
+	for _, g := range sgids {
+		additionalGids = append(additionalGids, uint32(g))
+	}
+	return uid, gid, additionalGids, nil
+}
+
+func setNamespace(s *specs.Spec, ns specs.Namespace) {
+	for i, n := range s.Linux.Namespaces {
+		if n.Type == ns.Type {
+			s.Linux.Namespaces[i] = ns
+			return
+		}
+	}
+	s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
+}
+
+func setCapabilities(s *specs.Spec, c *container.Container) error {
+	var caplist []string
+	var err error
+	if c.HostConfig.Privileged {
+		caplist = caps.GetAllCapabilities()
+	} else {
+		caplist, err = caps.TweakCapabilities(s.Process.Capabilities, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
+		if err != nil {
+			return err
+		}
+	}
+	s.Process.Capabilities = caplist
+	return nil
+}
+
+func delNamespace(s *specs.Spec, nsType specs.NamespaceType) {
+	idx := -1
+	for i, n := range s.Linux.Namespaces {
+		if n.Type == nsType {
+			idx = i
+		}
+	}
+	if idx >= 0 {
+		s.Linux.Namespaces = append(s.Linux.Namespaces[:idx], s.Linux.Namespaces[idx+1:]...)
+	}
+}
+
+func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
+	// network
+	if !c.Config.NetworkDisabled {
+		ns := specs.Namespace{Type: "network"}
+		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
+		if parts[0] == "container" {
+			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
+			if err != nil {
+				return err
+			}
+			ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
+		} else if c.HostConfig.NetworkMode.IsHost() {
+			ns.Path = c.NetworkSettings.SandboxKey
+		}
+		setNamespace(s, ns)
+	}
+	// ipc
+	if c.HostConfig.IpcMode.IsContainer() {
+		ns := specs.Namespace{Type: "ipc"}
+		ic, err := daemon.getIpcContainer(c)
+		if err != nil {
+			return err
+		}
+		ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
+		setNamespace(s, ns)
+	} else if c.HostConfig.IpcMode.IsHost() {
+		delNamespace(s, specs.NamespaceType("ipc"))
+	} else {
+		ns := specs.Namespace{Type: "ipc"}
+		setNamespace(s, ns)
+	}
+	// pid
+	if c.HostConfig.PidMode.IsHost() {
+		delNamespace(s, specs.NamespaceType("pid"))
+	}
+	// uts
+	if c.HostConfig.UTSMode.IsHost() {
+		delNamespace(s, specs.NamespaceType("uts"))
+		s.Hostname = ""
+	}
+	// user
+	if c.HostConfig.UsernsMode.IsPrivate() {
+		uidMap, gidMap := daemon.GetUIDGIDMaps()
+		if uidMap != nil {
+			ns := specs.Namespace{Type: "user"}
+			setNamespace(s, ns)
+			s.Linux.UIDMappings = specMapping(uidMap)
+			s.Linux.GIDMappings = specMapping(gidMap)
+		}
+	}
+
+	return nil
+}
+
+func specMapping(s []idtools.IDMap) []specs.IDMapping {
+	var ids []specs.IDMapping
+	for _, item := range s {
+		ids = append(ids, specs.IDMapping{
+			HostID:      uint32(item.HostID),
+			ContainerID: uint32(item.ContainerID),
+			Size:        uint32(item.Size),
+		})
+	}
+	return ids
+}
+
+func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
+	for _, m := range mountinfo {
+		if m.Mountpoint == dir {
+			return m
+		}
+	}
+	return nil
+}
+
+// Get the source mount point of directory passed in as argument. Also return
+// optional fields.
+func getSourceMount(source string) (string, string, error) {
+	// Ensure any symlinks are resolved.
+	sourcePath, err := filepath.EvalSymlinks(source)
+	if err != nil {
+		return "", "", err
+	}
+
+	mountinfos, err := mount.GetMounts()
+	if err != nil {
+		return "", "", err
+	}
+
+	mountinfo := getMountInfo(mountinfos, sourcePath)
+	if mountinfo != nil {
+		return sourcePath, mountinfo.Optional, nil
+	}
+
+	path := sourcePath
+	for {
+		path = filepath.Dir(path)
+
+		mountinfo = getMountInfo(mountinfos, path)
+		if mountinfo != nil {
+			return path, mountinfo.Optional, nil
+		}
+
+		if path == "/" {
+			break
+		}
+	}
+
+	// If we are here, we did not find parent mount. Something is wrong.
+	return "", "", fmt.Errorf("Could not find source mount of %s", source)
+}
+
+// Ensure mount point on which path is mounted, is shared.
+func ensureShared(path string) error {
+	sharedMount := false
+
+	sourceMount, optionalOpts, err := getSourceMount(path)
+	if err != nil {
+		return err
+	}
+	// Make sure source mount point is shared.
+	optsSplit := strings.Split(optionalOpts, " ")
+	for _, opt := range optsSplit {
+		if strings.HasPrefix(opt, "shared:") {
+			sharedMount = true
+			break
+		}
+	}
+
+	if !sharedMount {
+		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
+	}
+	return nil
+}
+
+// Ensure mount point on which path is mounted, is either shared or slave.
+func ensureSharedOrSlave(path string) error {
+	sharedMount := false
+	slaveMount := false
+
+	sourceMount, optionalOpts, err := getSourceMount(path)
+	if err != nil {
+		return err
+	}
+	// Make sure source mount point is shared.
+	optsSplit := strings.Split(optionalOpts, " ")
+	for _, opt := range optsSplit {
+		if strings.HasPrefix(opt, "shared:") {
+			sharedMount = true
+			break
+		} else if strings.HasPrefix(opt, "master:") {
+			slaveMount = true
+			break
+		}
+	}
+
+	if !sharedMount && !slaveMount {
+		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
+	}
+	return nil
+}
+
+var (
+	mountPropagationMap = map[string]int{
+		"private":  mount.PRIVATE,
+		"rprivate": mount.RPRIVATE,
+		"shared":   mount.SHARED,
+		"rshared":  mount.RSHARED,
+		"slave":    mount.SLAVE,
+		"rslave":   mount.RSLAVE,
+	}
+
+	mountPropagationReverseMap = map[int]string{
+		mount.PRIVATE:  "private",
+		mount.RPRIVATE: "rprivate",
+		mount.SHARED:   "shared",
+		mount.RSHARED:  "rshared",
+		mount.SLAVE:    "slave",
+		mount.RSLAVE:   "rslave",
+	}
+)
+
+func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
+	userMounts := make(map[string]struct{})
+	for _, m := range mounts {
+		userMounts[m.Destination] = struct{}{}
+	}
+
+	// Filter out mounts that are overriden by user supplied mounts
+	var defaultMounts []specs.Mount
+	_, mountDev := userMounts["/dev"]
+	for _, m := range s.Mounts {
+		if _, ok := userMounts[m.Destination]; !ok {
+			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
+				continue
+			}
+			defaultMounts = append(defaultMounts, m)
+		}
+	}
+
+	s.Mounts = defaultMounts
+	for _, m := range mounts {
+		for _, cm := range s.Mounts {
+			if cm.Destination == m.Destination {
+				return fmt.Errorf("Duplicate mount point '%s'", m.Destination)
+			}
+		}
+
+		if m.Source == "tmpfs" {
+			opt := []string{"noexec", "nosuid", "nodev", volume.DefaultPropagationMode}
+			if m.Data != "" {
+				opt = append(opt, strings.Split(m.Data, ",")...)
+			} else {
+				opt = append(opt, "size=65536k")
+			}
+
+			s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: opt})
+			continue
+		}
+
+		mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}
+
+		// Determine property of RootPropagation based on volume
+		// properties. If a volume is shared, then keep root propagation
+		// shared. This should work for slave and private volumes too.
+		//
+		// For slave volumes, it can be either [r]shared/[r]slave.
+		//
+		// For private volumes any root propagation value should work.
+		pFlag := mountPropagationMap[m.Propagation]
+		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
+			if err := ensureShared(m.Source); err != nil {
+				return err
+			}
+			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
+			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
+				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
+			}
+		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
+			if err := ensureSharedOrSlave(m.Source); err != nil {
+				return err
+			}
+			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
+			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
+				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
+			}
+		}
+
+		opts := []string{"rbind"}
+		if !m.Writable {
+			opts = append(opts, "ro")
+		}
+		if pFlag != 0 {
+			opts = append(opts, mountPropagationReverseMap[pFlag])
+		}
+
+		mt.Options = opts
+		s.Mounts = append(s.Mounts, mt)
+	}
+
+	if s.Root.Readonly {
+		for i, m := range s.Mounts {
+			switch m.Destination {
+			case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
+				continue
+			}
+			if _, ok := userMounts[m.Destination]; !ok {
+				if !stringutils.InSlice(m.Options, "ro") {
+					s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
+				}
+			}
+		}
+	}
+
+	if c.HostConfig.Privileged {
+		if !s.Root.Readonly {
+			// clear readonly for /sys
+			for i := range s.Mounts {
+				if s.Mounts[i].Destination == "/sys" {
+					clearReadOnly(&s.Mounts[i])
+				}
+			}
+		}
+	}
+
+	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
+	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
+	if uidMap, _ := daemon.GetUIDGIDMaps(); uidMap != nil || c.HostConfig.Privileged {
+		for i, m := range s.Mounts {
+			if m.Type == "cgroup" {
+				clearReadOnly(&s.Mounts[i])
+			}
+		}
+	}
+
+	return nil
+}
+
+func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
+	linkedEnv, err := daemon.setupLinkedContainers(c)
+	if err != nil {
+		return err
+	}
+	s.Root = specs.Root{
+		Path:     c.BaseFS,
+		Readonly: c.HostConfig.ReadonlyRootfs,
+	}
+	rootUID, rootGID := daemon.GetRemappedUIDGID()
+	if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil {
+		return err
+	}
+	cwd := c.Config.WorkingDir
+	if len(cwd) == 0 {
+		cwd = "/"
+	}
+	s.Process.Args = append([]string{c.Path}, c.Args...)
+	s.Process.Cwd = cwd
+	s.Process.Env = c.CreateDaemonEnvironment(linkedEnv)
+	s.Process.Terminal = c.Config.Tty
+	s.Hostname = c.FullHostname()
+
+	return nil
+}
+
+func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, error) {
+	s := oci.DefaultSpec()
+	if err := daemon.populateCommonSpec(&s, c); err != nil {
+		return nil, err
+	}
+
+	var cgroupsPath string
+	if c.HostConfig.CgroupParent != "" {
+		cgroupsPath = filepath.Join(c.HostConfig.CgroupParent, c.ID)
+	} else {
+		defaultCgroupParent := "/docker"
+		if daemon.configStore.CgroupParent != "" {
+			defaultCgroupParent = daemon.configStore.CgroupParent
+		} else if daemon.usingSystemd() {
+			defaultCgroupParent = "system.slice"
+		}
+		cgroupsPath = filepath.Join(defaultCgroupParent, c.ID)
+	}
+	s.Linux.CgroupsPath = &cgroupsPath
+
+	if err := setResources(&s, c.HostConfig.Resources); err != nil {
+		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
+	}
+	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
+	if err := setDevices(&s, c); err != nil {
+		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
+	}
+	if err := setRlimits(daemon, &s, c); err != nil {
+		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
+	}
+	if err := setUser(&s, c); err != nil {
+		return nil, fmt.Errorf("linux spec user: %v", err)
+	}
+	if err := setNamespaces(daemon, &s, c); err != nil {
+		return nil, fmt.Errorf("linux spec namespaces: %v", err)
+	}
+	if err := setCapabilities(&s, c); err != nil {
+		return nil, fmt.Errorf("linux spec capabilities: %v", err)
+	}
+	if err := setSeccomp(daemon, &s, c); err != nil {
+		return nil, fmt.Errorf("linux seccomp: %v", err)
+	}
+
+	if err := daemon.setupIpcDirs(c); err != nil {
+		return nil, err
+	}
+
+	mounts, err := daemon.setupMounts(c)
+	if err != nil {
+		return nil, err
+	}
+	mounts = append(mounts, c.IpcMounts()...)
+	mounts = append(mounts, c.TmpfsMounts()...)
+	if err := setMounts(daemon, &s, c, mounts); err != nil {
+		return nil, fmt.Errorf("linux mounts: %v", err)
+	}
+
+	for _, ns := range s.Linux.Namespaces {
+		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
+			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
+			if err != nil {
+				return nil, err
+			}
+
+			s.Hooks = specs.Hooks{
+				Prestart: []specs.Hook{{
+					Path: target, // FIXME: cross-platform
+					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
+				}},
+			}
+		}
+	}
+
+	if apparmor.IsEnabled() {
+		appArmorProfile := "docker-default"
+		if c.HostConfig.Privileged {
+			appArmorProfile = "unconfined"
+		} else if len(c.AppArmorProfile) > 0 {
+			appArmorProfile = c.AppArmorProfile
+		}
+		s.Process.ApparmorProfile = appArmorProfile
+	}
+	s.Process.SelinuxLabel = c.GetProcessLabel()
+	s.Process.NoNewPrivileges = c.NoNewPrivileges
+
+	return (*libcontainerd.Spec)(&s), nil
+}
+
+func clearReadOnly(m *specs.Mount) {
+	var opt []string
+	for _, o := range m.Options {
+		if o != "ro" {
+			opt = append(opt, o)
+		}
+	}
+	m.Options = opt
+}

+ 2 - 3
daemon/pause.go

@@ -41,10 +41,9 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
 		return errContainerIsRestarting(container.ID)
 		return errContainerIsRestarting(container.ID)
 	}
 	}
 
 
-	if err := daemon.execDriver.Pause(container.Command); err != nil {
+	if err := daemon.containerd.Pause(container.ID); err != nil {
 		return fmt.Errorf("Cannot pause container %s: %s", container.ID, err)
 		return fmt.Errorf("Cannot pause container %s: %s", container.ID, err)
 	}
 	}
-	container.Paused = true
-	daemon.LogContainerEvent(container, "pause")
+
 	return nil
 	return nil
 }
 }

+ 8 - 5
daemon/resize.go

@@ -1,6 +1,10 @@
 package daemon
 package daemon
 
 
-import "fmt"
+import (
+	"fmt"
+
+	"github.com/docker/docker/libcontainerd"
+)
 
 
 // ContainerResize changes the size of the TTY of the process running
 // ContainerResize changes the size of the TTY of the process running
 // in the container with the given name to the given height and width.
 // in the container with the given name to the given height and width.
@@ -14,7 +18,7 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error {
 		return errNotRunning{container.ID}
 		return errNotRunning{container.ID}
 	}
 	}
 
 
-	if err = container.Resize(height, width); err == nil {
+	if err = daemon.containerd.Resize(container.ID, libcontainerd.InitFriendlyName, width, height); err == nil {
 		attributes := map[string]string{
 		attributes := map[string]string{
 			"height": fmt.Sprintf("%d", height),
 			"height": fmt.Sprintf("%d", height),
 			"width":  fmt.Sprintf("%d", width),
 			"width":  fmt.Sprintf("%d", width),
@@ -28,10 +32,9 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error {
 // running in the exec with the given name to the given height and
 // running in the exec with the given name to the given height and
 // width.
 // width.
 func (daemon *Daemon) ContainerExecResize(name string, height, width int) error {
 func (daemon *Daemon) ContainerExecResize(name string, height, width int) error {
-	ExecConfig, err := daemon.getExecConfig(name)
+	ec, err := daemon.getExecConfig(name)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
-
-	return ExecConfig.Resize(height, width)
+	return daemon.containerd.Resize(ec.ContainerID, ec.ID, width, height)
 }
 }

+ 1600 - 0
daemon/seccomp_default_linux.go

@@ -0,0 +1,1600 @@
+// +build linux,seccomp
+
+package daemon
+
+import (
+	"syscall"
+
+	"github.com/opencontainers/specs/specs-go"
+	libseccomp "github.com/seccomp/libseccomp-golang"
+)
+
+func arches() []specs.Arch {
+	var native, err = libseccomp.GetNativeArch()
+	if err != nil {
+		return []specs.Arch{}
+	}
+	var a = native.String()
+	switch a {
+	case "amd64":
+		return []specs.Arch{specs.ArchX86_64, specs.ArchX86, specs.ArchX32}
+	case "arm64":
+		return []specs.Arch{specs.ArchAARCH64, specs.ArchARM}
+	case "mips64":
+		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
+	case "mips64n32":
+		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
+	case "mipsel64":
+		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
+	case "mipsel64n32":
+		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
+	default:
+		return []specs.Arch{}
+	}
+}
+
+var defaultSeccompProfile = specs.Seccomp{
+	DefaultAction: specs.ActErrno,
+	Architectures: arches(),
+	Syscalls: []specs.Syscall{
+		{
+			Name:   "accept",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "accept4",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "access",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "alarm",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "arch_prctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "bind",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "brk",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "capget",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "capset",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "chdir",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "chmod",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "chown",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "chown32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "chroot",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "clock_getres",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "clock_gettime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "clock_nanosleep",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "clone",
+			Action: specs.ActAllow,
+			Args: []specs.Arg{
+				{
+					Index:    0,
+					Value:    syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWUSER | syscall.CLONE_NEWPID | syscall.CLONE_NEWNET,
+					ValueTwo: 0,
+					Op:       specs.OpMaskedEqual,
+				},
+			},
+		},
+		{
+			Name:   "close",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "connect",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "creat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "dup",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "dup2",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "dup3",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_create",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_create1",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_ctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_ctl_old",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_pwait",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_wait",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_wait_old",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "eventfd",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "eventfd2",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "execve",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "execveat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "exit",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "exit_group",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "faccessat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fadvise64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fadvise64_64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fallocate",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fanotify_init",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fanotify_mark",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchdir",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchmod",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchmodat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchown",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchown32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchownat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fcntl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fcntl64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fdatasync",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fgetxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "flistxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "flock",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fork",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fremovexattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fsetxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fstat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fstat64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fstatat64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fstatfs",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fstatfs64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fsync",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ftruncate",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ftruncate64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "futex",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "futimesat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getcpu",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getcwd",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getdents",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getdents64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getegid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getegid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "geteuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "geteuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getgid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getgroups",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getgroups32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getitimer",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getpeername",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getpgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getpgrp",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getpid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getppid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getpriority",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getrandom",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getresgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getresgid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getresuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getresuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getrlimit",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "get_robust_list",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getrusage",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getsid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getsockname",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getsockopt",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "get_thread_area",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "gettid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "gettimeofday",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "inotify_add_watch",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "inotify_init",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "inotify_init1",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "inotify_rm_watch",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "io_cancel",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ioctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "io_destroy",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "io_getevents",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ioprio_get",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ioprio_set",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "io_setup",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "io_submit",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "kill",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lchown",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lchown32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lgetxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "link",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "linkat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "listen",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "listxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "llistxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "_llseek",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lremovexattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lseek",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lsetxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lstat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lstat64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "madvise",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "memfd_create",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mincore",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mkdir",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mkdirat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mknod",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mknodat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mlock",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mlockall",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mmap",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mmap2",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mprotect",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_getsetattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_notify",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_open",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_timedreceive",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_timedsend",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_unlink",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mremap",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "msgctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "msgget",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "msgrcv",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "msgsnd",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "msync",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "munlock",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "munlockall",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "munmap",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "nanosleep",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "newfstatat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "_newselect",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "open",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "openat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pause",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pipe",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pipe2",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "poll",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ppoll",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "prctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pread64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "preadv",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "prlimit64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pselect6",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pwrite64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pwritev",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "read",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "readahead",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "readlink",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "readlinkat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "readv",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "recv",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "recvfrom",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "recvmmsg",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "recvmsg",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "remap_file_pages",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "removexattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rename",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "renameat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "renameat2",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rmdir",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigaction",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigpending",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigprocmask",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigqueueinfo",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigreturn",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigsuspend",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigtimedwait",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_tgsigqueueinfo",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_getaffinity",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_getattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_getparam",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_get_priority_max",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_get_priority_min",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_getscheduler",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_rr_get_interval",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_setaffinity",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_setattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_setparam",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_setscheduler",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_yield",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "seccomp",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "select",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "semctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "semget",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "semop",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "semtimedop",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "send",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sendfile",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sendfile64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sendmmsg",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sendmsg",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sendto",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setdomainname",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setfsgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setfsgid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setfsuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setfsuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setgid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setgroups",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setgroups32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sethostname",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setitimer",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setpgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setpriority",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setregid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setregid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setresgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setresgid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setresuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setresuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setreuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setreuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setrlimit",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "set_robust_list",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setsid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setsockopt",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "set_thread_area",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "set_tid_address",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "shmat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "shmctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "shmdt",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "shmget",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "shutdown",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sigaltstack",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "signalfd",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "signalfd4",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sigreturn",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "socket",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "socketpair",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "splice",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "stat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "stat64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "statfs",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "statfs64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "symlink",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "symlinkat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sync",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sync_file_range",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "syncfs",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sysinfo",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "syslog",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "tee",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "tgkill",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "time",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timer_create",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timer_delete",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timerfd_create",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timerfd_gettime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timerfd_settime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timer_getoverrun",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timer_gettime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timer_settime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "times",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "tkill",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "truncate",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "truncate64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ugetrlimit",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "umask",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "uname",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "unlink",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "unlinkat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "utime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "utimensat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "utimes",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "vfork",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "vhangup",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "vmsplice",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "wait4",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "waitid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "waitpid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "write",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "writev",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		// i386 specific syscalls
+		{
+			Name:   "modify_ldt",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		// arm specific syscalls
+		{
+			Name:   "breakpoint",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "cacheflush",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "set_tls",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+	},
+}

+ 12 - 0
daemon/seccomp_disabled.go

@@ -0,0 +1,12 @@
+// +build !seccomp,!windows
+
+package daemon
+
+import (
+	"github.com/docker/docker/container"
+	"github.com/opencontainers/specs/specs-go"
+)
+
// setSeccomp is a no-op stub used on builds without the "seccomp" build tag
// (and on non-Windows platforms per the file's build constraint): the OCI
// spec is left untouched, so no syscall filter is applied to the container.
// It shares its signature with the seccomp-enabled variant so callers are
// build-tag agnostic.
func setSeccomp(daemon *Daemon, rs *specs.Spec, c *container.Container) error {
	return nil
}

+ 100 - 0
daemon/seccomp_linux.go

@@ -0,0 +1,100 @@
+// +build linux,seccomp
+
+package daemon
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/Sirupsen/logrus"
+	"github.com/docker/docker/container"
+	"github.com/docker/engine-api/types"
+	"github.com/opencontainers/specs/specs-go"
+)
+
+func setSeccomp(daemon *Daemon, rs *specs.Spec, c *container.Container) error {
+	var seccomp *specs.Seccomp
+	var err error
+
+	if c.HostConfig.Privileged {
+		return nil
+	}
+
+	if !daemon.seccompEnabled {
+		if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" {
+			return fmt.Errorf("Seccomp is not enabled in your kernel, cannot run a custom seccomp profile.")
+		}
+		logrus.Warn("Seccomp is not enabled in your kernel, running container without default profile.")
+		c.SeccompProfile = "unconfined"
+	}
+	if c.SeccompProfile == "unconfined" {
+		return nil
+	}
+	if c.SeccompProfile != "" {
+		seccomp, err = loadSeccompProfile(c.SeccompProfile)
+		if err != nil {
+			return err
+		}
+	} else {
+		seccomp = &defaultSeccompProfile
+	}
+
+	rs.Linux.Seccomp = seccomp
+	return nil
+}
+
+func loadSeccompProfile(body string) (*specs.Seccomp, error) {
+	var config types.Seccomp
+	if err := json.Unmarshal([]byte(body), &config); err != nil {
+		return nil, fmt.Errorf("Decoding seccomp profile failed: %v", err)
+	}
+
+	return setupSeccomp(&config)
+}
+
// setupSeccomp converts an engine-api seccomp configuration into the OCI
// runtime-spec representation consumed by the runtime.
//
// A nil config — or one with no default action and no syscall rules — is
// treated as "seccomp disabled" and yields (nil, nil) without error.
// Slices are built with append (left nil when empty) so the result
// serializes the same way as the input shape implies.
func setupSeccomp(config *types.Seccomp) (newConfig *specs.Seccomp, err error) {
	if config == nil {
		return nil, nil
	}

	// No default action specified, no syscalls listed, assume seccomp disabled
	if config.DefaultAction == "" && len(config.Syscalls) == 0 {
		return nil, nil
	}

	newConfig = &specs.Seccomp{}

	// if len(config.Architectures) == 0 then libseccomp will figure out the architecture to use
	if len(config.Architectures) > 0 {
		for _, arch := range config.Architectures {
			newConfig.Architectures = append(newConfig.Architectures, specs.Arch(arch))
		}
	}

	newConfig.DefaultAction = specs.Action(config.DefaultAction)

	// Loop through all syscall blocks and convert them to libcontainer format
	for _, call := range config.Syscalls {
		newCall := specs.Syscall{
			Name:   call.Name,
			Action: specs.Action(call.Action),
		}

		// Loop through all the arguments of the syscall and convert them
		for _, arg := range call.Args {
			newArg := specs.Arg{
				Index:    arg.Index,
				Value:    arg.Value,
				ValueTwo: arg.ValueTwo,
				Op:       specs.Operator(arg.Op),
			}

			newCall.Args = append(newCall.Args, newArg)
		}

		newConfig.Syscalls = append(newConfig.Syscalls, newCall)
	}

	return newConfig, nil
}

+ 28 - 27
daemon/start.go

@@ -4,10 +4,13 @@ import (
 	"fmt"
 	"fmt"
 	"net/http"
 	"net/http"
 	"runtime"
 	"runtime"
+	"strings"
+	"syscall"
 
 
 	"github.com/Sirupsen/logrus"
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/errors"
 	"github.com/docker/docker/errors"
+	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/runconfig"
 	"github.com/docker/docker/runconfig"
 	containertypes "github.com/docker/engine-api/types/container"
 	containertypes "github.com/docker/engine-api/types/container"
 )
 )
@@ -122,44 +125,36 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error)
 	if err := daemon.initializeNetworking(container); err != nil {
 	if err := daemon.initializeNetworking(container); err != nil {
 		return err
 		return err
 	}
 	}
-	linkedEnv, err := daemon.setupLinkedContainers(container)
+
+	spec, err := daemon.createSpec(container)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
-	rootUID, rootGID := daemon.GetRemappedUIDGID()
-	if err := container.SetupWorkingDirectory(rootUID, rootGID); err != nil {
-		return err
-	}
-	env := container.CreateDaemonEnvironment(linkedEnv)
-	if err := daemon.populateCommand(container, env); err != nil {
-		return err
-	}
 
 
-	if !container.HostConfig.IpcMode.IsContainer() && !container.HostConfig.IpcMode.IsHost() {
-		if err := daemon.setupIpcDirs(container); err != nil {
-			return err
+	defer daemon.LogContainerEvent(container, "start") // this is logged even on error
+	if err := daemon.containerd.Create(container.ID, *spec, libcontainerd.WithRestartManager(container.RestartManager(true))); err != nil {
+		// if we receive an internal error from the initial start of a container then lets
+		// return it instead of entering the restart loop
+		// set to 127 for container cmd not found/does not exist
+		if strings.Contains(err.Error(), "executable file not found") ||
+			strings.Contains(err.Error(), "no such file or directory") ||
+			strings.Contains(err.Error(), "system cannot find the file specified") {
+			container.ExitCode = 127
+			err = fmt.Errorf("Container command not found or does not exist.")
+		}
+		// set to 126 for container cmd can't be invoked errors
+		if strings.Contains(err.Error(), syscall.EACCES.Error()) {
+			container.ExitCode = 126
+			err = fmt.Errorf("Container command could not be invoked.")
 		}
 		}
-	}
 
 
-	mounts, err := daemon.setupMounts(container)
-	if err != nil {
+		container.Reset(false)
 		return err
 		return err
 	}
 	}
-	mounts = append(mounts, container.IpcMounts()...)
-	mounts = append(mounts, container.TmpfsMounts()...)
 
 
-	container.Command.Mounts = mounts
-	if err := daemon.waitForStart(container); err != nil {
-		return err
-	}
-	container.HasBeenStartedBefore = true
 	return nil
 	return nil
 }
 }
 
 
-func (daemon *Daemon) waitForStart(container *container.Container) error {
-	return container.StartMonitor(daemon)
-}
-
 // Cleanup releases any network resources allocated to the container along with any rules
 // Cleanup releases any network resources allocated to the container along with any rules
 // around how containers are linked together.  It also unmounts the container's root filesystem.
 // around how containers are linked together.  It also unmounts the container's root filesystem.
 func (daemon *Daemon) Cleanup(container *container.Container) {
 func (daemon *Daemon) Cleanup(container *container.Container) {
@@ -167,7 +162,13 @@ func (daemon *Daemon) Cleanup(container *container.Container) {
 
 
 	container.UnmountIpcMounts(detachMounted)
 	container.UnmountIpcMounts(detachMounted)
 
 
-	daemon.conditionalUnmountOnCleanup(container)
+	if err := daemon.conditionalUnmountOnCleanup(container); err != nil {
+		// FIXME: remove once reference counting for graphdrivers has been refactored
+		// Ensure that all the mounts are gone
+		if mountid, err := daemon.layerStore.GetMountID(container.ID); err == nil {
+			daemon.cleanupMountsByID(mountid)
+		}
+	}
 
 
 	for _, eConfig := range container.ExecCommands.Commands() {
 	for _, eConfig := range container.ExecCommands.Commands() {
 		daemon.unregisterExecCommand(container, eConfig)
 		daemon.unregisterExecCommand(container, eConfig)

+ 2 - 6
daemon/stats.go

@@ -6,7 +6,6 @@ import (
 	"runtime"
 	"runtime"
 
 
 	"github.com/docker/docker/api/types/backend"
 	"github.com/docker/docker/api/types/backend"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/version"
 	"github.com/docker/docker/pkg/version"
 	"github.com/docker/engine-api/types"
 	"github.com/docker/engine-api/types"
@@ -42,12 +41,9 @@ func (daemon *Daemon) ContainerStats(prefixOrName string, config *backend.Contai
 
 
 	var preCPUStats types.CPUStats
 	var preCPUStats types.CPUStats
 	getStatJSON := func(v interface{}) *types.StatsJSON {
 	getStatJSON := func(v interface{}) *types.StatsJSON {
-		update := v.(*execdriver.ResourceStats)
-		ss := convertStatsToAPITypes(update.Stats)
+		ss := v.(*types.StatsJSON)
 		ss.PreCPUStats = preCPUStats
 		ss.PreCPUStats = preCPUStats
-		ss.MemoryStats.Limit = uint64(update.MemoryLimit)
-		ss.Read = update.Read
-		ss.CPUStats.SystemUsage = update.SystemUsage
+		// ss.MemoryStats.Limit = uint64(update.MemoryLimit)
 		preCPUStats = ss.CPUStats
 		preCPUStats = ss.CPUStats
 		return ss
 		return ss
 	}
 	}

+ 5 - 4
daemon/stats_collector_unix.go

@@ -13,14 +13,14 @@ import (
 
 
 	"github.com/Sirupsen/logrus"
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/pubsub"
 	"github.com/docker/docker/pkg/pubsub"
+	"github.com/docker/engine-api/types"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/system"
 )
 )
 
 
 type statsSupervisor interface {
 type statsSupervisor interface {
 	// GetContainerStats collects all the stats related to a container
 	// GetContainerStats collects all the stats related to a container
-	GetContainerStats(container *container.Container) (*execdriver.ResourceStats, error)
+	GetContainerStats(container *container.Container) (*types.StatsJSON, error)
 }
 }
 
 
 // newStatsCollector returns a new statsCollector that collections
 // newStatsCollector returns a new statsCollector that collections
@@ -120,12 +120,13 @@ func (s *statsCollector) run() {
 		for _, pair := range pairs {
 		for _, pair := range pairs {
 			stats, err := s.supervisor.GetContainerStats(pair.container)
 			stats, err := s.supervisor.GetContainerStats(pair.container)
 			if err != nil {
 			if err != nil {
-				if err != execdriver.ErrNotRunning {
+				if err, ok := err.(errNotRunning); ok {
 					logrus.Errorf("collecting stats for %s: %v", pair.container.ID, err)
 					logrus.Errorf("collecting stats for %s: %v", pair.container.ID, err)
 				}
 				}
 				continue
 				continue
 			}
 			}
-			stats.SystemUsage = systemUsage
+			// FIXME: move to containerd
+			stats.CPUStats.SystemUsage = systemUsage
 
 
 			pair.publisher.Publish(stats)
 			pair.publisher.Publish(stats)
 		}
 		}

+ 0 - 84
daemon/stats_linux.go

@@ -1,84 +0,0 @@
-package daemon
-
-import (
-	"github.com/docker/engine-api/types"
-	"github.com/opencontainers/runc/libcontainer"
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-)
-
-// convertStatsToAPITypes converts the libcontainer.Stats to the api specific
-// structs. This is done to preserve API compatibility and versioning.
-func convertStatsToAPITypes(ls *libcontainer.Stats) *types.StatsJSON {
-	s := &types.StatsJSON{}
-	if ls.Interfaces != nil {
-		s.Networks = make(map[string]types.NetworkStats)
-		for _, iface := range ls.Interfaces {
-			// For API Version >= 1.21, the original data of network will
-			// be returned.
-			s.Networks[iface.Name] = types.NetworkStats{
-				RxBytes:   iface.RxBytes,
-				RxPackets: iface.RxPackets,
-				RxErrors:  iface.RxErrors,
-				RxDropped: iface.RxDropped,
-				TxBytes:   iface.TxBytes,
-				TxPackets: iface.TxPackets,
-				TxErrors:  iface.TxErrors,
-				TxDropped: iface.TxDropped,
-			}
-		}
-	}
-
-	cs := ls.CgroupStats
-	if cs != nil {
-		s.BlkioStats = types.BlkioStats{
-			IoServiceBytesRecursive: copyBlkioEntry(cs.BlkioStats.IoServiceBytesRecursive),
-			IoServicedRecursive:     copyBlkioEntry(cs.BlkioStats.IoServicedRecursive),
-			IoQueuedRecursive:       copyBlkioEntry(cs.BlkioStats.IoQueuedRecursive),
-			IoServiceTimeRecursive:  copyBlkioEntry(cs.BlkioStats.IoServiceTimeRecursive),
-			IoWaitTimeRecursive:     copyBlkioEntry(cs.BlkioStats.IoWaitTimeRecursive),
-			IoMergedRecursive:       copyBlkioEntry(cs.BlkioStats.IoMergedRecursive),
-			IoTimeRecursive:         copyBlkioEntry(cs.BlkioStats.IoTimeRecursive),
-			SectorsRecursive:        copyBlkioEntry(cs.BlkioStats.SectorsRecursive),
-		}
-		cpu := cs.CpuStats
-		s.CPUStats = types.CPUStats{
-			CPUUsage: types.CPUUsage{
-				TotalUsage:        cpu.CpuUsage.TotalUsage,
-				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
-				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
-				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
-			},
-			ThrottlingData: types.ThrottlingData{
-				Periods:          cpu.ThrottlingData.Periods,
-				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
-				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
-			},
-		}
-		mem := cs.MemoryStats
-		s.MemoryStats = types.MemoryStats{
-			Usage:    mem.Usage.Usage,
-			MaxUsage: mem.Usage.MaxUsage,
-			Stats:    mem.Stats,
-			Failcnt:  mem.Usage.Failcnt,
-		}
-		pids := cs.PidsStats
-		s.PidsStats = types.PidsStats{
-			Current: pids.Current,
-		}
-	}
-
-	return s
-}
-
-func copyBlkioEntry(entries []cgroups.BlkioStatEntry) []types.BlkioStatEntry {
-	out := make([]types.BlkioStatEntry, len(entries))
-	for i, re := range entries {
-		out[i] = types.BlkioStatEntry{
-			Major: re.Major,
-			Minor: re.Minor,
-			Op:    re.Op,
-			Value: re.Value,
-		}
-	}
-	return out
-}

+ 0 - 14
daemon/stats_windows.go

@@ -1,14 +0,0 @@
-package daemon
-
-import (
-	"github.com/docker/engine-api/types"
-	"github.com/opencontainers/runc/libcontainer"
-)
-
-// convertStatsToAPITypes converts the libcontainer.Stats to the api specific
-// structs. This is done to preserve API compatibility and versioning.
-func convertStatsToAPITypes(ls *libcontainer.Stats) *types.StatsJSON {
-	// TODO Windows. Refactor accordingly to fill in stats.
-	s := &types.StatsJSON{}
-	return s
-}

+ 2 - 1
daemon/top_unix.go

@@ -33,7 +33,8 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*types.Container
 	if container.IsRestarting() {
 	if container.IsRestarting() {
 		return nil, errContainerIsRestarting(container.ID)
 		return nil, errContainerIsRestarting(container.ID)
 	}
 	}
-	pids, err := daemon.ExecutionDriver().GetPidsForContainer(container.ID)
+
+	pids, err := daemon.containerd.GetPidsForContainer(container.ID)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
 	}
 	}

+ 1 - 3
daemon/unpause.go

@@ -35,11 +35,9 @@ func (daemon *Daemon) containerUnpause(container *container.Container) error {
 		return fmt.Errorf("Container %s is not paused", container.ID)
 		return fmt.Errorf("Container %s is not paused", container.ID)
 	}
 	}
 
 
-	if err := daemon.execDriver.Unpause(container.Command); err != nil {
+	if err := daemon.containerd.Resume(container.ID); err != nil {
 		return fmt.Errorf("Cannot unpause container %s: %s", container.ID, err)
 		return fmt.Errorf("Cannot unpause container %s: %s", container.ID, err)
 	}
 	}
 
 
-	container.Paused = false
-	daemon.LogContainerEvent(container, "unpause")
 	return nil
 	return nil
 }
 }

+ 1 - 1
daemon/update.go

@@ -84,7 +84,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
 	// If container is running (including paused), we need to update configs
 	// If container is running (including paused), we need to update configs
 	// to the real world.
 	// to the real world.
 	if container.IsRunning() && !container.IsRestarting() {
 	if container.IsRunning() && !container.IsRestarting() {
-		if err := daemon.execDriver.Update(container.Command); err != nil {
+		if err := daemon.containerd.UpdateResources(container.ID, toContainerdResources(hostConfig.Resources)); err != nil {
 			restoreConfig = true
 			restoreConfig = true
 			return errCannotUpdate(container.ID, err)
 			return errCannotUpdate(container.ID, err)
 		}
 		}

+ 25 - 0
daemon/update_linux.go

@@ -0,0 +1,25 @@
+// +build linux
+
+package daemon
+
+import (
+	"github.com/docker/docker/libcontainerd"
+	"github.com/docker/engine-api/types/container"
+)
+
+func toContainerdResources(resources container.Resources) libcontainerd.Resources {
+	var r libcontainerd.Resources
+	r.BlkioWeight = uint32(resources.BlkioWeight)
+	r.CpuShares = uint32(resources.CPUShares)
+	r.CpuPeriod = uint32(resources.CPUPeriod)
+	r.CpuQuota = uint32(resources.CPUQuota)
+	r.CpusetCpus = resources.CpusetCpus
+	r.CpusetMems = resources.CpusetMems
+	r.MemoryLimit = uint32(resources.Memory)
+	if resources.MemorySwap > 0 {
+		r.MemorySwap = uint32(resources.MemorySwap)
+	}
+	r.MemoryReservation = uint32(resources.MemoryReservation)
+	r.KernelMemoryLimit = uint32(resources.KernelMemory)
+	return r
+}

+ 1 - 2
daemon/volumes.go

@@ -8,7 +8,6 @@ import (
 	"strings"
 	"strings"
 
 
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/volume"
 	"github.com/docker/docker/volume"
 	"github.com/docker/engine-api/types"
 	"github.com/docker/engine-api/types"
 	containertypes "github.com/docker/engine-api/types/container"
 	containertypes "github.com/docker/engine-api/types/container"
@@ -21,7 +20,7 @@ var (
 	ErrVolumeReadonly = errors.New("mounted volume is marked read-only")
 	ErrVolumeReadonly = errors.New("mounted volume is marked read-only")
 )
 )
 
 
-type mounts []execdriver.Mount
+type mounts []container.Mount
 
 
 // volumeToAPIType converts a volume.Volume to the type used by the remote API
 // volumeToAPIType converts a volume.Volume to the type used by the remote API
 func volumeToAPIType(v volume.Volume) *types.Volume {
 func volumeToAPIType(v volume.Volume) *types.Volume {

+ 9 - 10
daemon/volumes_unix.go

@@ -8,25 +8,24 @@ import (
 	"strconv"
 	"strconv"
 
 
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/volume"
 	"github.com/docker/docker/volume"
 )
 )
 
 
 // setupMounts iterates through each of the mount points for a container and
 // setupMounts iterates through each of the mount points for a container and
 // calls Setup() on each. It also looks to see if is a network mount such as
 // calls Setup() on each. It also looks to see if is a network mount such as
 // /etc/resolv.conf, and if it is not, appends it to the array of mounts.
 // /etc/resolv.conf, and if it is not, appends it to the array of mounts.
-func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.Mount, error) {
-	var mounts []execdriver.Mount
-	for _, m := range container.MountPoints {
-		if err := daemon.lazyInitializeVolume(container.ID, m); err != nil {
+func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) {
+	var mounts []container.Mount
+	for _, m := range c.MountPoints {
+		if err := daemon.lazyInitializeVolume(c.ID, m); err != nil {
 			return nil, err
 			return nil, err
 		}
 		}
 		path, err := m.Setup()
 		path, err := m.Setup()
 		if err != nil {
 		if err != nil {
 			return nil, err
 			return nil, err
 		}
 		}
-		if !container.TrySetNetworkMount(m.Destination, path) {
-			mnt := execdriver.Mount{
+		if !c.TrySetNetworkMount(m.Destination, path) {
+			mnt := container.Mount{
 				Source:      path,
 				Source:      path,
 				Destination: m.Destination,
 				Destination: m.Destination,
 				Writable:    m.RW,
 				Writable:    m.RW,
@@ -35,7 +34,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
 			if m.Volume != nil {
 			if m.Volume != nil {
 				attributes := map[string]string{
 				attributes := map[string]string{
 					"driver":      m.Volume.DriverName(),
 					"driver":      m.Volume.DriverName(),
-					"container":   container.ID,
+					"container":   c.ID,
 					"destination": m.Destination,
 					"destination": m.Destination,
 					"read/write":  strconv.FormatBool(m.RW),
 					"read/write":  strconv.FormatBool(m.RW),
 					"propagation": m.Propagation,
 					"propagation": m.Propagation,
@@ -47,7 +46,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
 	}
 	}
 
 
 	mounts = sortMounts(mounts)
 	mounts = sortMounts(mounts)
-	netMounts := container.NetworkMounts()
+	netMounts := c.NetworkMounts()
 	// if we are going to mount any of the network files from container
 	// if we are going to mount any of the network files from container
 	// metadata, the ownership must be set properly for potential container
 	// metadata, the ownership must be set properly for potential container
 	// remapped root (user namespaces)
 	// remapped root (user namespaces)
@@ -63,7 +62,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
 // sortMounts sorts an array of mounts in lexicographic order. This ensure that
 // sortMounts sorts an array of mounts in lexicographic order. This ensure that
 // when mounting, the mounts don't shadow other mounts. For example, if mounting
 // when mounting, the mounts don't shadow other mounts. For example, if mounting
 // /etc and /etc/resolv.conf, /etc/resolv.conf must not be mounted first.
 // /etc and /etc/resolv.conf, /etc/resolv.conf must not be mounted first.
-func sortMounts(m []execdriver.Mount) []execdriver.Mount {
+func sortMounts(m []container.Mount) []container.Mount {
 	sort.Sort(mounts(m))
 	sort.Sort(mounts(m))
 	return m
 	return m
 }
 }

+ 3 - 2
distribution/xfer/download_test.go

@@ -112,12 +112,13 @@ func (ls *mockLayerStore) CreateRWLayer(string, layer.ChainID, string, layer.Mou
 
 
 func (ls *mockLayerStore) GetRWLayer(string) (layer.RWLayer, error) {
 func (ls *mockLayerStore) GetRWLayer(string) (layer.RWLayer, error) {
 	return nil, errors.New("not implemented")
 	return nil, errors.New("not implemented")
-
 }
 }
 
 
 func (ls *mockLayerStore) ReleaseRWLayer(layer.RWLayer) ([]layer.Metadata, error) {
 func (ls *mockLayerStore) ReleaseRWLayer(layer.RWLayer) ([]layer.Metadata, error) {
 	return nil, errors.New("not implemented")
 	return nil, errors.New("not implemented")
-
+}
+func (ls *mockLayerStore) GetMountID(string) (string, error) {
+	return "", errors.New("not implemented")
 }
 }
 
 
 func (ls *mockLayerStore) Cleanup() error {
 func (ls *mockLayerStore) Cleanup() error {

+ 9 - 2
docker/daemon.go

@@ -29,6 +29,7 @@ import (
 	"github.com/docker/docker/daemon/logger"
 	"github.com/docker/docker/daemon/logger"
 	"github.com/docker/docker/docker/listeners"
 	"github.com/docker/docker/docker/listeners"
 	"github.com/docker/docker/dockerversion"
 	"github.com/docker/docker/dockerversion"
+	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/pkg/jsonlog"
 	"github.com/docker/docker/pkg/jsonlog"
 	flag "github.com/docker/docker/pkg/mflag"
 	flag "github.com/docker/docker/pkg/mflag"
@@ -264,7 +265,13 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error {
 	cli.TrustKeyPath = commonFlags.TrustKey
 	cli.TrustKeyPath = commonFlags.TrustKey
 
 
 	registryService := registry.NewService(cli.Config.ServiceOptions)
 	registryService := registry.NewService(cli.Config.ServiceOptions)
-	d, err := daemon.NewDaemon(cli.Config, registryService)
+
+	containerdRemote, err := libcontainerd.New(filepath.Join(cli.Config.ExecRoot, "libcontainerd"), cli.getPlatformRemoteOptions()...)
+	if err != nil {
+		logrus.Fatal(err)
+	}
+
+	d, err := daemon.NewDaemon(cli.Config, registryService, containerdRemote)
 	if err != nil {
 	if err != nil {
 		if pfile != nil {
 		if pfile != nil {
 			if err := pfile.Remove(); err != nil {
 			if err := pfile.Remove(); err != nil {
@@ -279,7 +286,6 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error {
 	logrus.WithFields(logrus.Fields{
 	logrus.WithFields(logrus.Fields{
 		"version":     dockerversion.Version,
 		"version":     dockerversion.Version,
 		"commit":      dockerversion.GitCommit,
 		"commit":      dockerversion.GitCommit,
-		"execdriver":  d.ExecutionDriver().Name(),
 		"graphdriver": d.GraphDriverName(),
 		"graphdriver": d.GraphDriverName(),
 	}).Info("Docker daemon")
 	}).Info("Docker daemon")
 
 
@@ -330,6 +336,7 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error {
 	// Wait for serve API to complete
 	// Wait for serve API to complete
 	errAPI := <-serveAPIWait
 	errAPI := <-serveAPIWait
 	shutdownDaemon(d, 15)
 	shutdownDaemon(d, 15)
+	containerdRemote.Cleanup()
 	if errAPI != nil {
 	if errAPI != nil {
 		if pfile != nil {
 		if pfile != nil {
 			if err := pfile.Remove(); err != nil {
 			if err := pfile.Remove(); err != nil {

+ 13 - 2
docker/daemon_unix.go

@@ -11,10 +11,9 @@ import (
 	"github.com/Sirupsen/logrus"
 	"github.com/Sirupsen/logrus"
 	apiserver "github.com/docker/docker/api/server"
 	apiserver "github.com/docker/docker/api/server"
 	"github.com/docker/docker/daemon"
 	"github.com/docker/docker/daemon"
+	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/pkg/mflag"
 	"github.com/docker/docker/pkg/mflag"
 	"github.com/docker/docker/pkg/system"
 	"github.com/docker/docker/pkg/system"
-
-	_ "github.com/docker/docker/daemon/execdriver/native"
 )
 )
 
 
 const defaultDaemonConfigFile = "/etc/docker/daemon.json"
 const defaultDaemonConfigFile = "/etc/docker/daemon.json"
@@ -65,3 +64,15 @@ func setupConfigReloadTrap(configFile string, flags *mflag.FlagSet, reload func(
 		}
 		}
 	}()
 	}()
 }
 }
+
+func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
+	opts := []libcontainerd.RemoteOption{
+		libcontainerd.WithDebugLog(cli.Config.Debug),
+	}
+	if cli.Config.ContainerdAddr != "" {
+		opts = append(opts, libcontainerd.WithRemoteAddr(cli.Config.ContainerdAddr))
+	} else {
+		opts = append(opts, libcontainerd.WithStartDaemon(true))
+	}
+	return opts
+}

+ 28 - 0
integration-cli/daemon.go

@@ -142,6 +142,7 @@ func (d *Daemon) StartWithLogFile(out *os.File, providedArgs ...string) error {
 
 
 	args := append(d.GlobalFlags,
 	args := append(d.GlobalFlags,
 		d.Command,
 		d.Command,
+		"--containerd", "/var/run/docker/libcontainerd/containerd.sock",
 		"--graph", d.root,
 		"--graph", d.root,
 		"--pidfile", fmt.Sprintf("%s/docker.pid", d.folder),
 		"--pidfile", fmt.Sprintf("%s/docker.pid", d.folder),
 		fmt.Sprintf("--userland-proxy=%t", d.userlandProxy),
 		fmt.Sprintf("--userland-proxy=%t", d.userlandProxy),
@@ -245,6 +246,29 @@ func (d *Daemon) StartWithBusybox(arg ...string) error {
 	return d.LoadBusybox()
 	return d.LoadBusybox()
 }
 }
 
 
+// Kill will send a SIGKILL to the daemon
+func (d *Daemon) Kill() error {
+	if d.cmd == nil || d.wait == nil {
+		return errors.New("daemon not started")
+	}
+
+	defer func() {
+		d.logFile.Close()
+		d.cmd = nil
+	}()
+
+	if err := d.cmd.Process.Kill(); err != nil {
+		d.c.Logf("Could not kill daemon: %v", err)
+		return err
+	}
+
+	if err := os.Remove(fmt.Sprintf("%s/docker.pid", d.folder)); err != nil {
+		return err
+	}
+
+	return nil
+}
+
 // Stop will send a SIGINT every second and wait for the daemon to stop.
 // Stop will send a SIGINT every second and wait for the daemon to stop.
 // If it timeouts, a SIGKILL is sent.
 // If it timeouts, a SIGKILL is sent.
 // Stop will not delete the daemon directory. If a purged daemon is needed,
 // Stop will not delete the daemon directory. If a purged daemon is needed,
@@ -300,6 +324,10 @@ out2:
 		return err
 		return err
 	}
 	}
 
 
+	if err := os.Remove(fmt.Sprintf("%s/docker.pid", d.folder)); err != nil {
+		return err
+	}
+
 	return nil
 	return nil
 }
 }
 
 

+ 150 - 0
integration-cli/docker_cli_daemon_experimental_test.go

@@ -0,0 +1,150 @@
+// +build daemon,!windows,experimental
+
+package main
+
+import (
+	"os/exec"
+	"strings"
+	"time"
+
+	"github.com/go-check/check"
+)
+
+// TestDaemonRestartWithKilledRunningContainer requires live restore of running containers
+func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check.C) {
+	// TODO(mlaventure): Not sure what would the exit code be on windows
+	testRequires(t, DaemonIsLinux)
+	if err := s.d.StartWithBusybox(); err != nil {
+		t.Fatal(err)
+	}
+
+	cid, err := s.d.Cmd("run", "-d", "--name", "test", "busybox", "top")
+	defer s.d.Stop()
+	if err != nil {
+		t.Fatal(cid, err)
+	}
+	cid = strings.TrimSpace(cid)
+
+	// Kill the daemon
+	if err := s.d.Kill(); err != nil {
+		t.Fatal(err)
+	}
+
+	// kill the container
+	runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "kill", cid)
+	if out, ec, err := runCommandWithOutput(runCmd); err != nil {
+		t.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, cid)
+	}
+
+	// Give time to containerd to process the command; if we don't,
+	// the exit event might be received after we do the inspect
+	time.Sleep(3 * time.Second)
+
+	// restart the daemon
+	if err := s.d.Start(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Check that we've got the correct exit code
+	out, err := s.d.Cmd("inspect", "-f", "{{.State.ExitCode}}", cid)
+	t.Assert(err, check.IsNil)
+
+	out = strings.TrimSpace(out)
+	if out != "143" {
+		t.Fatalf("Expected exit code '%s' got '%s' for container '%s'\n", "143", out, cid)
+	}
+
+}
+
+// TestDaemonRestartWithPausedRunningContainer requires live restore of running containers
+func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) {
+	if err := s.d.StartWithBusybox(); err != nil {
+		t.Fatal(err)
+	}
+
+	cid, err := s.d.Cmd("run", "-d", "--name", "test", "busybox", "top")
+	defer s.d.Stop()
+	if err != nil {
+		t.Fatal(cid, err)
+	}
+	cid = strings.TrimSpace(cid)
+
+	// Kill the daemon
+	if err := s.d.Kill(); err != nil {
+		t.Fatal(err)
+	}
+
+	// pause the container
+	runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "pause", cid)
+	if out, ec, err := runCommandWithOutput(runCmd); err != nil {
+		t.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, cid)
+	}
+
+	// Give time to containerd to process the command; if we don't,
+	// the pause event might be received after we do the inspect
+	time.Sleep(3 * time.Second)
+
+	// restart the daemon
+	if err := s.d.Start(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Check that we've got the correct status
+	out, err := s.d.Cmd("inspect", "-f", "{{.State.Status}}", cid)
+	t.Assert(err, check.IsNil)
+
+	out = strings.TrimSpace(out)
+	if out != "paused" {
+		t.Fatalf("Expected exit code '%s' got '%s' for container '%s'\n", "paused", out, cid)
+	}
+}
+
+// TestDaemonRestartWithUnpausedRunningContainer requires live restore of running containers.
+func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *check.C) {
+	// TODO(mlaventure): Not sure what would the exit code be on windows
+	testRequires(t, DaemonIsLinux)
+	if err := s.d.StartWithBusybox(); err != nil {
+		t.Fatal(err)
+	}
+
+	cid, err := s.d.Cmd("run", "-d", "--name", "test", "busybox", "top")
+	defer s.d.Stop()
+	if err != nil {
+		t.Fatal(cid, err)
+	}
+	cid = strings.TrimSpace(cid)
+
+	// pause the container
+	if _, err := s.d.Cmd("pause", cid); err != nil {
+		t.Fatal(cid, err)
+	}
+
+	// Kill the daemon
+	if err := s.d.Kill(); err != nil {
+		t.Fatal(err)
+	}
+
+	// resume the container
+	runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "resume", cid)
+	if out, ec, err := runCommandWithOutput(runCmd); err != nil {
+		t.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, cid)
+	}
+
+	// Give time to containerd to process the command; if we don't,
+	// the resume event might be received after we do the inspect
+	time.Sleep(3 * time.Second)
+
+	// restart the daemon
+	if err := s.d.Start(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Check that we've got the correct status
+	out, err := s.d.Cmd("inspect", "-f", "{{.State.Status}}", cid)
+	t.Assert(err, check.IsNil)
+
+	out = strings.TrimSpace(out)
+	if out != "running" {
+		t.Fatalf("Expected exit code '%s' got '%s' for container '%s'\n", "running", out, cid)
+	}
+}

+ 13 - 1
integration-cli/docker_cli_daemon_test.go

@@ -1507,7 +1507,18 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterCrash(c *check.C) {
 	out, err := s.d.Cmd("run", "-d", "busybox", "top")
 	out, err := s.d.Cmd("run", "-d", "busybox", "top")
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
 	id := strings.TrimSpace(out)
 	id := strings.TrimSpace(out)
-	c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
+	c.Assert(s.d.Kill(), check.IsNil)
+
+	// kill the container
+	runCmd := exec.Command("ctr", "--address", "/var/run/docker/libcontainerd/containerd.sock", "containers", "kill", id)
+	if out, ec, err := runCommandWithOutput(runCmd); err != nil {
+		c.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, id)
+	}
+
+	// Give time to containerd to process the command; if we don't,
+	// the exit event might be received after we do the inspect
+	time.Sleep(3 * time.Second)
+
 	c.Assert(s.d.Start(), check.IsNil)
 	c.Assert(s.d.Start(), check.IsNil)
 	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
 	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
@@ -1840,6 +1851,7 @@ func (s *DockerDaemonSuite) TestDaemonNoSpaceleftOnDeviceError(c *check.C) {
 // Test daemon restart with container links + auto restart
 // Test daemon restart with container links + auto restart
 func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
 func (s *DockerDaemonSuite) TestDaemonRestartContainerLinksRestart(c *check.C) {
 	d := NewDaemon(c)
 	d := NewDaemon(c)
+	defer d.Stop()
 	err := d.StartWithBusybox()
 	err := d.StartWithBusybox()
 	c.Assert(err, checker.IsNil)
 	c.Assert(err, checker.IsNil)
 
 

+ 0 - 52
integration-cli/docker_cli_exec_test.go

@@ -8,7 +8,6 @@ import (
 	"net/http"
 	"net/http"
 	"os"
 	"os"
 	"os/exec"
 	"os/exec"
-	"path/filepath"
 	"reflect"
 	"reflect"
 	"sort"
 	"sort"
 	"strings"
 	"strings"
@@ -375,57 +374,6 @@ func (s *DockerSuite) TestLinksPingLinkedContainersOnRename(c *check.C) {
 	dockerCmd(c, "exec", "container2", "ping", "-c", "1", "alias1", "-W", "1")
 	dockerCmd(c, "exec", "container2", "ping", "-c", "1", "alias1", "-W", "1")
 }
 }
 
 
-func (s *DockerSuite) TestExecDir(c *check.C) {
-	// TODO Windows CI. This requires some work to port as it uses execDriverPath
-	// which is currently (and incorrectly) hard coded as a string assuming
-	// the daemon is running Linux :(
-	testRequires(c, SameHostDaemon, DaemonIsLinux)
-
-	out, _ := runSleepingContainer(c, "-d")
-	id := strings.TrimSpace(out)
-
-	execDir := filepath.Join(execDriverPath, id)
-	stateFile := filepath.Join(execDir, "state.json")
-
-	{
-		fi, err := os.Stat(execDir)
-		c.Assert(err, checker.IsNil)
-		if !fi.IsDir() {
-			c.Fatalf("%q must be a directory", execDir)
-		}
-		fi, err = os.Stat(stateFile)
-		c.Assert(err, checker.IsNil)
-	}
-
-	dockerCmd(c, "stop", id)
-	{
-		_, err := os.Stat(execDir)
-		c.Assert(err, checker.NotNil)
-		c.Assert(err, checker.NotNil, check.Commentf("Exec directory %q exists for removed container!", execDir))
-		if !os.IsNotExist(err) {
-			c.Fatalf("Error should be about non-existing, got %s", err)
-		}
-	}
-	dockerCmd(c, "start", id)
-	{
-		fi, err := os.Stat(execDir)
-		c.Assert(err, checker.IsNil)
-		if !fi.IsDir() {
-			c.Fatalf("%q must be a directory", execDir)
-		}
-		fi, err = os.Stat(stateFile)
-		c.Assert(err, checker.IsNil)
-	}
-	dockerCmd(c, "rm", "-f", id)
-	{
-		_, err := os.Stat(execDir)
-		c.Assert(err, checker.NotNil, check.Commentf("Exec directory %q exists for removed container!", execDir))
-		if !os.IsNotExist(err) {
-			c.Fatalf("Error should be about non-existing, got %s", err)
-		}
-	}
-}
-
 func (s *DockerSuite) TestRunMutableNetworkFiles(c *check.C) {
 func (s *DockerSuite) TestRunMutableNetworkFiles(c *check.C) {
 	// Not applicable on Windows to Windows CI.
 	// Not applicable on Windows to Windows CI.
 	testRequires(c, SameHostDaemon, DaemonIsLinux)
 	testRequires(c, SameHostDaemon, DaemonIsLinux)

+ 0 - 1
integration-cli/docker_cli_info_test.go

@@ -22,7 +22,6 @@ func (s *DockerSuite) TestInfoEnsureSucceeds(c *check.C) {
 		" Paused:",
 		" Paused:",
 		" Stopped:",
 		" Stopped:",
 		"Images:",
 		"Images:",
-		"Execution Driver:",
 		"OSType:",
 		"OSType:",
 		"Architecture:",
 		"Architecture:",
 		"Logging Driver:",
 		"Logging Driver:",

+ 9 - 4
integration-cli/docker_cli_run_test.go

@@ -1109,7 +1109,7 @@ func (s *DockerSuite) TestRunProcNotWritableInNonPrivilegedContainers(c *check.C
 func (s *DockerSuite) TestRunProcWritableInPrivilegedContainers(c *check.C) {
 func (s *DockerSuite) TestRunProcWritableInPrivilegedContainers(c *check.C) {
 	// Not applicable for Windows as there is no concept of --privileged
 	// Not applicable for Windows as there is no concept of --privileged
 	testRequires(c, DaemonIsLinux, NotUserNamespace)
 	testRequires(c, DaemonIsLinux, NotUserNamespace)
-	if _, code := dockerCmd(c, "run", "--privileged", "busybox", "touch", "/proc/sysrq-trigger"); code != 0 {
+	if _, code := dockerCmd(c, "run", "--privileged", "busybox", "sh", "-c", "umount /proc/sysrq-trigger && touch /proc/sysrq-trigger"); code != 0 {
 		c.Fatalf("proc should be writable in privileged container")
 		c.Fatalf("proc should be writable in privileged container")
 	}
 	}
 }
 }
@@ -3021,7 +3021,8 @@ func (s *DockerSuite) TestRunUnshareProc(c *check.C) {
 		out, _, err := dockerCmdWithError("run", "--name", name, "--security-opt", "seccomp:unconfined", "debian:jessie", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
 		out, _, err := dockerCmdWithError("run", "--name", name, "--security-opt", "seccomp:unconfined", "debian:jessie", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
 		if err == nil ||
 		if err == nil ||
 			!(strings.Contains(strings.ToLower(out), "mount: cannot mount none") ||
 			!(strings.Contains(strings.ToLower(out), "mount: cannot mount none") ||
-				strings.Contains(strings.ToLower(out), "permission denied")) {
+				strings.Contains(strings.ToLower(out), "permission denied") ||
+				strings.Contains(strings.ToLower(out), "operation not permitted")) {
 			errChan <- fmt.Errorf("unshare and mount of /proc should have failed with 'mount: cannot mount none' or 'permission denied', got: %s, %v", out, err)
 			errChan <- fmt.Errorf("unshare and mount of /proc should have failed with 'mount: cannot mount none' or 'permission denied', got: %s, %v", out, err)
 		} else {
 		} else {
 			errChan <- nil
 			errChan <- nil
@@ -3034,7 +3035,8 @@ func (s *DockerSuite) TestRunUnshareProc(c *check.C) {
 		out, _, err := dockerCmdWithError("run", "--privileged", "--security-opt", "seccomp:unconfined", "--security-opt", "apparmor:docker-default", "--name", name, "debian:jessie", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
 		out, _, err := dockerCmdWithError("run", "--privileged", "--security-opt", "seccomp:unconfined", "--security-opt", "apparmor:docker-default", "--name", name, "debian:jessie", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
 		if err == nil ||
 		if err == nil ||
 			!(strings.Contains(strings.ToLower(out), "mount: cannot mount none") ||
 			!(strings.Contains(strings.ToLower(out), "mount: cannot mount none") ||
-				strings.Contains(strings.ToLower(out), "permission denied")) {
+				strings.Contains(strings.ToLower(out), "permission denied") ||
+				strings.Contains(strings.ToLower(out), "operation not permitted")) {
 			errChan <- fmt.Errorf("privileged unshare with apparmor should have failed with 'mount: cannot mount none' or 'permission denied', got: %s, %v", out, err)
 			errChan <- fmt.Errorf("privileged unshare with apparmor should have failed with 'mount: cannot mount none' or 'permission denied', got: %s, %v", out, err)
 		} else {
 		} else {
 			errChan <- nil
 			errChan <- nil
@@ -4232,7 +4234,10 @@ func (s *DockerSuite) TestRunAttachFailedNoLeak(c *check.C) {
 	out, _, err := dockerCmdWithError("run", "-p", "8000:8000", "busybox", "true")
 	out, _, err := dockerCmdWithError("run", "-p", "8000:8000", "busybox", "true")
 	c.Assert(err, checker.NotNil)
 	c.Assert(err, checker.NotNil)
 	// check for windows error as well
 	// check for windows error as well
-	c.Assert(strings.Contains(string(out), "port is already allocated") || strings.Contains(string(out), "were not connected because a duplicate name exists"), checker.Equals, true, check.Commentf("Output: %s", out))
+	// TODO Windows Post TP5. Fix the error message string
+	c.Assert(strings.Contains(string(out), "port is already allocated") ||
+		strings.Contains(string(out), "were not connected because a duplicate name exists") ||
+		strings.Contains(string(out), "HNS failed with error : Failed to create endpoint"), checker.Equals, true, check.Commentf("Output: %s", out))
 	dockerCmd(c, "rm", "-f", "test")
 	dockerCmd(c, "rm", "-f", "test")
 
 
 	// NGoroutines is not updated right away, so we need to wait before failing
 	// NGoroutines is not updated right away, so we need to wait before failing

+ 1 - 0
layer/layer.go

@@ -169,6 +169,7 @@ type Store interface {
 
 
 	CreateRWLayer(id string, parent ChainID, mountLabel string, initFunc MountInit) (RWLayer, error)
 	CreateRWLayer(id string, parent ChainID, mountLabel string, initFunc MountInit) (RWLayer, error)
 	GetRWLayer(id string) (RWLayer, error)
 	GetRWLayer(id string) (RWLayer, error)
+	GetMountID(id string) (string, error)
 	ReleaseRWLayer(RWLayer) ([]Metadata, error)
 	ReleaseRWLayer(RWLayer) ([]Metadata, error)
 
 
 	Cleanup() error
 	Cleanup() error

+ 12 - 0
layer/layer_store.go

@@ -480,6 +480,18 @@ func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) {
 	return mount.getReference(), nil
 	return mount.getReference(), nil
 }
 }
 
 
+func (ls *layerStore) GetMountID(id string) (string, error) {
+	ls.mountL.Lock()
+	defer ls.mountL.Unlock()
+	mount, ok := ls.mounts[id]
+	if !ok {
+		return "", ErrMountDoesNotExist
+	}
+	logrus.Debugf("GetRWLayer id: %s -> mountID: %s", id, mount.mountID)
+
+	return mount.mountID, nil
+}
+
 func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) {
 func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) {
 	ls.mountL.Lock()
 	ls.mountL.Lock()
 	defer ls.mountL.Unlock()
 	defer ls.mountL.Unlock()

+ 58 - 0
libcontainerd/client.go

@@ -0,0 +1,58 @@
+package libcontainerd
+
+import (
+	"fmt"
+	"sync"
+
+	"github.com/Sirupsen/logrus"
+)
+
+// clientCommon contains the platform agnostic fields used in the client structure
+type clientCommon struct {
+	backend          Backend
+	containers       map[string]*container
+	containerMutexes map[string]*sync.Mutex // lock by container ID
+	mapMutex         sync.RWMutex           // protects read/write operations from containers map
+	sync.Mutex                              // lock for containerMutexes map access
+}
+
+func (clnt *client) lock(containerID string) {
+	clnt.Lock()
+	if _, ok := clnt.containerMutexes[containerID]; !ok {
+		clnt.containerMutexes[containerID] = &sync.Mutex{}
+	}
+	clnt.Unlock()
+	clnt.containerMutexes[containerID].Lock()
+}
+
+func (clnt *client) unlock(containerID string) {
+	clnt.Lock()
+	if l, ok := clnt.containerMutexes[containerID]; ok {
+		l.Unlock()
+	} else {
+		logrus.Warnf("unlock of non-existing mutex: %s", containerID)
+	}
+	clnt.Unlock()
+}
+
+// must hold a lock for cont.containerID
+func (clnt *client) appendContainer(cont *container) {
+	clnt.mapMutex.Lock()
+	clnt.containers[cont.containerID] = cont
+	clnt.mapMutex.Unlock()
+}
+func (clnt *client) deleteContainer(friendlyName string) {
+	clnt.mapMutex.Lock()
+	delete(clnt.containers, friendlyName)
+	clnt.mapMutex.Unlock()
+}
+
+func (clnt *client) getContainer(containerID string) (*container, error) {
+	clnt.mapMutex.RLock()
+	container, ok := clnt.containers[containerID]
+	defer clnt.mapMutex.RUnlock()
+	if !ok {
+		return nil, fmt.Errorf("invalid container: %s", containerID) // fixme: typed error
+	}
+	return container, nil
+}

+ 394 - 0
libcontainerd/client_linux.go

@@ -0,0 +1,394 @@
+package libcontainerd
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"syscall"
+
+	"github.com/Sirupsen/logrus"
+	containerd "github.com/docker/containerd/api/grpc/types"
+	"github.com/docker/docker/pkg/idtools"
+	"github.com/docker/docker/pkg/mount"
+	"github.com/opencontainers/specs/specs-go"
+	"golang.org/x/net/context"
+)
+
+type client struct {
+	clientCommon
+
+	// Platform specific properties below here.
+	remote        *remote
+	q             queue
+	exitNotifiers map[string]*exitNotifier
+}
+
+func (clnt *client) AddProcess(containerID, processFriendlyName string, specp Process) error {
+	clnt.lock(containerID)
+	defer clnt.unlock(containerID)
+	container, err := clnt.getContainer(containerID)
+	if err != nil {
+		return err
+	}
+
+	spec, err := container.spec()
+	if err != nil {
+		return err
+	}
+	sp := spec.Process
+	sp.Args = specp.Args
+	sp.Terminal = specp.Terminal
+	if specp.Env != nil {
+		sp.Env = specp.Env
+	}
+	if specp.Cwd != nil {
+		sp.Cwd = *specp.Cwd
+	}
+	if specp.User != nil {
+		sp.User = specs.User{
+			UID:            specp.User.UID,
+			GID:            specp.User.GID,
+			AdditionalGids: specp.User.AdditionalGids,
+		}
+	}
+	if specp.Capabilities != nil {
+		sp.Capabilities = specp.Capabilities
+	}
+
+	p := container.newProcess(processFriendlyName)
+
+	r := &containerd.AddProcessRequest{
+		Args:     sp.Args,
+		Cwd:      sp.Cwd,
+		Terminal: sp.Terminal,
+		Id:       containerID,
+		Env:      sp.Env,
+		User: &containerd.User{
+			Uid:            sp.User.UID,
+			Gid:            sp.User.GID,
+			AdditionalGids: sp.User.AdditionalGids,
+		},
+		Pid:             processFriendlyName,
+		Stdin:           p.fifo(syscall.Stdin),
+		Stdout:          p.fifo(syscall.Stdout),
+		Stderr:          p.fifo(syscall.Stderr),
+		Capabilities:    sp.Capabilities,
+		ApparmorProfile: sp.ApparmorProfile,
+		SelinuxLabel:    sp.SelinuxLabel,
+		NoNewPrivileges: sp.NoNewPrivileges,
+	}
+
+	iopipe, err := p.openFifos(sp.Terminal)
+	if err != nil {
+		return err
+	}
+
+	if _, err := clnt.remote.apiClient.AddProcess(context.Background(), r); err != nil {
+		p.closeFifos(iopipe)
+		return err
+	}
+
+	container.processes[processFriendlyName] = p
+
+	clnt.unlock(containerID)
+
+	if err := clnt.backend.AttachStreams(processFriendlyName, *iopipe); err != nil {
+		return err
+	}
+	clnt.lock(containerID)
+
+	return nil
+}
+
+func (clnt *client) prepareBundleDir(uid, gid int) (string, error) {
+	root, err := filepath.Abs(clnt.remote.stateDir)
+	if err != nil {
+		return "", err
+	}
+	if uid == 0 && gid == 0 {
+		return root, nil
+	}
+	p := string(filepath.Separator)
+	for _, d := range strings.Split(root, string(filepath.Separator))[1:] {
+		p = filepath.Join(p, d)
+		fi, err := os.Stat(p)
+		if err != nil && !os.IsNotExist(err) {
+			return "", err
+		}
+		if os.IsNotExist(err) || fi.Mode()&1 == 0 {
+			p = fmt.Sprintf("%s.%d.%d", p, uid, gid)
+			if err := idtools.MkdirAs(p, 0700, uid, gid); err != nil && !os.IsExist(err) {
+				return "", err
+			}
+		}
+	}
+	return p, nil
+}
+
+func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) (err error) {
+	clnt.lock(containerID)
+	defer clnt.unlock(containerID)
+
+	if ctr, err := clnt.getContainer(containerID); err == nil {
+		if ctr.restarting { // docker doesn't actually call start if restart is on atm, but probably should in the future
+			ctr.restartManager.Cancel()
+			ctr.clean()
+		} else {
+			return fmt.Errorf("Container %s is aleady active", containerID)
+		}
+	}
+
+	uid, gid, err := getRootIDs(specs.Spec(spec))
+	if err != nil {
+		return err
+	}
+	dir, err := clnt.prepareBundleDir(uid, gid)
+	if err != nil {
+		return err
+	}
+
+	container := clnt.newContainer(filepath.Join(dir, containerID), options...)
+	if err := container.clean(); err != nil {
+		return err
+	}
+
+	defer func() {
+		if err != nil {
+			container.clean()
+			clnt.deleteContainer(containerID)
+		}
+	}()
+
+	// uid/gid
+	rootfsDir := filepath.Join(container.dir, "rootfs")
+	if err := idtools.MkdirAllAs(rootfsDir, 0700, uid, gid); err != nil && !os.IsExist(err) {
+		return err
+	}
+	if err := syscall.Mount(spec.Root.Path, rootfsDir, "bind", syscall.MS_REC|syscall.MS_BIND, ""); err != nil {
+		return err
+	}
+	spec.Root.Path = "rootfs"
+
+	f, err := os.Create(filepath.Join(container.dir, configFilename))
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	if err := json.NewEncoder(f).Encode(spec); err != nil {
+		return err
+	}
+
+	return container.start()
+}
+
+func (clnt *client) Signal(containerID string, sig int) error {
+	clnt.lock(containerID)
+	defer clnt.unlock(containerID)
+	_, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
+		Id:     containerID,
+		Pid:    InitFriendlyName,
+		Signal: uint32(sig),
+	})
+	return err
+}
+
+func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
+	clnt.lock(containerID)
+	defer clnt.unlock(containerID)
+	if _, err := clnt.getContainer(containerID); err != nil {
+		return err
+	}
+	_, err := clnt.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
+		Id:     containerID,
+		Pid:    processFriendlyName,
+		Width:  uint32(width),
+		Height: uint32(height),
+	})
+	return err
+}
+
+func (clnt *client) Pause(containerID string) error {
+	return clnt.setState(containerID, StatePause)
+}
+
+func (clnt *client) setState(containerID, state string) error {
+	clnt.lock(containerID)
+	container, err := clnt.getContainer(containerID)
+	if err != nil {
+		clnt.unlock(containerID)
+		return err
+	}
+	if container.systemPid == 0 {
+		clnt.unlock(containerID)
+		return fmt.Errorf("No active process for container %s", containerID)
+	}
+	st := "running"
+	if state == StatePause {
+		st = "paused"
+	}
+	chstate := make(chan struct{})
+	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
+		Id:     containerID,
+		Pid:    InitFriendlyName,
+		Status: st,
+	})
+	if err != nil {
+		clnt.unlock(containerID)
+		return err
+	}
+	container.pauseMonitor.append(state, chstate)
+	clnt.unlock(containerID)
+	<-chstate
+	return nil
+}
+
+func (clnt *client) Resume(containerID string) error {
+	return clnt.setState(containerID, StateResume)
+}
+
+func (clnt *client) Stats(containerID string) (*Stats, error) {
+	resp, err := clnt.remote.apiClient.Stats(context.Background(), &containerd.StatsRequest{containerID})
+	if err != nil {
+		return nil, err
+	}
+	return (*Stats)(resp), nil
+}
+
+func (clnt *client) setExited(containerID string) error {
+	clnt.lock(containerID)
+	defer clnt.unlock(containerID)
+
+	var exitCode uint32
+	if event, ok := clnt.remote.pastEvents[containerID]; ok {
+		exitCode = event.Status
+		delete(clnt.remote.pastEvents, containerID)
+	}
+
+	err := clnt.backend.StateChanged(containerID, StateInfo{
+		State:    StateExit,
+		ExitCode: exitCode,
+	})
+
+	// Unmount and delete the bundle folder
+	if mts, err := mount.GetMounts(); err == nil {
+		for _, mts := range mts {
+			if strings.HasSuffix(mts.Mountpoint, containerID+"/rootfs") {
+				if err := syscall.Unmount(mts.Mountpoint, syscall.MNT_DETACH); err == nil {
+					os.RemoveAll(strings.TrimSuffix(mts.Mountpoint, "/rootfs"))
+				}
+				break
+			}
+		}
+	}
+
+	return err
+}
+
+func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
+	cont, err := clnt.getContainerdContainer(containerID)
+	if err != nil {
+		return nil, err
+	}
+	pids := make([]int, len(cont.Pids))
+	for i, p := range cont.Pids {
+		pids[i] = int(p)
+	}
+	return pids, nil
+}
+
+func (clnt *client) getContainerdContainer(containerID string) (*containerd.Container, error) {
+	resp, err := clnt.remote.apiClient.State(context.Background(), &containerd.StateRequest{Id: containerID})
+	if err != nil {
+		return nil, err
+	}
+	for _, cont := range resp.Containers {
+		if cont.Id == containerID {
+			return cont, nil
+		}
+	}
+	return nil, fmt.Errorf("invalid state response")
+}
+
+func (clnt *client) newContainer(dir string, options ...CreateOption) *container {
+	container := &container{
+		containerCommon: containerCommon{
+			process: process{
+				dir: dir,
+				processCommon: processCommon{
+					containerID:  filepath.Base(dir),
+					client:       clnt,
+					friendlyName: InitFriendlyName,
+				},
+			},
+			processes: make(map[string]*process),
+		},
+	}
+	for _, option := range options {
+		if err := option.Apply(container); err != nil {
+			logrus.Error(err)
+		}
+	}
+	return container
+}
+
+func (clnt *client) UpdateResources(containerID string, resources Resources) error {
+	clnt.lock(containerID)
+	defer clnt.unlock(containerID)
+	container, err := clnt.getContainer(containerID)
+	if err != nil {
+		return err
+	}
+	if container.systemPid == 0 {
+		return fmt.Errorf("No active process for container %s", containerID)
+	}
+	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
+		Id:        containerID,
+		Pid:       InitFriendlyName,
+		Resources: (*containerd.UpdateResource)(&resources),
+	})
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+func (clnt *client) getExitNotifier(containerID string) *exitNotifier {
+	clnt.mapMutex.RLock()
+	defer clnt.mapMutex.RUnlock()
+	return clnt.exitNotifiers[containerID]
+}
+
+func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier {
+	clnt.mapMutex.Lock()
+	w, ok := clnt.exitNotifiers[containerID]
+	defer clnt.mapMutex.Unlock()
+	if !ok {
+		w = &exitNotifier{c: make(chan struct{}), client: clnt}
+		clnt.exitNotifiers[containerID] = w
+	}
+	return w
+}
+
+type exitNotifier struct {
+	id     string
+	client *client
+	c      chan struct{}
+	once   sync.Once
+}
+
+func (en *exitNotifier) close() {
+	en.once.Do(func() {
+		close(en.c)
+		en.client.mapMutex.Lock()
+		if en == en.client.exitNotifiers[en.id] {
+			delete(en.client.exitNotifiers, en.id)
+		}
+		en.client.mapMutex.Unlock()
+	})
+}
+func (en *exitNotifier) wait() <-chan struct{} {
+	return en.c
+}

+ 83 - 0
libcontainerd/client_liverestore_linux.go

@@ -0,0 +1,83 @@
+// +build experimental
+
+package libcontainerd
+
+import (
+	"fmt"
+
+	"github.com/Sirupsen/logrus"
+	containerd "github.com/docker/containerd/api/grpc/types"
+)
+
+func (clnt *client) restore(cont *containerd.Container, options ...CreateOption) (err error) {
+	clnt.lock(cont.Id)
+	defer clnt.unlock(cont.Id)
+
+	logrus.Debugf("restore container %s state %s", cont.Id, cont.Status)
+
+	containerID := cont.Id
+	if _, err := clnt.getContainer(containerID); err == nil {
+		return fmt.Errorf("container %s is aleady active", containerID)
+	}
+
+	defer func() {
+		if err != nil {
+			clnt.deleteContainer(cont.Id)
+		}
+	}()
+
+	container := clnt.newContainer(cont.BundlePath, options...)
+	container.systemPid = systemPid(cont)
+
+	var terminal bool
+	for _, p := range cont.Processes {
+		if p.Pid == InitFriendlyName {
+			terminal = p.Terminal
+		}
+	}
+
+	iopipe, err := container.openFifos(terminal)
+	if err != nil {
+		return err
+	}
+
+	if err := clnt.backend.AttachStreams(containerID, *iopipe); err != nil {
+		return err
+	}
+
+	clnt.appendContainer(container)
+
+	err = clnt.backend.StateChanged(containerID, StateInfo{
+		State: StateRestore,
+		Pid:   container.systemPid,
+	})
+
+	if err != nil {
+		return err
+	}
+
+	if event, ok := clnt.remote.pastEvents[containerID]; ok {
+		// This should only be a pause or resume event
+		if event.Type == StatePause || event.Type == StateResume {
+			return clnt.backend.StateChanged(containerID, StateInfo{
+				State: event.Type,
+				Pid:   container.systemPid,
+			})
+		}
+
+		logrus.Warnf("unexpected backlog event: %#v", event)
+	}
+
+	return nil
+}
+
+func (clnt *client) Restore(containerID string, options ...CreateOption) error {
+	cont, err := clnt.getContainerdContainer(containerID)
+	if err == nil && cont.Status != "stopped" {
+		if err := clnt.restore(cont, options...); err != nil {
+			logrus.Errorf("error restoring %s: %v", containerID, err)
+		}
+		return nil
+	}
+	return clnt.setExited(containerID)
+}

+ 39 - 0
libcontainerd/client_shutdownrestore_linux.go

@@ -0,0 +1,39 @@
+// +build !experimental
+
+package libcontainerd
+
+import (
+	"syscall"
+	"time"
+
+	"github.com/Sirupsen/logrus"
+)
+
+func (clnt *client) Restore(containerID string, options ...CreateOption) error {
+	w := clnt.getOrCreateExitNotifier(containerID)
+	defer w.close()
+	cont, err := clnt.getContainerdContainer(containerID)
+	if err == nil && cont.Status != "stopped" {
+		clnt.lock(cont.Id)
+		container := clnt.newContainer(cont.BundlePath)
+		container.systemPid = systemPid(cont)
+		clnt.appendContainer(container)
+		clnt.unlock(cont.Id)
+
+		if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil {
+			logrus.Errorf("error sending sigterm to %v: %v", containerID, err)
+		}
+		select {
+		case <-time.After(10 * time.Second):
+			if err := clnt.Signal(containerID, int(syscall.SIGKILL)); err != nil {
+				logrus.Errorf("error sending sigkill to %v: %v", containerID, err)
+			}
+			select {
+			case <-time.After(2 * time.Second):
+			case <-w.wait():
+			}
+		case <-w.wait():
+		}
+	}
+	return clnt.setExited(containerID)
+}

+ 38 - 0
libcontainerd/container.go

@@ -0,0 +1,38 @@
+package libcontainerd
+
+import (
+	"fmt"
+
+	"github.com/docker/docker/restartmanager"
+)
+
+const (
+	// InitFriendlyName is the name given in the lookup map of processes
+	// for the first process started in a container.
+	InitFriendlyName = "init"
+	configFilename   = "config.json"
+)
+
+type containerCommon struct {
+	process
+	restartManager restartmanager.RestartManager
+	restarting     bool
+	processes      map[string]*process
+}
+
+// WithRestartManager sets the restartmanager to be used with the container.
+func WithRestartManager(rm restartmanager.RestartManager) CreateOption {
+	return restartManager{rm}
+}
+
+type restartManager struct {
+	rm restartmanager.RestartManager
+}
+
+func (rm restartManager) Apply(p interface{}) error {
+	if pr, ok := p.(*container); ok {
+		pr.restartManager = rm.rm
+		return nil
+	}
+	return fmt.Errorf("WithRestartManager option not supported for this client")
+}

+ 166 - 0
libcontainerd/container_linux.go

@@ -0,0 +1,166 @@
+package libcontainerd
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"syscall"
+
+	"github.com/Sirupsen/logrus"
+	containerd "github.com/docker/containerd/api/grpc/types"
+	"github.com/opencontainers/specs/specs-go"
+	"golang.org/x/net/context"
+)
+
+type container struct {
+	containerCommon
+
+	// Platform specific fields are below here.
+	pauseMonitor
+	oom bool
+}
+
+func (ctr *container) clean() error {
+	if _, err := os.Lstat(ctr.dir); err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return err
+	}
+
+	syscall.Unmount(filepath.Join(ctr.dir, "rootfs"), syscall.MNT_DETACH) // ignore error
+	if err := os.RemoveAll(ctr.dir); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (ctr *container) spec() (*specs.Spec, error) {
+	var spec specs.Spec
+	dt, err := ioutil.ReadFile(filepath.Join(ctr.dir, configFilename))
+	if err != nil {
+		return nil, err
+	}
+	if err := json.Unmarshal(dt, &spec); err != nil {
+		return nil, err
+	}
+	return &spec, nil
+}
+
+func (ctr *container) start() error {
+	spec, err := ctr.spec()
+	if err != nil {
+		return nil
+	}
+	iopipe, err := ctr.openFifos(spec.Process.Terminal)
+	if err != nil {
+		return err
+	}
+
+	r := &containerd.CreateContainerRequest{
+		Id:         ctr.containerID,
+		BundlePath: ctr.dir,
+		Stdin:      ctr.fifo(syscall.Stdin),
+		Stdout:     ctr.fifo(syscall.Stdout),
+		Stderr:     ctr.fifo(syscall.Stderr),
+	}
+	ctr.client.appendContainer(ctr)
+
+	resp, err := ctr.client.remote.apiClient.CreateContainer(context.Background(), r)
+	if err != nil {
+		ctr.closeFifos(iopipe)
+		return err
+	}
+
+	if err := ctr.client.backend.AttachStreams(ctr.containerID, *iopipe); err != nil {
+		return err
+	}
+	ctr.systemPid = systemPid(resp.Container)
+
+	return ctr.client.backend.StateChanged(ctr.containerID, StateInfo{
+		State: StateStart,
+		Pid:   ctr.systemPid,
+	})
+}
+
+func (ctr *container) newProcess(friendlyName string) *process {
+	return &process{
+		dir: ctr.dir,
+		processCommon: processCommon{
+			containerID:  ctr.containerID,
+			friendlyName: friendlyName,
+			client:       ctr.client,
+		},
+	}
+}
+
+func (ctr *container) handleEvent(e *containerd.Event) error {
+	ctr.client.lock(ctr.containerID)
+	defer ctr.client.unlock(ctr.containerID)
+	switch e.Type {
+	case StateExit, StatePause, StateResume, StateOOM:
+		st := StateInfo{
+			State:     e.Type,
+			ExitCode:  e.Status,
+			OOMKilled: e.Type == StateExit && ctr.oom,
+		}
+		if e.Type == StateOOM {
+			ctr.oom = true
+		}
+		if e.Type == StateExit && e.Pid != InitFriendlyName {
+			st.ProcessID = e.Pid
+			st.State = StateExitProcess
+		}
+		if st.State == StateExit && ctr.restartManager != nil {
+			restart, wait, err := ctr.restartManager.ShouldRestart(e.Status)
+			if err != nil {
+				logrus.Error(err)
+			} else if restart {
+				st.State = StateRestart
+				ctr.restarting = true
+				go func() {
+					err := <-wait
+					ctr.restarting = false
+					if err != nil {
+						st.State = StateExit
+						ctr.client.q.append(e.Id, func() {
+							if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
+								logrus.Error(err)
+							}
+						})
+						logrus.Error(err)
+					} else {
+						ctr.start()
+					}
+				}()
+			}
+		}
+
+		// Remove process from list if we have exited
+		// We need to do so here in case the Message Handler decides to restart it.
+		if st.State == StateExit {
+			if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
+				ctr.clean()
+			}
+			ctr.client.deleteContainer(e.Id)
+		}
+		ctr.client.q.append(e.Id, func() {
+			if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
+				logrus.Error(err)
+			}
+			if e.Type == StatePause || e.Type == StateResume {
+				ctr.pauseMonitor.handle(e.Type)
+			}
+			if e.Type == StateExit {
+				if en := ctr.client.getExitNotifier(e.Id); en != nil {
+					en.close()
+				}
+			}
+		})
+
+	default:
+		logrus.Debugf("event unhandled: %+v", e)
+	}
+	return nil
+}

+ 31 - 0
libcontainerd/pausemonitor_linux.go

@@ -0,0 +1,31 @@
+package libcontainerd
+
+// pauseMonitor is a helper to get notifications from pause state changes.
+type pauseMonitor struct {
+	waiters map[string][]chan struct{}
+}
+
+func (m *pauseMonitor) handle(t string) {
+	if m.waiters == nil {
+		return
+	}
+	q, ok := m.waiters[t]
+	if !ok {
+		return
+	}
+	if len(q) > 0 {
+		close(q[0])
+		m.waiters[t] = q[1:]
+	}
+}
+
+func (m *pauseMonitor) append(t string, waiter chan struct{}) {
+	if m.waiters == nil {
+		m.waiters = make(map[string][]chan struct{})
+	}
+	_, ok := m.waiters[t]
+	if !ok {
+		m.waiters[t] = make([]chan struct{}, 0)
+	}
+	m.waiters[t] = append(m.waiters[t], waiter)
+}

+ 18 - 0
libcontainerd/process.go

@@ -0,0 +1,18 @@
+package libcontainerd
+
+// processCommon are the platform common fields as part of the process structure
+// which keeps the state for the main container process, as well as any exec
+// processes.
+type processCommon struct {
+	client *client
+
+	// containerID is the Container ID
+	containerID string
+
+	// friendlyName is an identifier for the process (or `InitFriendlyName`
+	// for the first process)
+	friendlyName string
+
+	// systemPid is the PID of the main container process
+	systemPid uint32
+}

+ 107 - 0
libcontainerd/process_linux.go

@@ -0,0 +1,107 @@
+package libcontainerd
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"syscall"
+
+	containerd "github.com/docker/containerd/api/grpc/types"
+	"github.com/docker/docker/pkg/ioutils"
+	"golang.org/x/net/context"
+)
+
+var fdNames = map[int]string{
+	syscall.Stdin:  "stdin",
+	syscall.Stdout: "stdout",
+	syscall.Stderr: "stderr",
+}
+
+// process keeps the state for both main container process and exec process.
+type process struct {
+	processCommon
+
+	// Platform specific fields are below here.
+	dir string
+}
+
+func (p *process) openFifos(terminal bool) (*IOPipe, error) {
+	bundleDir := p.dir
+	if err := os.MkdirAll(bundleDir, 0700); err != nil {
+		return nil, err
+	}
+
+	for i := 0; i < 3; i++ {
+		f := p.fifo(i)
+		if err := syscall.Mkfifo(f, 0700); err != nil && !os.IsExist(err) {
+			return nil, fmt.Errorf("mkfifo: %s %v", f, err)
+		}
+	}
+
+	io := &IOPipe{}
+	stdinf, err := os.OpenFile(p.fifo(syscall.Stdin), syscall.O_RDWR, 0)
+	if err != nil {
+		return nil, err
+	}
+
+	io.Stdout = openReaderFromFifo(p.fifo(syscall.Stdout))
+	if !terminal {
+		io.Stderr = openReaderFromFifo(p.fifo(syscall.Stderr))
+	} else {
+		io.Stderr = emptyReader{}
+	}
+
+	io.Stdin = ioutils.NewWriteCloserWrapper(stdinf, func() error {
+		stdinf.Close()
+		_, err := p.client.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
+			Id:         p.containerID,
+			Pid:        p.friendlyName,
+			CloseStdin: true,
+		})
+		return err
+	})
+
+	return io, nil
+}
+
+func (p *process) closeFifos(io *IOPipe) {
+	io.Stdin.Close()
+	closeReaderFifo(p.fifo(syscall.Stdout))
+	closeReaderFifo(p.fifo(syscall.Stderr))
+}
+
+type emptyReader struct{}
+
+func (r emptyReader) Read(b []byte) (int, error) {
+	return 0, io.EOF
+}
+
+func openReaderFromFifo(fn string) io.Reader {
+	r, w := io.Pipe()
+	go func() {
+		stdoutf, err := os.OpenFile(fn, syscall.O_RDONLY, 0)
+		if err != nil {
+			r.CloseWithError(err)
+		}
+		if _, err := io.Copy(w, stdoutf); err != nil {
+			r.CloseWithError(err)
+		}
+		w.Close()
+		stdoutf.Close()
+	}()
+	return r
+}
+
+// closeReaderFifo closes fifo that may be blocked on open by opening the write side.
+func closeReaderFifo(fn string) {
+	f, err := os.OpenFile(fn, syscall.O_WRONLY|syscall.O_NONBLOCK, 0)
+	if err != nil {
+		return
+	}
+	f.Close()
+}
+
+func (p *process) fifo(index int) string {
+	return filepath.Join(p.dir, p.friendlyName+"-"+fdNames[index])
+}

+ 29 - 0
libcontainerd/queue_linux.go

@@ -0,0 +1,29 @@
+package libcontainerd
+
+import "sync"
+
+type queue struct {
+	sync.Mutex
+	fns map[string]chan struct{}
+}
+
+func (q *queue) append(id string, f func()) {
+	q.Lock()
+	defer q.Unlock()
+
+	if q.fns == nil {
+		q.fns = make(map[string]chan struct{})
+	}
+
+	done := make(chan struct{})
+
+	fn, ok := q.fns[id]
+	q.fns[id] = done
+	go func() {
+		if ok {
+			<-fn
+		}
+		f()
+		close(done)
+	}()
+}

+ 18 - 0
libcontainerd/remote.go

@@ -0,0 +1,18 @@
+package libcontainerd
+
+// Remote on Linux defines the access point to the containerd grpc API.
+// Remote on Windows is largely an unimplemented interface as there is
+// no remote containerd.
+type Remote interface {
+	// Client returns a new Client instance connected with given Backend.
+	Client(Backend) (Client, error)
+	// Cleanup stops containerd if it was started by libcontainerd.
+	// Note this is not used on Windows as there is no remote containerd.
+	Cleanup()
+}
+
+// RemoteOption allows configuring parameters of remotes.
+// This is unused on Windows.
+type RemoteOption interface {
+	Apply(Remote) error
+}

+ 401 - 0
libcontainerd/remote_linux.go

@@ -0,0 +1,401 @@
+package libcontainerd
+
+import (
+	"fmt"
+	"io"
+	"net"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strconv"
+	"sync"
+	"syscall"
+	"time"
+
+	"github.com/Sirupsen/logrus"
+	containerd "github.com/docker/containerd/api/grpc/types"
+	sysinfo "github.com/docker/docker/pkg/system"
+	"github.com/docker/docker/utils"
+	"golang.org/x/net/context"
+	"google.golang.org/grpc"
+)
+
+const (
+	maxConnectionRetryCount   = 3
+	connectionRetryDelay      = 3 * time.Second
+	containerdShutdownTimeout = 15 * time.Second
+	containerdBinary          = "containerd"
+	containerdPidFilename     = "containerd.pid"
+	containerdSockFilename    = "containerd.sock"
+	eventTimestampFilename    = "event.ts"
+)
+
+type remote struct {
+	sync.RWMutex
+	apiClient   containerd.APIClient
+	daemonPid   int
+	stateDir    string
+	rpcAddr     string
+	startDaemon bool
+	debugLog    bool
+	rpcConn     *grpc.ClientConn
+	clients     []*client
+	eventTsPath string
+	pastEvents  map[string]*containerd.Event
+}
+
+// New creates a fresh instance of libcontainerd remote.
+func New(stateDir string, options ...RemoteOption) (_ Remote, err error) {
+	defer func() {
+		if err != nil {
+			err = fmt.Errorf("Failed to connect to containerd. Please make sure containerd is installed in your PATH or you have specificed the correct address. Got error: %v", err)
+		}
+	}()
+	r := &remote{
+		stateDir:    stateDir,
+		daemonPid:   -1,
+		eventTsPath: filepath.Join(stateDir, eventTimestampFilename),
+		pastEvents:  make(map[string]*containerd.Event),
+	}
+	for _, option := range options {
+		if err := option.Apply(r); err != nil {
+			return nil, err
+		}
+	}
+
+	if err := sysinfo.MkdirAll(stateDir, 0700); err != nil {
+		return nil, err
+	}
+
+	if r.rpcAddr == "" {
+		r.rpcAddr = filepath.Join(stateDir, containerdSockFilename)
+	}
+
+	if r.startDaemon {
+		if err := r.runContainerdDaemon(); err != nil {
+			return nil, err
+		}
+	}
+
+	dialOpts := append([]grpc.DialOption{grpc.WithInsecure()},
+		grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
+			return net.DialTimeout("unix", addr, timeout)
+		}),
+	)
+	conn, err := grpc.Dial(r.rpcAddr, dialOpts...)
+	if err != nil {
+		return nil, fmt.Errorf("error connecting to containerd: %v", err)
+	}
+
+	r.rpcConn = conn
+	r.apiClient = containerd.NewAPIClient(conn)
+
+	go r.handleConnectionChange()
+
+	if err := r.startEventsMonitor(); err != nil {
+		return nil, err
+	}
+
+	return r, nil
+}
+
+func (r *remote) handleConnectionChange() {
+	var transientFailureCount = 0
+	state := grpc.Idle
+	for {
+		s, err := r.rpcConn.WaitForStateChange(context.Background(), state)
+		if err != nil {
+			break
+		}
+		state = s
+		logrus.Debugf("containerd connection state change: %v", s)
+
+		if r.daemonPid != -1 {
+			switch state {
+			case grpc.TransientFailure:
+				// Reset state to be notified of next failure
+				transientFailureCount++
+				if transientFailureCount >= maxConnectionRetryCount {
+					transientFailureCount = 0
+					if utils.IsProcessAlive(r.daemonPid) {
+						utils.KillProcess(r.daemonPid)
+					}
+					if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error
+						logrus.Errorf("error restarting containerd: %v", err)
+					}
+				} else {
+					state = grpc.Idle
+					time.Sleep(connectionRetryDelay)
+				}
+			case grpc.Shutdown:
+				// Well, we asked for it to stop, just return
+				return
+			}
+		}
+	}
+}
+
+func (r *remote) Cleanup() {
+	if r.daemonPid == -1 {
+		return
+	}
+	r.rpcConn.Close()
+	// Ask the daemon to quit
+	syscall.Kill(r.daemonPid, syscall.SIGTERM)
+
+	// Wait up to 15secs for it to stop
+	for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second {
+		if !utils.IsProcessAlive(r.daemonPid) {
+			break
+		}
+		time.Sleep(time.Second)
+	}
+
+	if utils.IsProcessAlive(r.daemonPid) {
+		logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid)
+		syscall.Kill(r.daemonPid, syscall.SIGKILL)
+	}
+
+	// cleanup some files
+	os.Remove(filepath.Join(r.stateDir, containerdPidFilename))
+	os.Remove(filepath.Join(r.stateDir, containerdSockFilename))
+}
+
+func (r *remote) Client(b Backend) (Client, error) {
+	c := &client{
+		clientCommon: clientCommon{
+			backend:          b,
+			containerMutexes: make(map[string]*sync.Mutex),
+			containers:       make(map[string]*container),
+		},
+		remote:        r,
+		exitNotifiers: make(map[string]*exitNotifier),
+	}
+
+	r.Lock()
+	r.clients = append(r.clients, c)
+	r.Unlock()
+	return c, nil
+}
+
+func (r *remote) updateEventTimestamp(t time.Time) {
+	f, err := os.OpenFile(r.eventTsPath, syscall.O_CREAT|syscall.O_WRONLY|syscall.O_TRUNC, 0600)
+	defer f.Close()
+	if err != nil {
+		logrus.Warnf("libcontainerd: failed to open event timestamp file: %v", err)
+		return
+	}
+
+	b, err := t.MarshalText()
+	if err != nil {
+		logrus.Warnf("libcontainerd: failed to encode timestamp: %v", err)
+		return
+	}
+
+	n, err := f.Write(b)
+	if err != nil || n != len(b) {
+		logrus.Warnf("libcontainerd: failed to update event timestamp file: %v", err)
+		f.Truncate(0)
+		return
+	}
+
+}
+
+func (r *remote) getLastEventTimestamp() int64 {
+	t := time.Now()
+
+	fi, err := os.Stat(r.eventTsPath)
+	if os.IsNotExist(err) {
+		return t.Unix()
+	}
+
+	f, err := os.Open(r.eventTsPath)
+	defer f.Close()
+	if err != nil {
+		logrus.Warn("libcontainerd: Unable to access last event ts: %v", err)
+		return t.Unix()
+	}
+
+	b := make([]byte, fi.Size())
+	n, err := f.Read(b)
+	if err != nil || n != len(b) {
+		logrus.Warn("libcontainerd: Unable to read last event ts: %v", err)
+		return t.Unix()
+	}
+
+	t.UnmarshalText(b)
+
+	return t.Unix()
+}
+
+func (r *remote) startEventsMonitor() error {
+	// First, get past events
+	er := &containerd.EventsRequest{
+		Timestamp: uint64(r.getLastEventTimestamp()),
+	}
+	events, err := r.apiClient.Events(context.Background(), er)
+	if err != nil {
+		return err
+	}
+	go r.handleEventStream(events)
+	return nil
+}
+
+func (r *remote) handleEventStream(events containerd.API_EventsClient) {
+	live := false
+	for {
+		e, err := events.Recv()
+		if err != nil {
+			logrus.Errorf("failed to receive event from containerd: %v", err)
+			go r.startEventsMonitor()
+			return
+		}
+
+		if live == false {
+			logrus.Debugf("received past containerd event: %#v", e)
+
+			// Pause/Resume events should never happen after an exit event
+			switch e.Type {
+			case StateExit:
+				r.pastEvents[e.Id] = e
+			case StatePause:
+				r.pastEvents[e.Id] = e
+			case StateResume:
+				r.pastEvents[e.Id] = e
+			case stateLive:
+				live = true
+				r.updateEventTimestamp(time.Unix(int64(e.Timestamp), 0))
+			}
+		} else {
+			logrus.Debugf("received containerd event: %#v", e)
+
+			var container *container
+			var c *client
+			r.RLock()
+			for _, c = range r.clients {
+				container, err = c.getContainer(e.Id)
+				if err == nil {
+					break
+				}
+			}
+			r.RUnlock()
+			if container == nil {
+				logrus.Errorf("no state for container: %q", err)
+				continue
+			}
+
+			if err := container.handleEvent(e); err != nil {
+				logrus.Errorf("error processing state change for %s: %v", e.Id, err)
+			}
+
+			r.updateEventTimestamp(time.Unix(int64(e.Timestamp), 0))
+		}
+	}
+}
+
+func (r *remote) runContainerdDaemon() error {
+	pidFilename := filepath.Join(r.stateDir, containerdPidFilename)
+	f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600)
+	defer f.Close()
+	if err != nil {
+		return err
+	}
+
+	// The file may already exist; check whether the daemon is still alive
+	b := make([]byte, 8)
+	n, err := f.Read(b)
+	if err != nil && err != io.EOF {
+		return err
+	}
+
+	if n > 0 {
+		pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
+		if err != nil {
+			return err
+		}
+		if utils.IsProcessAlive(int(pid)) {
+			logrus.Infof("previous instance of containerd still alive (%d)", pid)
+			r.daemonPid = int(pid)
+			return nil
+		}
+	}
+
+	// rewind the file
+	_, err = f.Seek(0, os.SEEK_SET)
+	if err != nil {
+		return err
+	}
+
+	// Truncate it
+	err = f.Truncate(0)
+	if err != nil {
+		return err
+	}
+
+	// Start a new instance
+	args := []string{"-l", r.rpcAddr}
+	if r.debugLog {
+		args = append(args, "--debug", "true")
+	}
+	cmd := exec.Command(containerdBinary, args...)
+	// TODO: store logs?
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+	logrus.Infof("New containerd process, pid: %d\n", cmd.Process.Pid)
+
+	if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil {
+		utils.KillProcess(cmd.Process.Pid)
+		return err
+	}
+
+	go cmd.Wait() // Reap our child when needed
+	r.daemonPid = cmd.Process.Pid
+	return nil
+}
+
+// WithRemoteAddr sets the external containerd socket to connect to.
+func WithRemoteAddr(addr string) RemoteOption {
+	return rpcAddr(addr)
+}
+
+type rpcAddr string
+
+func (a rpcAddr) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.rpcAddr = string(a)
+		return nil
+	}
+	return fmt.Errorf("WithRemoteAddr option not supported for this remote")
+}
+
+// WithStartDaemon defines if libcontainerd should also run containerd daemon.
+func WithStartDaemon(start bool) RemoteOption {
+	return startDaemon(start)
+}
+
+type startDaemon bool
+
+func (s startDaemon) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.startDaemon = bool(s)
+		return nil
+	}
+	return fmt.Errorf("WithStartDaemon option not supported for this remote")
+}
+
+// WithDebugLog defines if containerd debug logs will be enabled for daemon.
+func WithDebugLog(debug bool) RemoteOption {
+	return debugLog(debug)
+}
+
+type debugLog bool
+
+func (d debugLog) Apply(r Remote) error {
+	if remote, ok := r.(*remote); ok {
+		remote.debugLog = bool(d)
+		return nil
+	}
+	return fmt.Errorf("WithDebugLog option not supported for this remote")
+}

+ 59 - 0
libcontainerd/types.go

@@ -0,0 +1,59 @@
+package libcontainerd
+
+import "io"
+
+// State constants used in state change reporting.
+const (
+	StateStart        = "start-container"
+	StatePause        = "pause"
+	StateResume       = "resume"
+	StateExit         = "exit"
+	StateRestart      = "restart"
+	StateRestore      = "restore"
+	StateStartProcess = "start-process"
+	StateExitProcess  = "exit-process"
+	StateOOM          = "oom" // fake state
+	stateLive         = "live"
+)
+
+// StateInfo contains description about the new state container has entered.
+type StateInfo struct { // FIXME: event?
+	State     string
+	Pid       uint32
+	ExitCode  uint32
+	ProcessID string
+	OOMKilled bool // TODO Windows containerd factor out
+}
+
+// Backend defines callbacks that the client of the library needs to implement.
+type Backend interface {
+	StateChanged(containerID string, state StateInfo) error
+	AttachStreams(processFriendlyName string, io IOPipe) error
+}
+
+// Client provides access to containerd features.
+type Client interface {
+	Create(containerID string, spec Spec, options ...CreateOption) error
+	Signal(containerID string, sig int) error
+	AddProcess(containerID, processFriendlyName string, process Process) error
+	Resize(containerID, processFriendlyName string, width, height int) error
+	Pause(containerID string) error
+	Resume(containerID string) error
+	Restore(containerID string, options ...CreateOption) error
+	Stats(containerID string) (*Stats, error)
+	GetPidsForContainer(containerID string) ([]int, error)
+	UpdateResources(containerID string, resources Resources) error
+}
+
+// CreateOption allows to configure parameters of container creation.
+type CreateOption interface {
+	Apply(interface{}) error
+}
+
+// IOPipe contains the stdio streams.
+type IOPipe struct {
+	Stdin    io.WriteCloser
+	Stdout   io.Reader
+	Stderr   io.Reader
+	Terminal bool // Whether stderr is connected on Windows
+}

+ 44 - 0
libcontainerd/types_linux.go

@@ -0,0 +1,44 @@
+package libcontainerd
+
+import (
+	containerd "github.com/docker/containerd/api/grpc/types"
+	"github.com/opencontainers/specs/specs-go"
+)
+
+// Spec is the base configuration for the container.  It specifies platform
+// independent configuration. This information must be included when the
+// bundle is packaged for distribution.
+type Spec specs.Spec
+
+// Process contains information to start a specific application inside the container.
+type Process struct {
+	// Terminal creates an interactive terminal for the container.
+	Terminal bool `json:"terminal"`
+	// User specifies user information for the process.
+	User *User `json:"user"`
+	// Args specifies the binary and arguments for the application to execute.
+	Args []string `json:"args"`
+	// Env populates the process environment for the process.
+	Env []string `json:"env,omitempty"`
+	// Cwd is the current working directory for the process and must be
+	// relative to the container's root.
+	Cwd *string `json:"cwd"`
+	// Capabilities are linux capabilities that are kept for the container.
+	Capabilities []string `json:"capabilities,omitempty"`
+	// Rlimits specifies rlimit options to apply to the process.
+	Rlimits []specs.Rlimit `json:"rlimits,omitempty"`
+	// ApparmorProfile specified the apparmor profile for the container.
+	ApparmorProfile *string `json:"apparmorProfile,omitempty"`
+	// SelinuxProcessLabel specifies the selinux context that the container process is run as.
+	SelinuxLabel *string `json:"selinuxLabel,omitempty"`
+}
+
+// Stats contains a stats properties from containerd.
+type Stats containerd.StatsResponse
+
+// User specifies linux specific user and group information for the container's
+// main process.
+type User specs.User
+
+// Resources defines updatable container resource values.
+type Resources containerd.UpdateResource

+ 41 - 0
libcontainerd/utils_linux.go

@@ -0,0 +1,41 @@
+package libcontainerd
+
+import (
+	containerd "github.com/docker/containerd/api/grpc/types"
+	"github.com/opencontainers/specs/specs-go"
+)
+
+func getRootIDs(s specs.Spec) (int, int, error) {
+	var hasUserns bool
+	for _, ns := range s.Linux.Namespaces {
+		if ns.Type == specs.UserNamespace {
+			hasUserns = true
+			break
+		}
+	}
+	if !hasUserns {
+		return 0, 0, nil
+	}
+	uid := hostIDFromMap(0, s.Linux.UIDMappings)
+	gid := hostIDFromMap(0, s.Linux.GIDMappings)
+	return uid, gid, nil
+}
+
+func hostIDFromMap(id uint32, mp []specs.IDMapping) int {
+	for _, m := range mp {
+		if id >= m.ContainerID && id <= m.ContainerID+m.Size-1 {
+			return int(m.HostID + id - m.ContainerID)
+		}
+	}
+	return 0
+}
+
+func systemPid(ctr *containerd.Container) uint32 {
+	var pid uint32
+	for _, p := range ctr.Processes {
+		if p.Pid == InitFriendlyName {
+			pid = p.SystemPid
+		}
+	}
+	return pid
+}

+ 214 - 0
oci/defaults_linux.go

@@ -0,0 +1,214 @@
+package oci
+
+import (
+	"os"
+	"runtime"
+
+	"github.com/opencontainers/specs/specs-go"
+)
+
+func sPtr(s string) *string      { return &s }
+func rPtr(r rune) *rune          { return &r }
+func iPtr(i int64) *int64        { return &i }
+func u32Ptr(i int64) *uint32     { u := uint32(i); return &u }
+func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
+
+// DefaultSpec returns default oci spec used by docker.
+func DefaultSpec() specs.Spec {
+	s := specs.Spec{
+		Version: specs.Version,
+		Platform: specs.Platform{
+			OS:   runtime.GOOS,
+			Arch: runtime.GOARCH,
+		},
+	}
+	s.Mounts = []specs.Mount{
+		{
+			Destination: "/proc",
+			Type:        "proc",
+			Source:      "proc",
+			Options:     []string{"nosuid", "noexec", "nodev"},
+		},
+		{
+			Destination: "/dev",
+			Type:        "tmpfs",
+			Source:      "tmpfs",
+			Options:     []string{"nosuid", "strictatime", "mode=755"},
+		},
+		{
+			Destination: "/dev/pts",
+			Type:        "devpts",
+			Source:      "devpts",
+			Options:     []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
+		},
+		{
+			Destination: "/sys",
+			Type:        "sysfs",
+			Source:      "sysfs",
+			Options:     []string{"nosuid", "noexec", "nodev", "ro"},
+		},
+		{
+			Destination: "/sys/fs/cgroup",
+			Type:        "cgroup",
+			Source:      "cgroup",
+			Options:     []string{"ro", "nosuid", "noexec", "nodev"},
+		},
+		{
+			Destination: "/dev/mqueue",
+			Type:        "mqueue",
+			Source:      "mqueue",
+			Options:     []string{"nosuid", "noexec", "nodev"},
+		},
+	}
+
+	s.Process.Capabilities = []string{
+		"CAP_CHOWN",
+		"CAP_DAC_OVERRIDE",
+		"CAP_FSETID",
+		"CAP_FOWNER",
+		"CAP_MKNOD",
+		"CAP_NET_RAW",
+		"CAP_SETGID",
+		"CAP_SETUID",
+		"CAP_SETFCAP",
+		"CAP_SETPCAP",
+		"CAP_NET_BIND_SERVICE",
+		"CAP_SYS_CHROOT",
+		"CAP_KILL",
+		"CAP_AUDIT_WRITE",
+	}
+
+	s.Linux = specs.Linux{
+		Namespaces: []specs.Namespace{
+			{Type: "mount"},
+			{Type: "network"},
+			{Type: "uts"},
+			{Type: "pid"},
+			{Type: "ipc"},
+		},
+		Devices: []specs.Device{
+			{
+				Type:     "c",
+				Path:     "/dev/zero",
+				Major:    1,
+				Minor:    5,
+				FileMode: fmPtr(0666),
+				UID:      u32Ptr(0),
+				GID:      u32Ptr(0),
+			},
+			{
+				Type:     "c",
+				Path:     "/dev/null",
+				Major:    1,
+				Minor:    3,
+				FileMode: fmPtr(0666),
+				UID:      u32Ptr(0),
+				GID:      u32Ptr(0),
+			},
+			{
+				Type:     "c",
+				Path:     "/dev/urandom",
+				Major:    1,
+				Minor:    9,
+				FileMode: fmPtr(0666),
+				UID:      u32Ptr(0),
+				GID:      u32Ptr(0),
+			},
+			{
+				Type:     "c",
+				Path:     "/dev/random",
+				Major:    1,
+				Minor:    8,
+				FileMode: fmPtr(0666),
+				UID:      u32Ptr(0),
+				GID:      u32Ptr(0),
+			},
+			// {
+			// 	Type:     "c",
+			// 	Path:     "/dev/tty",
+			// 	Major:    5,
+			// 	Minor:    0,
+			// 	FileMode: fmPtr(0666),
+			// 	UID:      u32Ptr(0),
+			// 	GID:      u32Ptr(0),
+			// },
+			// {
+			// 	Type:     "c",
+			// 	Path:     "/dev/console",
+			// 	Major:    5,
+			// 	Minor:    1,
+			// 	FileMode: fmPtr(0666),
+			// 	UID:      u32Ptr(0),
+			// 	GID:      u32Ptr(0),
+			// },
+			{
+				Type:     "c",
+				Path:     "/dev/fuse",
+				Major:    10,
+				Minor:    229,
+				FileMode: fmPtr(0666),
+				UID:      u32Ptr(0),
+				GID:      u32Ptr(0),
+			},
+		},
+		Resources: &specs.Resources{
+			Devices: []specs.DeviceCgroup{
+				{
+					Allow:  false,
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(1),
+					Minor:  iPtr(5),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(1),
+					Minor:  iPtr(3),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(1),
+					Minor:  iPtr(9),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(1),
+					Minor:  iPtr(8),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(5),
+					Minor:  iPtr(0),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  true,
+					Type:   sPtr("c"),
+					Major:  iPtr(5),
+					Minor:  iPtr(1),
+					Access: sPtr("rwm"),
+				},
+				{
+					Allow:  false,
+					Type:   sPtr("c"),
+					Major:  iPtr(10),
+					Minor:  iPtr(229),
+					Access: sPtr("rwm"),
+				},
+			},
+		},
+	}
+
+	return s
+}

+ 6 - 0
pkg/system/syscall_unix.go

@@ -9,3 +9,9 @@ import "syscall"
 func Unmount(dest string) error {
 func Unmount(dest string) error {
 	return syscall.Unmount(dest, 0)
 	return syscall.Unmount(dest, 0)
 }
 }
+
+// CommandLineToArgv should not be used on Unix.
+// It simply returns commandLine as the only element of the returned array.
+func CommandLineToArgv(commandLine string) ([]string, error) {
+	return []string{commandLine}, nil
+}

+ 24 - 0
pkg/system/syscall_windows.go

@@ -3,6 +3,7 @@ package system
 import (
 import (
 	"fmt"
 	"fmt"
 	"syscall"
 	"syscall"
+	"unsafe"
 )
 )
 
 
 // OSVersion is a wrapper for Windows version information
 // OSVersion is a wrapper for Windows version information
@@ -34,3 +35,26 @@ func GetOSVersion() (OSVersion, error) {
 func Unmount(dest string) error {
 func Unmount(dest string) error {
 	return nil
 	return nil
 }
 }
+
+// CommandLineToArgv wraps the Windows syscall to turn a commandline into an argument array.
+func CommandLineToArgv(commandLine string) ([]string, error) {
+	var argc int32
+
+	argsPtr, err := syscall.UTF16PtrFromString(commandLine)
+	if err != nil {
+		return nil, err
+	}
+
+	argv, err := syscall.CommandLineToArgv(argsPtr, &argc)
+	if err != nil {
+		return nil, err
+	}
+	defer syscall.LocalFree(syscall.Handle(uintptr(unsafe.Pointer(argv))))
+
+	newArgs := make([]string, argc)
+	for i, v := range (*argv)[:argc] {
+		newArgs[i] = string(syscall.UTF16ToString((*v)[:]))
+	}
+
+	return newArgs, nil
+}

+ 118 - 0
restartmanager/restartmanager.go

@@ -0,0 +1,118 @@
+package restartmanager
+
+import (
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/docker/engine-api/types/container"
+)
+
+const (
+	backoffMultiplier = 2
+	defaultTimeout    = 100 * time.Millisecond
+)
+
+// RestartManager defines object that controls container restarting rules.
+type RestartManager interface {
+	Cancel() error
+	ShouldRestart(exitCode uint32) (bool, chan error, error)
+}
+
+type restartManager struct {
+	sync.Mutex
+	sync.Once
+	policy       container.RestartPolicy
+	failureCount int
+	timeout      time.Duration
+	active       bool
+	cancel       chan struct{}
+	canceled     bool
+}
+
+// New returns a new restartmanager based on a policy.
+func New(policy container.RestartPolicy) RestartManager {
+	return &restartManager{policy: policy, cancel: make(chan struct{})}
+}
+
+func (rm *restartManager) SetPolicy(policy container.RestartPolicy) {
+	rm.Lock()
+	rm.policy = policy
+	rm.Unlock()
+}
+
+func (rm *restartManager) ShouldRestart(exitCode uint32) (bool, chan error, error) {
+	rm.Lock()
+	unlockOnExit := true
+	defer func() {
+		if unlockOnExit {
+			rm.Unlock()
+		}
+	}()
+
+	if rm.canceled {
+		return false, nil, nil
+	}
+
+	if rm.active {
+		return false, nil, fmt.Errorf("invalid call on active restartmanager")
+	}
+
+	if exitCode != 0 {
+		rm.failureCount++
+	} else {
+		rm.failureCount = 0
+	}
+
+	if rm.timeout == 0 {
+		rm.timeout = defaultTimeout
+	} else {
+		rm.timeout *= backoffMultiplier
+	}
+
+	var restart bool
+	switch {
+	case rm.policy.IsAlways(), rm.policy.IsUnlessStopped():
+		restart = true
+	case rm.policy.IsOnFailure():
+		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
+		if max := rm.policy.MaximumRetryCount; max == 0 || rm.failureCount <= max {
+			restart = exitCode != 0
+		}
+	}
+
+	if !restart {
+		rm.active = false
+		return false, nil, nil
+	}
+
+	unlockOnExit = false
+	rm.active = true
+	rm.Unlock()
+
+	ch := make(chan error)
+	go func() {
+		select {
+		case <-rm.cancel:
+			ch <- fmt.Errorf("restartmanager canceled")
+			close(ch)
+		case <-time.After(rm.timeout):
+			rm.Lock()
+			close(ch)
+			rm.active = false
+			rm.Unlock()
+		}
+	}()
+
+	return true, ch, nil
+}
+
+func (rm *restartManager) Cancel() error {
+	rm.Do(func() {
+		rm.Lock()
+		rm.canceled = true
+		close(rm.cancel)
+		rm.Unlock()
+	})
+	return nil
+}

+ 3 - 0
restartmanager/restartmanager_test.go

@@ -0,0 +1,3 @@
+package restartmanager
+
+// FIXME

+ 2 - 0
runconfig/streams.go

@@ -5,6 +5,7 @@ import (
 	"io"
 	"io"
 	"io/ioutil"
 	"io/ioutil"
 	"strings"
 	"strings"
+	"sync"
 
 
 	"github.com/docker/docker/pkg/broadcaster"
 	"github.com/docker/docker/pkg/broadcaster"
 	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/ioutils"
@@ -20,6 +21,7 @@ import (
 // copied and delivered to all StdoutPipe and StderrPipe consumers, using
 // copied and delivered to all StdoutPipe and StderrPipe consumers, using
 // a kind of "broadcaster".
 // a kind of "broadcaster".
 type StreamConfig struct {
 type StreamConfig struct {
+	sync.WaitGroup
 	stdout    *broadcaster.Unbuffered
 	stdout    *broadcaster.Unbuffered
 	stderr    *broadcaster.Unbuffered
 	stderr    *broadcaster.Unbuffered
 	stdin     io.ReadCloser
 	stdin     io.ReadCloser

+ 22 - 0
utils/process_unix.go

@@ -0,0 +1,22 @@
+// +build linux freebsd
+
+package utils
+
+import (
+	"syscall"
+)
+
+// IsProcessAlive returns true if process with a given pid is running.
+func IsProcessAlive(pid int) bool {
+	err := syscall.Kill(pid, syscall.Signal(0))
+	if err == nil || err == syscall.EPERM {
+		return true
+	}
+
+	return false
+}
+
+// KillProcess force-stops a process.
+func KillProcess(pid int) {
+	syscall.Kill(pid, syscall.SIGKILL)
+}

+ 20 - 0
utils/process_windows.go

@@ -0,0 +1,20 @@
+package utils
+
+// IsProcessAlive returns true if process with a given pid is running.
+func IsProcessAlive(pid int) bool {
+	// TODO Windows containerd. Not sure this is needed
+	//	p, err := os.FindProcess(pid)
+	//	if err == nil {
+	//		return true
+	//	}
+	return false
+}
+
+// KillProcess force-stops a process.
+func KillProcess(pid int) {
+	// TODO Windows containerd. Not sure this is needed
+	//	p, err := os.FindProcess(pid)
+	//	if err == nil {
+	//		p.Kill()
+	//	}
+}