
Merge pull request #20662 from tonistiigi/containerd-integration

Containerd integration
Jess Frazelle, 9 years ago
parent
commit
e5a3f86e44
100 changed files, with 3,741 additions and 4,269 deletions
  1. Dockerfile (+18 -0)
  2. Dockerfile.aarch64 (+18 -0)
  3. Dockerfile.armhf (+18 -0)
  4. Dockerfile.gccgo (+18 -0)
  5. Dockerfile.ppc64le (+18 -0)
  6. Dockerfile.s390x (+18 -0)
  7. Dockerfile.simple (+18 -0)
  8. api/client/run.go (+0 -11)
  9. api/server/router/container/exec.go (+2 -0)
  10. container/container.go (+32 -29)
  11. container/container_unix.go (+23 -38)
  12. container/container_windows.go (+8 -3)
  13. container/memory_store.go (+12 -11)
  14. container/monitor.go (+4 -340)
  15. container/mounts_unix.go (+12 -0)
  16. container/mounts_windows.go (+8 -0)
  17. container/state.go (+9 -7)
  18. container/state_test.go (+3 -5)
  19. container/state_unix.go (+1 -3)
  20. container/state_windows.go (+1 -3)
  21. daemon/README.md (+0 -9)
  22. daemon/apparmor_default.go (+30 -0)
  23. daemon/apparmor_default_unsupported.go (+6 -0)
  24. daemon/caps/utils_unix.go (+10 -4)
  25. daemon/config.go (+1 -1)
  26. daemon/config_unix.go (+5 -2)
  27. daemon/config_windows.go (+3 -2)
  28. daemon/container_operations.go (+3 -4)
  29. daemon/container_operations_unix.go (+56 -235)
  30. daemon/container_operations_windows.go (+1 -135)
  31. daemon/daemon.go (+89 -75)
  32. daemon/daemon_linux.go (+60 -2)
  33. daemon/daemon_linux_test.go (+65 -35)
  34. daemon/daemon_unix.go (+163 -26)
  35. daemon/daemon_windows.go (+59 -9)
  36. daemon/delete.go (+0 -3)
  37. daemon/exec.go (+32 -107)
  38. daemon/exec/exec.go (+14 -63)
  39. daemon/exec_linux.go (+26 -0)
  40. daemon/exec_unix.go (+0 -21)
  41. daemon/exec_windows.go (+7 -5)
  42. daemon/execdriver/driver.go (+0 -133)
  43. daemon/execdriver/driver_unix.go (+0 -323)
  44. daemon/execdriver/driver_windows.go (+0 -66)
  45. daemon/execdriver/execdrivers/execdrivers_freebsd.go (+0 -15)
  46. daemon/execdriver/execdrivers/execdrivers_linux.go (+0 -16)
  47. daemon/execdriver/execdrivers/execdrivers_windows.go (+0 -14)
  48. daemon/execdriver/native/create.go (+0 -514)
  49. daemon/execdriver/native/driver.go (+0 -606)
  50. daemon/execdriver/native/driver_unsupported.go (+0 -14)
  51. daemon/execdriver/native/driver_unsupported_nocgo.go (+0 -14)
  52. daemon/execdriver/native/exec.go (+0 -96)
  53. daemon/execdriver/native/init.go (+0 -40)
  54. daemon/execdriver/native/template/default_template_linux.go (+0 -106)
  55. daemon/execdriver/native/template/default_template_unsupported.go (+0 -3)
  56. daemon/execdriver/pipes.go (+0 -24)
  57. daemon/execdriver/termconsole.go (+0 -55)
  58. daemon/execdriver/windows/clean.go (+0 -8)
  59. daemon/execdriver/windows/commandlinebuilder.go (+0 -36)
  60. daemon/execdriver/windows/exec.go (+0 -89)
  61. daemon/execdriver/windows/getpids.go (+0 -11)
  62. daemon/execdriver/windows/namedpipes.go (+0 -63)
  63. daemon/execdriver/windows/pauseunpause.go (+0 -19)
  64. daemon/execdriver/windows/run.go (+0 -366)
  65. daemon/execdriver/windows/stats.go (+0 -14)
  66. daemon/execdriver/windows/stdconsole.go (+0 -24)
  67. daemon/execdriver/windows/terminatekill.go (+0 -49)
  68. daemon/execdriver/windows/ttyconsole.go (+0 -32)
  69. daemon/execdriver/windows/unsupported.go (+0 -14)
  70. daemon/execdriver/windows/update.go (+0 -14)
  71. daemon/execdriver/windows/windows.go (+0 -123)
  72. daemon/info.go (+0 -1)
  73. daemon/inspect_unix.go (+5 -5)
  74. daemon/inspect_windows.go (+3 -3)
  75. daemon/kill.go (+4 -0)
  76. daemon/monitor.go (+143 -0)
  77. daemon/monitor_linux.go (+14 -0)
  78. daemon/monitor_windows.go (+13 -0)
  79. daemon/oci_linux.go (+652 -0)
  80. daemon/oci_windows.go (+204 -0)
  81. daemon/pause.go (+2 -3)
  82. daemon/resize.go (+8 -5)
  83. daemon/seccomp_default_linux.go (+1600 -0)
  84. daemon/seccomp_disabled.go (+12 -0)
  85. daemon/seccomp_linux.go (+100 -0)
  86. daemon/start.go (+28 -27)
  87. daemon/stats.go (+2 -6)
  88. daemon/stats_collector_unix.go (+5 -4)
  89. daemon/stats_linux.go (+0 -84)
  90. daemon/stats_windows.go (+0 -14)
  91. daemon/top_unix.go (+2 -1)
  92. daemon/unpause.go (+1 -3)
  93. daemon/update.go (+1 -1)
  94. daemon/update_linux.go (+25 -0)
  95. daemon/update_windows.go (+13 -0)
  96. daemon/volumes.go (+1 -2)
  97. daemon/volumes_unix.go (+9 -10)
  98. daemon/volumes_windows.go (+11 -7)
  99. distribution/xfer/download_test.go (+3 -2)
  100. docker/daemon.go (+9 -2)

+ 18 - 0
Dockerfile

@@ -249,6 +249,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.aarch64

@@ -186,6 +186,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/tomlv github.com/BurntSushi/toml/cmd/tomlv \
 	&& rm -rf "$GOPATH"
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.armhf

@@ -205,6 +205,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.gccgo

@@ -73,6 +73,24 @@ VOLUME /var/lib/docker
 WORKDIR /go/src/github.com/docker/docker
 ENV DOCKER_BUILDTAGS apparmor seccomp selinux
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.ppc64le

@@ -197,6 +197,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.s390x

@@ -176,6 +176,24 @@ RUN set -x \
 	&& go build -v -o /usr/local/bin/rsrc github.com/akavel/rsrc \
 	&& rm -rf "$GOPATH"
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 18 - 0
Dockerfile.simple

@@ -29,6 +29,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 		aufs-tools \
 	&& rm -rf /var/lib/apt/lists/*
 
+# Install runc
+ENV RUNC_COMMIT bbde9c426ff363d813b8722f0744115c13b408b6
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/opencontainers/runc.git "$GOPATH/src/github.com/opencontainers/runc" \
+	&& cd "$GOPATH/src/github.com/opencontainers/runc" \
+	&& git checkout -q "$RUNC_COMMIT" \
+	&& make BUILDTAGS="seccomp apparmor selinux" && make install
+
+# Install containerd
+ENV CONTAINERD_COMMIT 7146b01a3d7aaa146414cdfb0a6c96cfba5d9091
+RUN set -x \
+	&& export GOPATH="$(mktemp -d)" \
+  && git clone git://github.com/docker/containerd.git "$GOPATH/src/github.com/docker/containerd" \
+	&& cd "$GOPATH/src/github.com/docker/containerd" \
+	&& git checkout -q "$CONTAINERD_COMMIT" \
+	&& make && make install
+
 ENV AUTO_GOPATH 1
 WORKDIR /usr/src/docker
 COPY . /usr/src/docker

+ 0 - 11
api/client/run.go

@@ -14,7 +14,6 @@ import (
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/pkg/promise"
 	"github.com/docker/docker/pkg/signal"
-	"github.com/docker/docker/pkg/stringid"
 	runconfigopts "github.com/docker/docker/runconfig/opts"
 	"github.com/docker/engine-api/types"
 	"github.com/docker/libnetwork/resolvconf/dns"
@@ -256,16 +255,6 @@ func (cli *DockerCli) CmdRun(args ...string) error {
 
 	// Attached mode
 	if *flAutoRemove {
-		// Warn user if they detached us
-		js, err := cli.client.ContainerInspect(context.Background(), createResponse.ID)
-		if err != nil {
-			return runStartContainerErr(err)
-		}
-		if js.State.Running == true || js.State.Paused == true {
-			fmt.Fprintf(cli.out, "Detached from %s, awaiting its termination in order to uphold \"--rm\".\n",
-				stringid.TruncateID(createResponse.ID))
-		}
-
 		// Autoremove: wait for the container to finish, retrieve
 		// the exit code and remove the container
 		if status, err = cli.client.ContainerWait(context.Background(), createResponse.ID); err != nil {

+ 2 - 0
api/server/router/container/exec.go

@@ -112,7 +112,9 @@ func (s *containerRouter) postContainerExecStart(ctx context.Context, w http.Res
 		if execStartCheck.Detach {
 			return err
 		}
+		stdout.Write([]byte(err.Error()))
 		logrus.Errorf("Error running exec in container: %v\n", err)
+		return err
 	}
 	return nil
 }

+ 32 - 29
container/container.go

@@ -17,7 +17,6 @@ import (
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/daemon/logger"
 	"github.com/docker/docker/daemon/logger/jsonfilelog"
 	"github.com/docker/docker/daemon/network"
@@ -27,6 +26,7 @@ import (
 	"github.com/docker/docker/pkg/promise"
 	"github.com/docker/docker/pkg/signal"
 	"github.com/docker/docker/pkg/symlink"
+	"github.com/docker/docker/restartmanager"
 	"github.com/docker/docker/runconfig"
 	runconfigopts "github.com/docker/docker/runconfig/opts"
 	"github.com/docker/docker/volume"
@@ -74,13 +74,12 @@ type CommonContainer struct {
 	HasBeenManuallyStopped bool // used for unless-stopped restart policy
 	MountPoints            map[string]*volume.MountPoint
 	HostConfig             *containertypes.HostConfig `json:"-"` // do not serialize the host config in the json, otherwise we'll make the container unportable
-	Command                *execdriver.Command        `json:"-"`
-	monitor                *containerMonitor
-	ExecCommands           *exec.Store `json:"-"`
+	ExecCommands           *exec.Store                `json:"-"`
 	// logDriver for closing
-	LogDriver     logger.Logger  `json:"-"`
-	LogCopier     *logger.Copier `json:"-"`
-	attachContext *attachContext
+	LogDriver      logger.Logger  `json:"-"`
+	LogCopier      *logger.Copier `json:"-"`
+	restartManager restartmanager.RestartManager
+	attachContext  *attachContext
 }
 
 // NewBaseContainer creates a new container with its
@@ -276,19 +275,9 @@ func (container *Container) GetRootResourcePath(path string) (string, error) {
 // ExitOnNext signals to the monitor that it should not restart the container
 // after we send the kill signal.
 func (container *Container) ExitOnNext() {
-	container.monitor.ExitOnNext()
-}
-
-// Resize changes the TTY of the process running inside the container
-// to the given height and width. The container must be running.
-func (container *Container) Resize(h, w int) error {
-	if container.Command.ProcessConfig.Terminal == nil {
-		return fmt.Errorf("Container %s does not have a terminal ready", container.ID)
-	}
-	if err := container.Command.ProcessConfig.Terminal.Resize(h, w); err != nil {
-		return err
+	if container.restartManager != nil {
+		container.restartManager.Cancel()
 	}
-	return nil
 }
 
 // HostConfigPath returns the path to the container's JSON hostconfig
@@ -897,19 +886,33 @@ func (container *Container) BuildCreateEndpointOptions(n libnetwork.Network, epC
 
 // UpdateMonitor updates monitor configure for running container
 func (container *Container) UpdateMonitor(restartPolicy containertypes.RestartPolicy) {
-	monitor := container.monitor
-	// No need to update monitor if container hasn't got one
-	// monitor will be generated correctly according to container
-	if monitor == nil {
-		return
+	type policySetter interface {
+		SetPolicy(containertypes.RestartPolicy)
+	}
+
+	if rm, ok := container.RestartManager(false).(policySetter); ok {
+		rm.SetPolicy(restartPolicy)
+	}
+}
+
+// FullHostname returns hostname and optional domain appended to it.
+func (container *Container) FullHostname() string {
+	fullHostname := container.Config.Hostname
+	if container.Config.Domainname != "" {
+		fullHostname = fmt.Sprintf("%s.%s", fullHostname, container.Config.Domainname)
 	}
+	return fullHostname
+}
 
-	monitor.mux.Lock()
-	// to check whether restart policy has changed.
-	if restartPolicy.Name != "" && !monitor.restartPolicy.IsSame(&restartPolicy) {
-		monitor.restartPolicy = restartPolicy
+// RestartManager returns the current restartmanager instance connected to the container.
+func (container *Container) RestartManager(reset bool) restartmanager.RestartManager {
+	if reset {
+		container.RestartCount = 0
+	}
+	if container.restartManager == nil {
+		container.restartManager = restartmanager.New(container.HostConfig.RestartPolicy)
 	}
-	monitor.mux.Unlock()
+	return container.restartManager
 }
 
 type attachContext struct {
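
(Sketch, not part of the PR.) The container.go changes above drop the old containerMonitor wiring in favour of a lazily created restartmanager.RestartManager, and UpdateMonitor now reaches the policy through an interface assertion. A minimal Go illustration of that pattern, with simplified stand-in types:

    package main

    import "fmt"

    // Simplified stand-ins for the real types; illustration only.
    type RestartPolicy struct{ Name string }

    type RestartManager interface {
        Cancel() error
    }

    type restartManager struct{ policy RestartPolicy }

    func (rm *restartManager) Cancel() error             { return nil }
    func (rm *restartManager) SetPolicy(p RestartPolicy) { rm.policy = p } // not part of the interface

    type Container struct {
        restartManager RestartManager
        RestartCount   int
        policy         RestartPolicy
    }

    // RestartManager lazily creates the manager on first use, as in the diff above.
    func (c *Container) RestartManager(reset bool) RestartManager {
        if reset {
            c.RestartCount = 0
        }
        if c.restartManager == nil {
            c.restartManager = &restartManager{policy: c.policy}
        }
        return c.restartManager
    }

    // UpdateMonitor changes the policy only if the concrete manager supports it.
    func (c *Container) UpdateMonitor(p RestartPolicy) {
        type policySetter interface{ SetPolicy(RestartPolicy) }
        if rm, ok := c.RestartManager(false).(policySetter); ok {
            rm.SetPolicy(p)
        }
    }

    func main() {
        c := &Container{policy: RestartPolicy{Name: "on-failure"}}
        c.UpdateMonitor(RestartPolicy{Name: "always"})
        fmt.Println(c.restartManager.(*restartManager).policy.Name) // prints "always"
    }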

+ 23 - 38
container/container_unix.go

@@ -11,7 +11,6 @@ import (
 	"syscall"
 
 	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/chrootarchive"
 	"github.com/docker/docker/pkg/symlink"
 	"github.com/docker/docker/pkg/system"
@@ -39,6 +38,15 @@ type Container struct {
 	NoNewPrivileges bool
 }
 
+// ExitStatus provides exit reasons for a container.
+type ExitStatus struct {
+	// The exit code with which the container exited.
+	ExitCode int
+
+	// Whether the container encountered an OOM.
+	OOMKilled bool
+}
+
 // CreateDaemonEnvironment returns the list of all environment variables given the list of
 // environment variables related to links.
 // Sets PATH, HOSTNAME and if container.Config.Tty is set: TERM.
@@ -57,7 +65,6 @@ func (container *Container) CreateDaemonEnvironment(linkedEnv []string) []string
 	// we need to replace the 'env' keys where they match and append anything
 	// else.
 	env = utils.ReplaceOrAppendEnvValues(env, container.Config.Env)
-
 	return env
 }
 
@@ -103,8 +110,8 @@ func appendNetworkMounts(container *Container, volumeMounts []volume.MountPoint)
 }
 
 // NetworkMounts returns the list of network mounts.
-func (container *Container) NetworkMounts() []execdriver.Mount {
-	var mounts []execdriver.Mount
+func (container *Container) NetworkMounts() []Mount {
+	var mounts []Mount
 	shared := container.HostConfig.NetworkMode.IsContainer()
 	if container.ResolvConfPath != "" {
 		if _, err := os.Stat(container.ResolvConfPath); err != nil {
@@ -115,7 +122,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 			if m, exists := container.MountPoints["/etc/resolv.conf"]; exists {
 				writable = m.RW
 			}
-			mounts = append(mounts, execdriver.Mount{
+			mounts = append(mounts, Mount{
 				Source:      container.ResolvConfPath,
 				Destination: "/etc/resolv.conf",
 				Writable:    writable,
@@ -132,7 +139,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 			if m, exists := container.MountPoints["/etc/hostname"]; exists {
 				writable = m.RW
 			}
-			mounts = append(mounts, execdriver.Mount{
+			mounts = append(mounts, Mount{
 				Source:      container.HostnamePath,
 				Destination: "/etc/hostname",
 				Writable:    writable,
@@ -149,7 +156,7 @@ func (container *Container) NetworkMounts() []execdriver.Mount {
 			if m, exists := container.MountPoints["/etc/hosts"]; exists {
 				writable = m.RW
 			}
-			mounts = append(mounts, execdriver.Mount{
+			mounts = append(mounts, Mount{
 				Source:      container.HostsPath,
 				Destination: "/etc/hosts",
 				Writable:    writable,
@@ -224,37 +231,26 @@ func (container *Container) UnmountIpcMounts(unmount func(pth string) error) {
 }
 
 // IpcMounts returns the list of IPC mounts
-func (container *Container) IpcMounts() []execdriver.Mount {
-	var mounts []execdriver.Mount
+func (container *Container) IpcMounts() []Mount {
+	var mounts []Mount
 
 	if !container.HasMountFor("/dev/shm") {
 		label.SetFileLabel(container.ShmPath, container.MountLabel)
-		mounts = append(mounts, execdriver.Mount{
+		mounts = append(mounts, Mount{
 			Source:      container.ShmPath,
 			Destination: "/dev/shm",
 			Writable:    true,
 			Propagation: volume.DefaultPropagationMode,
 		})
 	}
-	return mounts
-}
 
-func updateCommand(c *execdriver.Command, resources containertypes.Resources) {
-	c.Resources.BlkioWeight = resources.BlkioWeight
-	c.Resources.CPUShares = resources.CPUShares
-	c.Resources.CPUPeriod = resources.CPUPeriod
-	c.Resources.CPUQuota = resources.CPUQuota
-	c.Resources.CpusetCpus = resources.CpusetCpus
-	c.Resources.CpusetMems = resources.CpusetMems
-	c.Resources.Memory = resources.Memory
-	c.Resources.MemorySwap = resources.MemorySwap
-	c.Resources.MemoryReservation = resources.MemoryReservation
-	c.Resources.KernelMemory = resources.KernelMemory
+	return mounts
 }
 
 // UpdateContainer updates configuration of a container.
 func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfig) error {
 	container.Lock()
+	defer container.Unlock()
 
 	// update resources of container
 	resources := hostConfig.Resources
@@ -294,19 +290,8 @@ func (container *Container) UpdateContainer(hostConfig *containertypes.HostConfi
 	if hostConfig.RestartPolicy.Name != "" {
 		container.HostConfig.RestartPolicy = hostConfig.RestartPolicy
 	}
-	container.Unlock()
-
-	// If container is not running, update hostConfig struct is enough,
-	// resources will be updated when the container is started again.
-	// If container is running (including paused), we need to update
-	// the command so we can update configs to the real world.
-	if container.IsRunning() {
-		container.Lock()
-		updateCommand(container.Command, *cResources)
-		container.Unlock()
-	}
 
-	if err := container.ToDiskLocking(); err != nil {
+	if err := container.ToDisk(); err != nil {
 		logrus.Errorf("Error saving updated container: %v", err)
 		return err
 	}
@@ -400,10 +385,10 @@ func copyOwnership(source, destination string) error {
 }
 
 // TmpfsMounts returns the list of tmpfs mounts
-func (container *Container) TmpfsMounts() []execdriver.Mount {
-	var mounts []execdriver.Mount
+func (container *Container) TmpfsMounts() []Mount {
+	var mounts []Mount
 	for dest, data := range container.HostConfig.Tmpfs {
-		mounts = append(mounts, execdriver.Mount{
+		mounts = append(mounts, Mount{
 			Source:      "tmpfs",
 			Destination: dest,
 			Data:        data,

+ 8 - 3
container/container_windows.go

@@ -7,7 +7,6 @@ import (
 	"os"
 	"path/filepath"
 
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/volume"
 	containertypes "github.com/docker/engine-api/types/container"
 )
@@ -23,6 +22,12 @@ type Container struct {
 	// Fields below here are platform specific.
 }
 
+// ExitStatus provides exit reasons for a container.
+type ExitStatus struct {
+	// The exit code with which the container exited.
+	ExitCode int
+}
+
 // CreateDaemonEnvironment creates a new environment variable slice for this container.
 func (container *Container) CreateDaemonEnvironment(linkedEnv []string) []string {
 	// On Windows, nothing to link. Just return the container environment.
@@ -35,7 +40,7 @@ func (container *Container) UnmountIpcMounts(unmount func(pth string) error) {
 }
 
 // IpcMounts returns the list of Ipc related mounts.
-func (container *Container) IpcMounts() []execdriver.Mount {
+func (container *Container) IpcMounts() []Mount {
 	return nil
 }
 
@@ -45,7 +50,7 @@ func (container *Container) UnmountVolumes(forceSyscall bool, volumeEventLog fun
 }
 
 // TmpfsMounts returns the list of tmpfs mounts
-func (container *Container) TmpfsMounts() []execdriver.Mount {
+func (container *Container) TmpfsMounts() []Mount {
 	return nil
 }
 

+ 12 - 11
container/memory_store.go

@@ -5,7 +5,7 @@ import "sync"
 // memoryStore implements a Store in memory.
 type memoryStore struct {
 	s map[string]*Container
-	sync.Mutex
+	sync.RWMutex
 }
 
 // NewMemoryStore initializes a new memory store.
@@ -25,9 +25,9 @@ func (c *memoryStore) Add(id string, cont *Container) {
 
 // Get returns a container from the store by id.
 func (c *memoryStore) Get(id string) *Container {
-	c.Lock()
+	c.RLock()
 	res := c.s[id]
-	c.Unlock()
+	c.RUnlock()
 	return res
 }
 
@@ -42,26 +42,26 @@ func (c *memoryStore) Delete(id string) {
 // The containers are ordered by creation date.
 func (c *memoryStore) List() []*Container {
 	containers := new(History)
-	c.Lock()
+	c.RLock()
 	for _, cont := range c.s {
 		containers.Add(cont)
 	}
-	c.Unlock()
+	c.RUnlock()
 	containers.sort()
 	return *containers
 }
 
 // Size returns the number of containers in the store.
 func (c *memoryStore) Size() int {
-	c.Lock()
-	defer c.Unlock()
+	c.RLock()
+	defer c.RUnlock()
 	return len(c.s)
 }
 
 // First returns the first container found in the store by a given filter.
 func (c *memoryStore) First(filter StoreFilter) *Container {
-	c.Lock()
-	defer c.Unlock()
+	c.RLock()
+	defer c.RUnlock()
 	for _, cont := range c.s {
 		if filter(cont) {
 			return cont
@@ -72,9 +72,10 @@ func (c *memoryStore) First(filter StoreFilter) *Container {
 
 // ApplyAll calls the reducer function with every container in the store.
 // This operation is asyncronous in the memory store.
+// NOTE: Modifications to the store MUST NOT be done by the StoreReducer.
 func (c *memoryStore) ApplyAll(apply StoreReducer) {
-	c.Lock()
-	defer c.Unlock()
+	c.RLock()
+	defer c.RUnlock()
 
 	wg := new(sync.WaitGroup)
 	for _, cont := range c.s {
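
(Illustrative sketch with simplified types.) The switch to sync.RWMutex lets readers like Get, List, and ApplyAll proceed concurrently, but it is also why the NOTE above forbids the StoreReducer from modifying the store: a reducer that calls a write-locked method while ApplyAll holds the read lock would deadlock.

    package main

    import (
        "fmt"
        "sync"
    )

    type store struct {
        mu sync.RWMutex
        s  map[string]int
    }

    // ApplyAll holds the read lock while invoking the reducer, as memoryStore does above.
    func (c *store) ApplyAll(apply func(id string)) {
        c.mu.RLock()
        defer c.mu.RUnlock()
        for id := range c.s {
            apply(id)
        }
    }

    // Delete takes the write lock; calling it from inside a reducer would block forever.
    func (c *store) Delete(id string) {
        c.mu.Lock()
        delete(c.s, id)
        c.mu.Unlock()
    }

    func main() {
        c := &store{s: map[string]int{"a": 1, "b": 2}}
        var ids []string
        c.ApplyAll(func(id string) { ids = append(ids, id) }) // read-only: safe
        fmt.Println(ids)
        // c.ApplyAll(func(id string) { c.Delete(id) })       // write inside a reducer: deadlock
    }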

+ 4 - 340
container/monitor.go

@@ -1,24 +1,13 @@
 package container
 
 import (
-	"fmt"
-	"io"
-	"os/exec"
-	"strings"
-	"sync"
-	"syscall"
 	"time"
 
 	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/pkg/promise"
-	"github.com/docker/docker/pkg/stringid"
-	"github.com/docker/engine-api/types/container"
 )
 
 const (
-	defaultTimeIncrement = 100
-	loggerCloseTimeout   = 10 * time.Second
+	loggerCloseTimeout = 10 * time.Second
 )
 
 // supervisor defines the interface that a supervisor must implement
@@ -30,311 +19,13 @@ type supervisor interface {
 	// StartLogging starts the logging driver for the container
 	StartLogging(*Container) error
 	// Run starts a container
-	Run(c *Container, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (execdriver.ExitStatus, error)
+	Run(c *Container) error
 	// IsShuttingDown tells whether the supervisor is shutting down or not
 	IsShuttingDown() bool
 }
 
-// containerMonitor monitors the execution of a container's main process.
-// If a restart policy is specified for the container the monitor will ensure that the
-// process is restarted based on the rules of the policy.  When the container is finally stopped
-// the monitor will reset and cleanup any of the container resources such as networking allocations
-// and the rootfs
-type containerMonitor struct {
-	mux sync.Mutex
-
-	// supervisor keeps track of the container and the events it generates
-	supervisor supervisor
-
-	// container is the container being monitored
-	container *Container
-
-	// restartPolicy is the current policy being applied to the container monitor
-	restartPolicy container.RestartPolicy
-
-	// failureCount is the number of times the container has failed to
-	// start in a row
-	failureCount int
-
-	// shouldStop signals the monitor that the next time the container exits it is
-	// either because docker or the user asked for the container to be stopped
-	shouldStop bool
-
-	// startSignal is a channel that is closes after the container initially starts
-	startSignal chan struct{}
-
-	// stopChan is used to signal to the monitor whenever there is a wait for the
-	// next restart so that the timeIncrement is not honored and the user is not
-	// left waiting for nothing to happen during this time
-	stopChan chan struct{}
-
-	// timeIncrement is the amount of time to wait between restarts
-	// this is in milliseconds
-	timeIncrement int
-
-	// lastStartTime is the time which the monitor last exec'd the container's process
-	lastStartTime time.Time
-}
-
-// StartMonitor initializes a containerMonitor for this container with the provided supervisor and restart policy
-// and starts the container's process.
-func (container *Container) StartMonitor(s supervisor) error {
-	container.monitor = &containerMonitor{
-		supervisor:    s,
-		container:     container,
-		restartPolicy: container.HostConfig.RestartPolicy,
-		timeIncrement: defaultTimeIncrement,
-		stopChan:      make(chan struct{}),
-		startSignal:   make(chan struct{}),
-	}
-
-	return container.monitor.wait()
-}
-
-// wait starts the container and wait until
-// we either receive an error from the initial start of the container's
-// process or until the process is running in the container
-func (m *containerMonitor) wait() error {
-	select {
-	case <-m.startSignal:
-	case err := <-promise.Go(m.start):
-		return err
-	}
-
-	return nil
-}
-
-// Stop signals to the container monitor that it should stop monitoring the container
-// for exits the next time the process dies
-func (m *containerMonitor) ExitOnNext() {
-	m.mux.Lock()
-
-	// we need to protect having a double close of the channel when stop is called
-	// twice or else we will get a panic
-	if !m.shouldStop {
-		m.shouldStop = true
-		close(m.stopChan)
-	}
-
-	m.mux.Unlock()
-}
-
-// Close closes the container's resources such as networking allocations and
-// unmounts the container's root filesystem
-func (m *containerMonitor) Close() error {
-	// Cleanup networking and mounts
-	m.supervisor.Cleanup(m.container)
-
-	if err := m.container.ToDisk(); err != nil {
-		logrus.Errorf("Error dumping container %s state to disk: %s", m.container.ID, err)
-
-		return err
-	}
-
-	return nil
-}
-
-// Start starts the containers process and monitors it according to the restart policy
-func (m *containerMonitor) start() error {
-	var (
-		err        error
-		exitStatus execdriver.ExitStatus
-		// this variable indicates where we in execution flow:
-		// before Run or after
-		afterRun bool
-	)
-
-	// ensure that when the monitor finally exits we release the networking and unmount the rootfs
-	defer func() {
-		if afterRun {
-			m.container.Lock()
-			defer m.container.Unlock()
-			m.container.SetStopped(&exitStatus)
-		}
-		m.Close()
-	}()
-	// reset stopped flag
-	if m.container.HasBeenManuallyStopped {
-		m.container.HasBeenManuallyStopped = false
-	}
-
-	// reset the restart count
-	m.container.RestartCount = -1
-
-	for {
-		m.container.RestartCount++
-
-		if err := m.supervisor.StartLogging(m.container); err != nil {
-			m.resetContainer(false)
-
-			return err
-		}
-
-		pipes := execdriver.NewPipes(m.container.Stdin(), m.container.Stdout(), m.container.Stderr(), m.container.Config.OpenStdin)
-
-		m.logEvent("start")
-
-		m.lastStartTime = time.Now()
-
-		if exitStatus, err = m.supervisor.Run(m.container, pipes, m.callback); err != nil {
-			// if we receive an internal error from the initial start of a container then lets
-			// return it instead of entering the restart loop
-			// set to 127 for container cmd not found/does not exist)
-			if strings.Contains(err.Error(), "executable file not found") ||
-				strings.Contains(err.Error(), "no such file or directory") ||
-				strings.Contains(err.Error(), "system cannot find the file specified") {
-				if m.container.RestartCount == 0 {
-					m.container.ExitCode = 127
-					m.resetContainer(false)
-					return fmt.Errorf("Container command not found or does not exist.")
-				}
-			}
-			// set to 126 for container cmd can't be invoked errors
-			if strings.Contains(err.Error(), syscall.EACCES.Error()) {
-				if m.container.RestartCount == 0 {
-					m.container.ExitCode = 126
-					m.resetContainer(false)
-					return fmt.Errorf("Container command could not be invoked.")
-				}
-			}
-
-			if m.container.RestartCount == 0 {
-				m.container.ExitCode = -1
-				m.resetContainer(false)
-
-				return fmt.Errorf("Cannot start container %s: %v", m.container.ID, err)
-			}
-
-			logrus.Errorf("Error running container: %s", err)
-		}
-
-		// here container.Lock is already lost
-		afterRun = true
-
-		m.resetMonitor(err == nil && exitStatus.ExitCode == 0)
-
-		if m.shouldRestart(exitStatus.ExitCode) {
-			m.container.SetRestartingLocking(&exitStatus)
-			m.logEvent("die")
-			m.resetContainer(true)
-
-			// sleep with a small time increment between each restart to help avoid issues cased by quickly
-			// restarting the container because of some types of errors ( networking cut out, etc... )
-			m.waitForNextRestart()
-
-			// we need to check this before reentering the loop because the waitForNextRestart could have
-			// been terminated by a request from a user
-			if m.shouldStop {
-				return err
-			}
-			continue
-		}
-
-		m.logEvent("die")
-		m.resetContainer(true)
-		return err
-	}
-}
-
-// resetMonitor resets the stateful fields on the containerMonitor based on the
-// previous runs success or failure.  Regardless of success, if the container had
-// an execution time of more than 10s then reset the timer back to the default
-func (m *containerMonitor) resetMonitor(successful bool) {
-	executionTime := time.Now().Sub(m.lastStartTime).Seconds()
-
-	if executionTime > 10 {
-		m.timeIncrement = defaultTimeIncrement
-	} else {
-		// otherwise we need to increment the amount of time we wait before restarting
-		// the process.  We will build up by multiplying the increment by 2
-		m.timeIncrement *= 2
-	}
-
-	// the container exited successfully so we need to reset the failure counter
-	if successful {
-		m.failureCount = 0
-	} else {
-		m.failureCount++
-	}
-}
-
-// waitForNextRestart waits with the default time increment to restart the container unless
-// a user or docker asks for the container to be stopped
-func (m *containerMonitor) waitForNextRestart() {
-	select {
-	case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond):
-	case <-m.stopChan:
-	}
-}
-
-// shouldRestart checks the restart policy and applies the rules to determine if
-// the container's process should be restarted
-func (m *containerMonitor) shouldRestart(exitCode int) bool {
-	m.mux.Lock()
-	defer m.mux.Unlock()
-
-	// do not restart if the user or docker has requested that this container be stopped
-	if m.shouldStop {
-		m.container.HasBeenManuallyStopped = !m.supervisor.IsShuttingDown()
-		return false
-	}
-
-	switch {
-	case m.restartPolicy.IsAlways(), m.restartPolicy.IsUnlessStopped():
-		return true
-	case m.restartPolicy.IsOnFailure():
-		// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
-		if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount > max {
-			logrus.Debugf("stopping restart of container %s because maximum failure could of %d has been reached",
-				stringid.TruncateID(m.container.ID), max)
-			return false
-		}
-
-		return exitCode != 0
-	}
-
-	return false
-}
-
-// callback ensures that the container's state is properly updated after we
-// received ack from the execution drivers
-func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
-	go func() {
-		for range chOOM {
-			m.logEvent("oom")
-		}
-	}()
-
-	if processConfig.Tty {
-		// The callback is called after the process start()
-		// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
-		// which we close here.
-		if c, ok := processConfig.Stdout.(io.Closer); ok {
-			c.Close()
-		}
-	}
-
-	m.container.SetRunning(pid)
-
-	// signal that the process has started
-	// close channel only if not closed
-	select {
-	case <-m.startSignal:
-	default:
-		close(m.startSignal)
-	}
-
-	if err := m.container.ToDiskLocking(); err != nil {
-		logrus.Errorf("Error saving container to disk: %v", err)
-	}
-	return nil
-}
-
-// resetContainer resets the container's IO and ensures that the command is able to be executed again
-// by copying the data into a new struct
-// if lock is true, then container locked during reset
-func (m *containerMonitor) resetContainer(lock bool) {
-	container := m.container
+// Reset puts a container into a state where it can be restarted again.
+func (container *Container) Reset(lock bool) {
 	if lock {
 		container.Lock()
 		defer container.Unlock()
@@ -344,12 +35,6 @@ func (m *containerMonitor) resetContainer(lock bool) {
 		logrus.Errorf("%s: %s", container.ID, err)
 	}
 
-	if container.Command != nil && container.Command.ProcessConfig.Terminal != nil {
-		if err := container.Command.ProcessConfig.Terminal.Close(); err != nil {
-			logrus.Errorf("%s: Error closing terminal: %s", container.ID, err)
-		}
-	}
-
 	// Re-create a brand new stdin pipe once the container exited
 	if container.Config.OpenStdin {
 		container.NewInputPipes()
@@ -365,9 +50,6 @@ func (m *containerMonitor) resetContainer(lock bool) {
 			select {
 			case <-time.After(loggerCloseTimeout):
 				logrus.Warnf("Logger didn't exit in time: logs may be truncated")
-				container.LogCopier.Close()
-				// always waits for the LogCopier to finished before closing
-				<-exit
 			case <-exit:
 			}
 		}
@@ -375,22 +57,4 @@ func (m *containerMonitor) resetContainer(lock bool) {
 		container.LogCopier = nil
 		container.LogDriver = nil
 	}
-
-	c := container.Command.ProcessConfig.Cmd
-
-	container.Command.ProcessConfig.Cmd = exec.Cmd{
-		Stdin:       c.Stdin,
-		Stdout:      c.Stdout,
-		Stderr:      c.Stderr,
-		Path:        c.Path,
-		Env:         c.Env,
-		ExtraFiles:  c.ExtraFiles,
-		Args:        c.Args,
-		Dir:         c.Dir,
-		SysProcAttr: c.SysProcAttr,
-	}
-}
-
-func (m *containerMonitor) logEvent(action string) {
-	m.supervisor.LogContainerEvent(m.container, action)
 }

+ 12 - 0
container/mounts_unix.go

@@ -0,0 +1,12 @@
+// +build !windows
+
+package container
+
+// Mount contains information for a mount operation.
+type Mount struct {
+	Source      string `json:"source"`
+	Destination string `json:"destination"`
+	Writable    bool   `json:"writable"`
+	Data        string `json:"data"`
+	Propagation string `json:"mountpropagation"`
+}
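
(Sketch, not from the PR; the path below is hypothetical.) The new Mount type replaces execdriver.Mount and carries lowercase JSON tags, so a mount serializes with the field names shown in the struct tags. A self-contained round trip, with the struct re-declared locally for illustration:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Mount mirrors container/mounts_unix.go above.
    type Mount struct {
        Source      string `json:"source"`
        Destination string `json:"destination"`
        Writable    bool   `json:"writable"`
        Data        string `json:"data"`
        Propagation string `json:"mountpropagation"`
    }

    func main() {
        m := Mount{
            Source:      "/var/lib/docker/containers/abc/resolv.conf", // hypothetical path
            Destination: "/etc/resolv.conf",
            Writable:    true,
        }
        b, err := json.Marshal(m)
        if err != nil {
            panic(err)
        }
        fmt.Println(string(b))
        // {"source":"/var/lib/docker/containers/abc/resolv.conf","destination":"/etc/resolv.conf","writable":true,"data":"","mountpropagation":""}
    }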

+ 8 - 0
container/mounts_windows.go

@@ -0,0 +1,8 @@
+package container
+
+// Mount contains information for a mount operation.
+type Mount struct {
+	Source      string `json:"source"`
+	Destination string `json:"destination"`
+	Writable    bool   `json:"writable"`
+}

+ 9 - 7
container/state.go

@@ -5,7 +5,6 @@ import (
 	"sync"
 	"time"
 
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/go-units"
 )
 
@@ -179,28 +178,31 @@ func (s *State) getExitCode() int {
 }
 
 // SetRunning sets the state of the container to "running".
-func (s *State) SetRunning(pid int) {
+func (s *State) SetRunning(pid int, initial bool) {
 	s.Error = ""
 	s.Running = true
 	s.Paused = false
 	s.Restarting = false
 	s.ExitCode = 0
 	s.Pid = pid
-	s.StartedAt = time.Now().UTC()
+	if initial {
+		s.StartedAt = time.Now().UTC()
+	}
 	close(s.waitChan) // fire waiters for start
 	s.waitChan = make(chan struct{})
 }
 
 // SetStoppedLocking locks the container state is sets it to "stopped".
-func (s *State) SetStoppedLocking(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetStoppedLocking(exitStatus *ExitStatus) {
 	s.Lock()
 	s.SetStopped(exitStatus)
 	s.Unlock()
 }
 
 // SetStopped sets the container state to "stopped" without locking.
-func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetStopped(exitStatus *ExitStatus) {
 	s.Running = false
+	s.Paused = false
 	s.Restarting = false
 	s.Pid = 0
 	s.FinishedAt = time.Now().UTC()
@@ -211,7 +213,7 @@ func (s *State) SetStopped(exitStatus *execdriver.ExitStatus) {
 
 // SetRestartingLocking is when docker handles the auto restart of containers when they are
 // in the middle of a stop and being restarted again
-func (s *State) SetRestartingLocking(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetRestartingLocking(exitStatus *ExitStatus) {
 	s.Lock()
 	s.SetRestarting(exitStatus)
 	s.Unlock()
@@ -219,7 +221,7 @@ func (s *State) SetRestartingLocking(exitStatus *execdriver.ExitStatus) {
 
 // SetRestarting sets the container state to "restarting".
 // It also sets the container PID to 0.
-func (s *State) SetRestarting(exitStatus *execdriver.ExitStatus) {
+func (s *State) SetRestarting(exitStatus *ExitStatus) {
 	// we should consider the container running when it is restarting because of
 	// all the checks in docker around rm/stop/etc
 	s.Running = true
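
(Illustrative sketch with a stripped-down State.) SetRunning gains an initial flag: StartedAt is stamped only on the first start, so a restart driven by the restart manager keeps the original start time.

    package main

    import (
        "fmt"
        "time"
    )

    type State struct {
        Running   bool
        Pid       int
        StartedAt time.Time
    }

    // SetRunning mirrors the new signature: StartedAt is set only when initial is true.
    func (s *State) SetRunning(pid int, initial bool) {
        s.Running = true
        s.Pid = pid
        if initial {
            s.StartedAt = time.Now().UTC()
        }
    }

    func main() {
        s := &State{}
        s.SetRunning(100, true) // first start: stamps StartedAt
        first := s.StartedAt

        time.Sleep(10 * time.Millisecond)
        s.SetRunning(101, false)              // restart: StartedAt preserved
        fmt.Println(s.StartedAt.Equal(first)) // true
    }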

+ 3 - 5
container/state_test.go

@@ -4,8 +4,6 @@ import (
 	"sync/atomic"
 	"testing"
 	"time"
-
-	"github.com/docker/docker/daemon/execdriver"
 )
 
 func TestStateRunStop(t *testing.T) {
@@ -19,7 +17,7 @@ func TestStateRunStop(t *testing.T) {
 			close(started)
 		}()
 		s.Lock()
-		s.SetRunning(i + 100)
+		s.SetRunning(i+100, false)
 		s.Unlock()
 
 		if !s.IsRunning() {
@@ -52,7 +50,7 @@ func TestStateRunStop(t *testing.T) {
 			atomic.StoreInt64(&exit, int64(exitCode))
 			close(stopped)
 		}()
-		s.SetStoppedLocking(&execdriver.ExitStatus{ExitCode: i})
+		s.SetStoppedLocking(&ExitStatus{ExitCode: i})
 		if s.IsRunning() {
 			t.Fatal("State is running")
 		}
@@ -93,7 +91,7 @@ func TestStateTimeoutWait(t *testing.T) {
 	}
 
 	s.Lock()
-	s.SetRunning(49)
+	s.SetRunning(49, false)
 	s.Unlock()
 
 	stopped := make(chan struct{})

+ 1 - 3
container/state_unix.go

@@ -2,11 +2,9 @@
 
 package container
 
-import "github.com/docker/docker/daemon/execdriver"
-
 // setFromExitStatus is a platform specific helper function to set the state
 // based on the ExitStatus structure.
-func (s *State) setFromExitStatus(exitStatus *execdriver.ExitStatus) {
+func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
 	s.ExitCode = exitStatus.ExitCode
 	s.OOMKilled = exitStatus.OOMKilled
 }

+ 1 - 3
container/state_windows.go

@@ -1,9 +1,7 @@
 package container
 
-import "github.com/docker/docker/daemon/execdriver"
-
 // setFromExitStatus is a platform specific helper function to set the state
 // based on the ExitStatus structure.
-func (s *State) setFromExitStatus(exitStatus *execdriver.ExitStatus) {
+func (s *State) setFromExitStatus(exitStatus *ExitStatus) {
 	s.ExitCode = exitStatus.ExitCode
 }

+ 0 - 9
daemon/README.md

@@ -1,9 +0,0 @@
-This directory contains code pertaining to running containers and storing images
-
-Code pertaining to running containers:
-
- - execdriver
-
-Code pertaining to storing images:
-
- - graphdriver

+ 30 - 0
daemon/apparmor_default.go

@@ -0,0 +1,30 @@
+// +build linux
+
+package daemon
+
+import (
+	"github.com/Sirupsen/logrus"
+	aaprofile "github.com/docker/docker/profiles/apparmor"
+	"github.com/opencontainers/runc/libcontainer/apparmor"
+)
+
+// Define constants for native driver
+const (
+	defaultApparmorProfile = "docker-default"
+)
+
+func installDefaultAppArmorProfile() {
+	if apparmor.IsEnabled() {
+		if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil {
+			apparmorProfiles := []string{defaultApparmorProfile}
+
+			// Allow the daemon to run if loading failed, as long as the profiles are
+			// already active (possibly from another run, loaded manually, or via system startup)
+			for _, policy := range apparmorProfiles {
+				if err := aaprofile.IsLoaded(policy); err != nil {
+					logrus.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
+				}
+			}
+		}
+	}
+}

+ 6 - 0
daemon/apparmor_default_unsupported.go

@@ -0,0 +1,6 @@
+// +build !linux
+
+package daemon
+
+func installDefaultAppArmorProfile() {
+}

+ 10 - 4
daemon/execdriver/utils_unix.go → daemon/caps/utils_unix.go

@@ -1,6 +1,6 @@
 // +build !windows
 
-package execdriver
+package caps
 
 import (
 	"fmt"
@@ -24,7 +24,7 @@ func init() {
 		}
 		capabilityList = append(capabilityList,
 			&CapabilityMapping{
-				Key:   strings.ToUpper(cap.String()),
+				Key:   "CAP_" + strings.ToUpper(cap.String()),
 				Value: cap,
 			},
 		)
@@ -77,12 +77,16 @@ func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
 		allCaps = GetAllCapabilities()
 	)
 
+	// FIXME(tonistiigi): docker format is without CAP_ prefix, oci is with prefix
+	// Currently they are mixed in here. We should do conversion in one place.
+
 	// look for invalid cap in the drop list
 	for _, cap := range drops {
 		if strings.ToLower(cap) == "all" {
 			continue
 		}
-		if !stringutils.InSlice(allCaps, cap) {
+
+		if !stringutils.InSlice(allCaps, "CAP_"+cap) {
 			return nil, fmt.Errorf("Unknown capability drop: %q", cap)
 		}
 	}
@@ -100,7 +104,7 @@ func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
 			}
 
 			// if we don't drop `all`, add back all the non-dropped caps
-			if !stringutils.InSlice(drops, cap) {
+			if !stringutils.InSlice(drops, cap[4:]) {
 				newCaps = append(newCaps, strings.ToUpper(cap))
 			}
 		}
@@ -112,6 +116,8 @@ func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
 			continue
 		}
 
+		cap = "CAP_" + cap
+
 		if !stringutils.InSlice(allCaps, cap) {
 			return nil, fmt.Errorf("Unknown capability to add: %q", cap)
 		}
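
(Sketch; the helper names are illustrative, not from the PR.) As the FIXME above notes, Docker-style capability names ("NET_ADMIN") and OCI-style names ("CAP_NET_ADMIN") are now mixed inside TweakCapabilities; the added lines bridge them by prepending CAP_ or slicing it off (cap[4:]). The conversion in isolation:

    package main

    import (
        "fmt"
        "strings"
    )

    // toOCI converts a Docker-style capability name ("net_admin") to the OCI form ("CAP_NET_ADMIN").
    func toOCI(cap string) string {
        return "CAP_" + strings.ToUpper(cap)
    }

    // toDocker strips the prefix again, mirroring the cap[4:] slice in the diff above.
    func toDocker(cap string) string {
        return strings.TrimPrefix(cap, "CAP_")
    }

    func main() {
        fmt.Println(toOCI("net_admin"))        // CAP_NET_ADMIN
        fmt.Println(toDocker("CAP_NET_ADMIN")) // NET_ADMIN
    }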

+ 1 - 1
daemon/config.go

@@ -115,7 +115,7 @@ func (config *Config) InstallCommonFlags(cmd *flag.FlagSet, usageFn func(string)
 	cmd.Var(opts.NewNamedListOptsRef("exec-opts", &config.ExecOptions, nil), []string{"-exec-opt"}, usageFn("Set exec driver options"))
 	cmd.StringVar(&config.Pidfile, []string{"p", "-pidfile"}, defaultPidFile, usageFn("Path to use for daemon PID file"))
 	cmd.StringVar(&config.Root, []string{"g", "-graph"}, defaultGraph, usageFn("Root of the Docker runtime"))
-	cmd.StringVar(&config.ExecRoot, []string{"-exec-root"}, "/var/run/docker", usageFn("Root of the Docker execdriver"))
+	cmd.StringVar(&config.ExecRoot, []string{"-exec-root"}, defaultExecRoot, usageFn("Root of the Docker execdriver"))
 	cmd.BoolVar(&config.AutoRestart, []string{"#r", "#-restart"}, true, usageFn("--restart on the daemon has been deprecated in favor of --restart policies on docker run"))
 	cmd.StringVar(&config.GraphDriver, []string{"s", "-storage-driver"}, "", usageFn("Storage driver to use"))
 	cmd.IntVar(&config.Mtu, []string{"#mtu", "-mtu"}, 0, usageFn("Set the containers network MTU"))

+ 5 - 2
daemon/config_unix.go

@@ -12,8 +12,9 @@ import (
 )
 
 var (
-	defaultPidFile = "/var/run/docker.pid"
-	defaultGraph   = "/var/lib/docker"
+	defaultPidFile  = "/var/run/docker.pid"
+	defaultGraph    = "/var/lib/docker"
+	defaultExecRoot = "/var/run/docker"
 )
 
 // Config defines the configuration of a docker daemon.
@@ -30,6 +31,7 @@ type Config struct {
 	RemappedRoot         string                   `json:"userns-remap,omitempty"`
 	CgroupParent         string                   `json:"cgroup-parent,omitempty"`
 	Ulimits              map[string]*units.Ulimit `json:"default-ulimits,omitempty"`
+	ContainerdAddr       string                   `json:"containerd,omitempty"`
 }
 
 // bridgeConfig stores all the bridge driver specific
@@ -80,6 +82,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin
 	cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API"))
 	cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers"))
 	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
+	cmd.StringVar(&config.ContainerdAddr, []string{"-containerd"}, "", usageFn("Path to containerD socket"))
 
 	config.attachExperimentalFlags(cmd, usageFn)
 }

+ 3 - 2
daemon/config_windows.go

@@ -7,8 +7,9 @@ import (
 )
 
 var (
-	defaultPidFile = os.Getenv("programdata") + string(os.PathSeparator) + "docker.pid"
-	defaultGraph   = os.Getenv("programdata") + string(os.PathSeparator) + "docker"
+	defaultPidFile  = os.Getenv("programdata") + string(os.PathSeparator) + "docker.pid"
+	defaultGraph    = os.Getenv("programdata") + string(os.PathSeparator) + "docker"
+	defaultExecRoot = defaultGraph
 )
 
 // bridgeConfig stores all the bridge driver specific

+ 3 - 4
daemon/container_operations.go

@@ -48,11 +48,10 @@ func (daemon *Daemon) buildSandboxOptions(container *container.Container, n libn
 		sboxOptions = append(sboxOptions, libnetwork.OptionUseDefaultSandbox())
 		sboxOptions = append(sboxOptions, libnetwork.OptionOriginHostsPath("/etc/hosts"))
 		sboxOptions = append(sboxOptions, libnetwork.OptionOriginResolvConfPath("/etc/resolv.conf"))
-	} else if daemon.execDriver.SupportsHooks() {
-		// OptionUseExternalKey is mandatory for userns support.
-		// But optional for non-userns support
-		sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey())
 	}
+	// OptionUseExternalKey is mandatory for userns support.
+	// But optional for non-userns support
+	sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey())
 
 	container.HostsPath, err = container.GetRootResourcePath("hosts")
 	if err != nil {

+ 56 - 235
daemon/container_operations_unix.go

@@ -13,7 +13,6 @@ import (
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/daemon/links"
 	"github.com/docker/docker/pkg/fileutils"
 	"github.com/docker/docker/pkg/idtools"
@@ -22,13 +21,16 @@ import (
 	"github.com/docker/docker/runconfig"
 	containertypes "github.com/docker/engine-api/types/container"
 	networktypes "github.com/docker/engine-api/types/network"
-	"github.com/docker/go-units"
 	"github.com/docker/libnetwork"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/devices"
 	"github.com/opencontainers/runc/libcontainer/label"
+	"github.com/opencontainers/specs/specs-go"
 )
 
+func u32Ptr(i int64) *uint32     { u := uint32(i); return &u }
+func fmPtr(i int64) *os.FileMode { fm := os.FileMode(i); return &fm }
+
 func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]string, error) {
 	var env []string
 	children := daemon.children(container)
@@ -64,220 +66,6 @@ func (daemon *Daemon) setupLinkedContainers(container *container.Container) ([]s
 	return env, nil
 }
 
-func (daemon *Daemon) populateCommand(c *container.Container, env []string) error {
-	var en *execdriver.Network
-	if !c.Config.NetworkDisabled {
-		en = &execdriver.Network{}
-		if !daemon.execDriver.SupportsHooks() || c.HostConfig.NetworkMode.IsHost() {
-			en.NamespacePath = c.NetworkSettings.SandboxKey
-		}
-
-		if c.HostConfig.NetworkMode.IsContainer() {
-			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
-			if err != nil {
-				return err
-			}
-			en.ContainerID = nc.ID
-		}
-	}
-
-	ipc := &execdriver.Ipc{}
-	var err error
-	c.ShmPath, err = c.ShmResourcePath()
-	if err != nil {
-		return err
-	}
-
-	if c.HostConfig.IpcMode.IsContainer() {
-		ic, err := daemon.getIpcContainer(c)
-		if err != nil {
-			return err
-		}
-		ipc.ContainerID = ic.ID
-		c.ShmPath = ic.ShmPath
-	} else {
-		ipc.HostIpc = c.HostConfig.IpcMode.IsHost()
-		if ipc.HostIpc {
-			if _, err := os.Stat("/dev/shm"); err != nil {
-				return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
-			}
-			c.ShmPath = "/dev/shm"
-		}
-	}
-
-	pid := &execdriver.Pid{}
-	pid.HostPid = c.HostConfig.PidMode.IsHost()
-
-	uts := &execdriver.UTS{
-		HostUTS: c.HostConfig.UTSMode.IsHost(),
-	}
-
-	// Build lists of devices allowed and created within the container.
-	var userSpecifiedDevices []*configs.Device
-	for _, deviceMapping := range c.HostConfig.Devices {
-		devs, err := getDevicesFromPath(deviceMapping)
-		if err != nil {
-			return err
-		}
-
-		userSpecifiedDevices = append(userSpecifiedDevices, devs...)
-	}
-
-	allowedDevices := mergeDevices(configs.DefaultAllowedDevices, userSpecifiedDevices)
-
-	autoCreatedDevices := mergeDevices(configs.DefaultAutoCreatedDevices, userSpecifiedDevices)
-
-	var rlimits []*units.Rlimit
-	ulimits := c.HostConfig.Ulimits
-
-	// Merge ulimits with daemon defaults
-	ulIdx := make(map[string]*units.Ulimit)
-	for _, ul := range ulimits {
-		ulIdx[ul.Name] = ul
-	}
-	for name, ul := range daemon.configStore.Ulimits {
-		if _, exists := ulIdx[name]; !exists {
-			ulimits = append(ulimits, ul)
-		}
-	}
-
-	weightDevices, err := getBlkioWeightDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	readBpsDevice, err := getBlkioReadBpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	writeBpsDevice, err := getBlkioWriteBpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	readIOpsDevice, err := getBlkioReadIOpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	writeIOpsDevice, err := getBlkioWriteIOpsDevices(c.HostConfig)
-	if err != nil {
-		return err
-	}
-
-	for _, limit := range ulimits {
-		rl, err := limit.GetRlimit()
-		if err != nil {
-			return err
-		}
-		rlimits = append(rlimits, rl)
-	}
-
-	resources := &execdriver.Resources{
-		CommonResources: execdriver.CommonResources{
-			Memory:            c.HostConfig.Memory,
-			MemoryReservation: c.HostConfig.MemoryReservation,
-			CPUShares:         c.HostConfig.CPUShares,
-			BlkioWeight:       c.HostConfig.BlkioWeight,
-		},
-		MemorySwap:                   c.HostConfig.MemorySwap,
-		KernelMemory:                 c.HostConfig.KernelMemory,
-		CpusetCpus:                   c.HostConfig.CpusetCpus,
-		CpusetMems:                   c.HostConfig.CpusetMems,
-		CPUPeriod:                    c.HostConfig.CPUPeriod,
-		CPUQuota:                     c.HostConfig.CPUQuota,
-		Rlimits:                      rlimits,
-		BlkioWeightDevice:            weightDevices,
-		BlkioThrottleReadBpsDevice:   readBpsDevice,
-		BlkioThrottleWriteBpsDevice:  writeBpsDevice,
-		BlkioThrottleReadIOpsDevice:  readIOpsDevice,
-		BlkioThrottleWriteIOpsDevice: writeIOpsDevice,
-		PidsLimit:                    c.HostConfig.PidsLimit,
-		MemorySwappiness:             -1,
-	}
-
-	if c.HostConfig.OomKillDisable != nil {
-		resources.OomKillDisable = *c.HostConfig.OomKillDisable
-	}
-	if c.HostConfig.MemorySwappiness != nil {
-		resources.MemorySwappiness = *c.HostConfig.MemorySwappiness
-	}
-
-	processConfig := execdriver.ProcessConfig{
-		CommonProcessConfig: execdriver.CommonProcessConfig{
-			Entrypoint: c.Path,
-			Arguments:  c.Args,
-			Tty:        c.Config.Tty,
-		},
-		Privileged: c.HostConfig.Privileged,
-		User:       c.Config.User,
-	}
-
-	processConfig.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
-	processConfig.Env = env
-
-	remappedRoot := &execdriver.User{}
-	if c.HostConfig.UsernsMode.IsPrivate() {
-		rootUID, rootGID := daemon.GetRemappedUIDGID()
-		if rootUID != 0 {
-			remappedRoot.UID = rootUID
-			remappedRoot.GID = rootGID
-		}
-	}
-
-	uidMap, gidMap := daemon.GetUIDGIDMaps()
-
-	if !daemon.seccompEnabled {
-		if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" {
-			return fmt.Errorf("Seccomp is not enabled in your kernel, cannot run a custom seccomp profile.")
-		}
-		logrus.Warn("Seccomp is not enabled in your kernel, running container without default profile.")
-		c.SeccompProfile = "unconfined"
-	}
-
-	defaultCgroupParent := "/docker"
-	if daemon.configStore.CgroupParent != "" {
-		defaultCgroupParent = daemon.configStore.CgroupParent
-	} else if daemon.usingSystemd() {
-		defaultCgroupParent = "system.slice"
-	}
-	c.Command = &execdriver.Command{
-		CommonCommand: execdriver.CommonCommand{
-			ID:            c.ID,
-			MountLabel:    c.GetMountLabel(),
-			Network:       en,
-			ProcessConfig: processConfig,
-			ProcessLabel:  c.GetProcessLabel(),
-			Rootfs:        c.BaseFS,
-			Resources:     resources,
-			WorkingDir:    c.Config.WorkingDir,
-		},
-		AllowedDevices:     allowedDevices,
-		AppArmorProfile:    c.AppArmorProfile,
-		AutoCreatedDevices: autoCreatedDevices,
-		CapAdd:             c.HostConfig.CapAdd,
-		CapDrop:            c.HostConfig.CapDrop,
-		CgroupParent:       defaultCgroupParent,
-		GIDMapping:         gidMap,
-		GroupAdd:           c.HostConfig.GroupAdd,
-		Ipc:                ipc,
-		OomScoreAdj:        c.HostConfig.OomScoreAdj,
-		Pid:                pid,
-		ReadonlyRootfs:     c.HostConfig.ReadonlyRootfs,
-		RemappedRoot:       remappedRoot,
-		SeccompProfile:     c.SeccompProfile,
-		UIDMapping:         uidMap,
-		UTS:                uts,
-		NoNewPrivileges:    c.NoNewPrivileges,
-	}
-	if c.HostConfig.CgroupParent != "" {
-		c.Command.CgroupParent = c.HostConfig.CgroupParent
-	}
-
-	return nil
-}
-
 // getSize returns the real size & virtual size of the container.
 func (daemon *Daemon) getSize(container *container.Container) (int64, int64) {
 	var (
@@ -395,28 +183,49 @@ func (daemon *Daemon) getIpcContainer(container *container.Container) (*containe
 }
 
 func (daemon *Daemon) setupIpcDirs(c *container.Container) error {
-	rootUID, rootGID := daemon.GetRemappedUIDGID()
-	if !c.HasMountFor("/dev/shm") {
-		shmPath, err := c.ShmResourcePath()
+	var err error
+
+	c.ShmPath, err = c.ShmResourcePath()
+	if err != nil {
+		return err
+	}
+
+	if c.HostConfig.IpcMode.IsContainer() {
+		ic, err := daemon.getIpcContainer(c)
 		if err != nil {
 			return err
 		}
-
-		if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
-			return err
+		c.ShmPath = ic.ShmPath
+	} else if c.HostConfig.IpcMode.IsHost() {
+		if _, err := os.Stat("/dev/shm"); err != nil {
+			return fmt.Errorf("/dev/shm is not mounted, but must be for --ipc=host")
 		}
+		c.ShmPath = "/dev/shm"
+	} else {
+		rootUID, rootGID := daemon.GetRemappedUIDGID()
+		if !c.HasMountFor("/dev/shm") {
+			shmPath, err := c.ShmResourcePath()
+			if err != nil {
+				return err
+			}
 
-		shmSize := container.DefaultSHMSize
-		if c.HostConfig.ShmSize != 0 {
-			shmSize = c.HostConfig.ShmSize
-		}
-		shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
-		if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
-			return fmt.Errorf("mounting shm tmpfs: %s", err)
-		}
-		if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
-			return err
+			if err := idtools.MkdirAllAs(shmPath, 0700, rootUID, rootGID); err != nil {
+				return err
+			}
+
+			shmSize := container.DefaultSHMSize
+			if c.HostConfig.ShmSize != 0 {
+				shmSize = c.HostConfig.ShmSize
+			}
+			shmproperty := "mode=1777,size=" + strconv.FormatInt(shmSize, 10)
+			if err := syscall.Mount("shm", shmPath, "tmpfs", uintptr(syscall.MS_NOEXEC|syscall.MS_NOSUID|syscall.MS_NODEV), label.FormatMountLabel(shmproperty, c.GetMountLabel())); err != nil {
+				return fmt.Errorf("mounting shm tmpfs: %s", err)
+			}
+			if err := os.Chown(shmPath, rootUID, rootGID); err != nil {
+				return err
+			}
 		}
+
 	}
 
 	return nil
@@ -474,7 +283,19 @@ func killProcessDirectly(container *container.Container) error {
 	return nil
 }
 
-func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*configs.Device, err error) {
+func specDevice(d *configs.Device) specs.Device {
+	return specs.Device{
+		Type:     string(d.Type),
+		Path:     d.Path,
+		Major:    d.Major,
+		Minor:    d.Minor,
+		FileMode: fmPtr(int64(d.FileMode)),
+		UID:      u32Ptr(int64(d.Uid)),
+		GID:      u32Ptr(int64(d.Gid)),
+	}
+}
+
+func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []specs.Device, err error) {
 	resolvedPathOnHost := deviceMapping.PathOnHost
 
 	// check if it is a symbolic link
@@ -488,7 +309,7 @@ func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*con
 	// if there was no error, return the device
 	if err == nil {
 		device.Path = deviceMapping.PathInContainer
-		return append(devs, device), nil
+		return append(devs, specDevice(device)), nil
 	}
 
 	// if the device is not a device node
@@ -508,7 +329,7 @@ func getDevicesFromPath(deviceMapping containertypes.DeviceMapping) (devs []*con
 
 				// add the device to userSpecified devices
 				childDevice.Path = strings.Replace(dpath, resolvedPathOnHost, deviceMapping.PathInContainer, 1)
-				devs = append(devs, childDevice)
+				devs = append(devs, specDevice(childDevice))
 
 				return nil
 			})

+ 1 - 135
daemon/container_operations_windows.go

@@ -4,14 +4,9 @@ package daemon
 
 import (
 	"fmt"
-	"strings"
-
-	networktypes "github.com/docker/engine-api/types/network"
 
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/daemon/execdriver/windows"
-	"github.com/docker/docker/layer"
+	networktypes "github.com/docker/engine-api/types/network"
 	"github.com/docker/libnetwork"
 )
 
@@ -29,135 +24,6 @@ func (daemon *Daemon) DisconnectFromNetwork(container *container.Container, n li
 	return fmt.Errorf("Windows does not support disconnecting a running container from a network")
 }
 
-func (daemon *Daemon) populateCommand(c *container.Container, env []string) error {
-	en := &execdriver.Network{
-		Interface: nil,
-	}
-
-	var epList []string
-
-	// Connect all the libnetwork allocated networks to the container
-	if c.NetworkSettings != nil {
-		for n := range c.NetworkSettings.Networks {
-			sn, err := daemon.FindNetwork(n)
-			if err != nil {
-				continue
-			}
-
-			ep, err := c.GetEndpointInNetwork(sn)
-			if err != nil {
-				continue
-			}
-
-			data, err := ep.DriverInfo()
-			if err != nil {
-				continue
-			}
-			if data["hnsid"] != nil {
-				epList = append(epList, data["hnsid"].(string))
-			}
-		}
-	}
-
-	if daemon.netController == nil {
-		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
-		switch parts[0] {
-		case "none":
-		case "default", "": // empty string to support existing containers
-			if !c.Config.NetworkDisabled {
-				en.Interface = &execdriver.NetworkInterface{
-					MacAddress:   c.Config.MacAddress,
-					Bridge:       daemon.configStore.bridgeConfig.Iface,
-					PortBindings: c.HostConfig.PortBindings,
-
-					// TODO Windows. Include IPAddress. There already is a
-					// property IPAddress on execDrive.CommonNetworkInterface,
-					// but there is no CLI option in docker to pass through
-					// an IPAddress on docker run.
-				}
-			}
-		default:
-			return fmt.Errorf("invalid network mode: %s", c.HostConfig.NetworkMode)
-		}
-	}
-
-	// TODO Windows. More resource controls to be implemented later.
-	resources := &execdriver.Resources{
-		CommonResources: execdriver.CommonResources{
-			CPUShares: c.HostConfig.CPUShares,
-		},
-	}
-
-	processConfig := execdriver.ProcessConfig{
-		CommonProcessConfig: execdriver.CommonProcessConfig{
-			Entrypoint: c.Path,
-			Arguments:  c.Args,
-			Tty:        c.Config.Tty,
-		},
-		ConsoleSize: c.HostConfig.ConsoleSize,
-	}
-
-	processConfig.Env = env
-
-	var layerPaths []string
-	img, err := daemon.imageStore.Get(c.ImageID)
-	if err != nil {
-		return fmt.Errorf("Failed to graph.Get on ImageID %s - %s", c.ImageID, err)
-	}
-
-	if img.RootFS != nil && img.RootFS.Type == "layers+base" {
-		max := len(img.RootFS.DiffIDs)
-		for i := 0; i <= max; i++ {
-			img.RootFS.DiffIDs = img.RootFS.DiffIDs[:i]
-			path, err := layer.GetLayerPath(daemon.layerStore, img.RootFS.ChainID())
-			if err != nil {
-				return fmt.Errorf("Failed to get layer path from graphdriver %s for ImageID %s - %s", daemon.layerStore, img.RootFS.ChainID(), err)
-			}
-			// Reverse order, expecting parent most first
-			layerPaths = append([]string{path}, layerPaths...)
-		}
-	}
-
-	m, err := c.RWLayer.Metadata()
-	if err != nil {
-		return fmt.Errorf("Failed to get layer metadata - %s", err)
-	}
-	layerFolder := m["dir"]
-
-	var hvPartition bool
-	// Work out the isolation (whether it is a hypervisor partition)
-	if c.HostConfig.Isolation.IsDefault() {
-		// Not specified by caller. Take daemon default
-		hvPartition = windows.DefaultIsolation.IsHyperV()
-	} else {
-		// Take value specified by caller
-		hvPartition = c.HostConfig.Isolation.IsHyperV()
-	}
-
-	c.Command = &execdriver.Command{
-		CommonCommand: execdriver.CommonCommand{
-			ID:            c.ID,
-			Rootfs:        c.BaseFS,
-			WorkingDir:    c.Config.WorkingDir,
-			Network:       en,
-			MountLabel:    c.GetMountLabel(),
-			Resources:     resources,
-			ProcessConfig: processConfig,
-			ProcessLabel:  c.GetProcessLabel(),
-		},
-		FirstStart:  !c.HasBeenStartedBefore,
-		LayerFolder: layerFolder,
-		LayerPaths:  layerPaths,
-		Hostname:    c.Config.Hostname,
-		Isolation:   string(c.HostConfig.Isolation),
-		ArgsEscaped: c.Config.ArgsEscaped,
-		HvPartition: hvPartition,
-		EpList:      epList,
-	}
-
-	return nil
-}
-
 // getSize returns real size & virtual size
 func (daemon *Daemon) getSize(container *container.Container) (int64, int64) {
 	// TODO Windows

+ 89 - 75
daemon/daemon.go

@@ -20,13 +20,12 @@ import (
 	"time"
 
 	"github.com/Sirupsen/logrus"
+	containerd "github.com/docker/containerd/api/grpc/types"
 	"github.com/docker/docker/api"
 	"github.com/docker/docker/builder"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/daemon/events"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/daemon/execdriver/execdrivers"
 	"github.com/docker/docker/errors"
 	"github.com/docker/engine-api/types"
 	containertypes "github.com/docker/engine-api/types/container"
@@ -46,12 +45,12 @@ import (
 	"github.com/docker/docker/image"
 	"github.com/docker/docker/image/tarexport"
 	"github.com/docker/docker/layer"
+	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/migrate/v1"
 	"github.com/docker/docker/pkg/archive"
 	"github.com/docker/docker/pkg/fileutils"
 	"github.com/docker/docker/pkg/graphdb"
 	"github.com/docker/docker/pkg/idtools"
-	"github.com/docker/docker/pkg/mount"
 	"github.com/docker/docker/pkg/namesgenerator"
 	"github.com/docker/docker/pkg/progress"
 	"github.com/docker/docker/pkg/registrar"
@@ -115,7 +114,6 @@ type Daemon struct {
 	trustKey                  libtrust.PrivateKey
 	idIndex                   *truncindex.TruncIndex
 	configStore               *Config
-	execDriver                execdriver.Driver
 	statsCollector            *statsCollector
 	defaultLogConfig          containertypes.LogConfig
 	RegistryService           *registry.Service
@@ -132,6 +130,8 @@ type Daemon struct {
 	imageStore                image.Store
 	nameIndex                 *registrar.Registrar
 	linkIndex                 *linkIndex
+	containerd                libcontainerd.Client
+	defaultIsolation          containertypes.Isolation // Default isolation mode on Windows
 }
 
 // GetContainer looks for a container using the provided information, which could be
@@ -220,36 +220,16 @@ func (daemon *Daemon) registerName(container *container.Container) error {
 }
 
 // Register makes a container object usable by the daemon as <container.ID>
-func (daemon *Daemon) Register(container *container.Container) error {
+func (daemon *Daemon) Register(c *container.Container) error {
 	// Attach to stdout and stderr
-	if container.Config.OpenStdin {
-		container.NewInputPipes()
+	if c.Config.OpenStdin {
+		c.NewInputPipes()
 	} else {
-		container.NewNopInputPipe()
+		c.NewNopInputPipe()
 	}
 
-	daemon.containers.Add(container.ID, container)
-	daemon.idIndex.Add(container.ID)
-
-	if container.IsRunning() {
-		logrus.Debugf("killing old running container %s", container.ID)
-		// Set exit code to 128 + SIGKILL (9) to properly represent unsuccessful exit
-		container.SetStoppedLocking(&execdriver.ExitStatus{ExitCode: 137})
-		// use the current driver and ensure that the container is dead x.x
-		cmd := &execdriver.Command{
-			CommonCommand: execdriver.CommonCommand{
-				ID: container.ID,
-			},
-		}
-		daemon.execDriver.Terminate(cmd)
-
-		container.UnmountIpcMounts(mount.Unmount)
-
-		daemon.Unmount(container)
-		if err := container.ToDiskLocking(); err != nil {
-			logrus.Errorf("Error saving stopped state to disk: %v", err)
-		}
-	}
+	daemon.containers.Add(c.ID, c)
+	daemon.idIndex.Add(c.ID)
 
 	return nil
 }
@@ -307,17 +287,38 @@ func (daemon *Daemon) restore() error {
 			logrus.Errorf("Failed to register container %s: %s", c.ID, err)
 			continue
 		}
+	}
+	var wg sync.WaitGroup
+	var mapLock sync.Mutex
+	for _, c := range containers {
+		wg.Add(1)
+		go func(c *container.Container) {
+			defer wg.Done()
+			if c.IsRunning() || c.IsPaused() {
+				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
+					logrus.Errorf("Failed to restore with containerd: %q", err)
+					return
+				}
+			}
+			// fixme: only if not running
+			// get list of containers we need to restart
+			if daemon.configStore.AutoRestart && !c.IsRunning() && !c.IsPaused() && c.ShouldRestart() {
+				mapLock.Lock()
+				restartContainers[c] = make(chan struct{})
+				mapLock.Unlock()
+			} else if !c.IsRunning() && !c.IsPaused() {
+				if mountid, err := daemon.layerStore.GetMountID(c.ID); err == nil {
+					daemon.cleanupMountsByID(mountid)
+				}
+			}
 
-		// get list of containers we need to restart
-		if daemon.configStore.AutoRestart && c.ShouldRestart() {
-			restartContainers[c] = make(chan struct{})
-		}
-
-		// if c.hostConfig.Links is nil (not just empty), then it is using the old sqlite links and needs to be migrated
-		if c.HostConfig != nil && c.HostConfig.Links == nil {
-			migrateLegacyLinks = true
-		}
+			// if c.hostConfig.Links is nil (not just empty), then it is using the old sqlite links and needs to be migrated
+			if c.HostConfig != nil && c.HostConfig.Links == nil {
+				migrateLegacyLinks = true
+			}
+		}(c)
 	}
+	wg.Wait()
 
 	// migrate any legacy links from sqlite
 	linkdbFile := filepath.Join(daemon.root, "linkgraph.db")
@@ -599,7 +600,7 @@ func (daemon *Daemon) registerLink(parent, child *container.Container, alias str
 
 // NewDaemon sets up everything for the daemon to be able to service
 // requests from the webserver.
-func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemon, err error) {
+func NewDaemon(config *Config, registryService *registry.Service, containerdRemote libcontainerd.Remote) (daemon *Daemon, err error) {
 	setDefaultMtu(config)
 
 	// Ensure we have compatible and valid configuration options
@@ -659,7 +660,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 	}
 	os.Setenv("TMPDIR", realTmp)
 
-	d := &Daemon{}
+	d := &Daemon{configStore: config}
 	// Ensure the daemon is properly shutdown if there is a failure during
 	// initialization
 	defer func() {
@@ -670,6 +671,11 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 		}
 	}()
 
+	// Set the default isolation mode (only applicable on Windows)
+	if err := d.setDefaultIsolation(); err != nil {
+		return nil, fmt.Errorf("error setting default isolation mode: %v", err)
+	}
+
 	// Verify logging driver type
 	if config.LogConfig.Type != "none" {
 		if _, err := logger.GetLogDriver(config.LogConfig.Type); err != nil {
@@ -682,6 +688,7 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 		logrus.Warnf("Failed to configure golang's threads limit: %v", err)
 	}
 
+	installDefaultAppArmorProfile()
 	daemonRepo := filepath.Join(config.Root, "containers")
 	if err := idtools.MkdirAllAs(daemonRepo, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
 		return nil, err
@@ -781,11 +788,6 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 		return nil, fmt.Errorf("Devices cgroup isn't mounted")
 	}
 
-	ed, err := execdrivers.NewDriver(config.ExecOptions, config.ExecRoot, config.Root, sysInfo)
-	if err != nil {
-		return nil, err
-	}
-
 	d.ID = trustKey.PublicKey().KeyID()
 	d.repository = daemonRepo
 	d.containers = container.NewMemoryStore()
@@ -794,8 +796,6 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 	d.distributionMetadataStore = distributionMetadataStore
 	d.trustKey = trustKey
 	d.idIndex = truncindex.NewTruncIndex([]string{})
-	d.configStore = config
-	d.execDriver = ed
 	d.statsCollector = d.newStatsCollector(1 * time.Second)
 	d.defaultLogConfig = containertypes.LogConfig{
 		Type:   config.LogConfig.Type,
@@ -812,10 +812,12 @@ func NewDaemon(config *Config, registryService *registry.Service) (daemon *Daemo
 	d.nameIndex = registrar.NewRegistrar()
 	d.linkIndex = newLinkIndex()
 
-	if err := d.cleanupMounts(); err != nil {
+	go d.execCommandGC()
+
+	d.containerd, err = containerdRemote.Client(d)
+	if err != nil {
 		return nil, err
 	}
-	go d.execCommandGC()
 
 	if err := d.restore(); err != nil {
 		return nil, err
@@ -877,6 +879,9 @@ func (daemon *Daemon) Shutdown() error {
 				logrus.Errorf("Stop container error: %v", err)
 				return
 			}
+			if mountid, err := daemon.layerStore.GetMountID(c.ID); err == nil {
+				daemon.cleanupMountsByID(mountid)
+			}
 			logrus.Debugf("container stopped %s", c.ID)
 		})
 	}
@@ -923,29 +928,16 @@ func (daemon *Daemon) Mount(container *container.Container) error {
 }
 
 // Unmount unsets the container base filesystem
-func (daemon *Daemon) Unmount(container *container.Container) {
+func (daemon *Daemon) Unmount(container *container.Container) error {
 	if err := container.RWLayer.Unmount(); err != nil {
 		logrus.Errorf("Error unmounting container %s: %s", container.ID, err)
+		return err
 	}
-}
-
-// Run uses the execution driver to run a given container
-func (daemon *Daemon) Run(c *container.Container, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (execdriver.ExitStatus, error) {
-	hooks := execdriver.Hooks{
-		Start: startCallback,
-	}
-	hooks.PreStart = append(hooks.PreStart, func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
-		return daemon.setNetworkNamespaceKey(c.ID, pid)
-	})
-	return daemon.execDriver.Run(c.Command, pipes, hooks)
+	return nil
 }
 
 func (daemon *Daemon) kill(c *container.Container, sig int) error {
-	return daemon.execDriver.Kill(c.Command, sig)
-}
-
-func (daemon *Daemon) stats(c *container.Container) (*execdriver.ResourceStats, error) {
-	return daemon.execDriver.Stats(c.ID)
+	return daemon.containerd.Signal(c.ID, sig)
 }
 
 func (daemon *Daemon) subscribeToContainerStats(c *container.Container) chan interface{} {
@@ -1322,12 +1314,6 @@ func (daemon *Daemon) GraphDriverName() string {
 	return daemon.layerStore.DriverName()
 }
 
-// ExecutionDriver returns the currently used driver for creating and
-// starting execs in a container.
-func (daemon *Daemon) ExecutionDriver() execdriver.Driver {
-	return daemon.execDriver
-}
-
 // GetUIDGIDMaps returns the current daemon's user namespace settings
 // for the full uid and gid maps which will be applied to containers
 // started in this instance.
@@ -1536,7 +1522,7 @@ func (daemon *Daemon) IsShuttingDown() bool {
 }
 
 // GetContainerStats collects all the stats published by a container
-func (daemon *Daemon) GetContainerStats(container *container.Container) (*execdriver.ResourceStats, error) {
+func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) {
 	stats, err := daemon.stats(container)
 	if err != nil {
 		return nil, err
@@ -1547,7 +1533,22 @@ func (daemon *Daemon) GetContainerStats(container *container.Container) (*execdr
 	if nwStats, err = daemon.getNetworkStats(container); err != nil {
 		return nil, err
 	}
-	stats.Interfaces = nwStats
+
+	stats.Networks = make(map[string]types.NetworkStats)
+	for _, iface := range nwStats {
+		// For API Version >= 1.21, the original data of network will
+		// be returned.
+		stats.Networks[iface.Name] = types.NetworkStats{
+			RxBytes:   iface.RxBytes,
+			RxPackets: iface.RxPackets,
+			RxErrors:  iface.RxErrors,
+			RxDropped: iface.RxDropped,
+			TxBytes:   iface.TxBytes,
+			TxPackets: iface.TxPackets,
+			TxErrors:  iface.TxErrors,
+			TxDropped: iface.TxDropped,
+		}
+	}
 
 	return stats, nil
 }
@@ -1735,3 +1736,16 @@ func (daemon *Daemon) networkOptions(dconfig *Config) ([]nwconfig.Option, error)
 	options = append(options, driverOptions(dconfig)...)
 	return options, nil
 }
+
+func copyBlkioEntry(entries []*containerd.BlkioStatsEntry) []types.BlkioStatEntry {
+	out := make([]types.BlkioStatEntry, len(entries))
+	for i, re := range entries {
+		out[i] = types.BlkioStatEntry{
+			Major: re.Major,
+			Minor: re.Minor,
+			Op:    re.Op,
+			Value: re.Value,
+		}
+	}
+	return out
+}
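
copyBlkioEntry is consumed by the per-platform stats conversion in daemon_unix.go further down. A short illustrative use, relying only on the field names visible in the loop above:

// Sketch only: converting one containerd blkio entry into the engine-api type.
in := []*containerd.BlkioStatsEntry{{Major: 8, Minor: 0, Op: "Read", Value: 1024}}
out := copyBlkioEntry(in)
fmt.Println(out[0].Op, out[0].Value) // prints: Read 1024
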

+ 60 - 2
daemon/daemon_linux.go

@@ -12,6 +12,64 @@ import (
 	"github.com/docker/docker/pkg/mount"
 )
 
+func (daemon *Daemon) cleanupMountsByID(id string) error {
+	logrus.Debugf("Cleaning up old mountid %s: start.", id)
+	f, err := os.Open("/proc/self/mountinfo")
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	return daemon.cleanupMountsFromReaderByID(f, id, mount.Unmount)
+}
+
+func (daemon *Daemon) cleanupMountsFromReaderByID(reader io.Reader, id string, unmount func(target string) error) error {
+	if daemon.root == "" {
+		return nil
+	}
+	var errors []string
+	mountRoot := ""
+	shmSuffix := "/" + id + "/shm"
+	mergedSuffix := "/" + id + "/merged"
+	sc := bufio.NewScanner(reader)
+	for sc.Scan() {
+		line := sc.Text()
+		fields := strings.Fields(line)
+		if strings.HasPrefix(fields[4], daemon.root) {
+			logrus.Debugf("Mount base: %v", fields[4])
+			mnt := fields[4]
+			if strings.HasSuffix(mnt, shmSuffix) || strings.HasSuffix(mnt, mergedSuffix) {
+				logrus.Debugf("Unmounting %v", mnt)
+				if err := unmount(mnt); err != nil {
+					logrus.Error(err)
+					errors = append(errors, err.Error())
+				}
+			} else if mountBase := filepath.Base(mnt); mountBase == id {
+				mountRoot = mnt
+			}
+		}
+	}
+
+	if mountRoot != "" {
+		logrus.Debugf("Unmounting %v", mountRoot)
+		if err := unmount(mountRoot); err != nil {
+			logrus.Error(err)
+			errors = append(errors, err.Error())
+		}
+	}
+
+	if err := sc.Err(); err != nil {
+		return err
+	}
+
+	if len(errors) > 0 {
+		return fmt.Errorf("Error cleaning up mounts:\n%v", strings.Join(errors, "\n"))
+	}
+
+	logrus.Debugf("Cleaning up old container shm/mqueue/rootfs mounts: done.")
+	return nil
+}
+
 // cleanupMounts umounts shm/mqueue mounts for old containers
 func (daemon *Daemon) cleanupMounts() error {
 	logrus.Debugf("Cleaning up old container shm/mqueue/rootfs mounts: start.")
@@ -25,7 +83,7 @@ func (daemon *Daemon) cleanupMounts() error {
 }
 
 func (daemon *Daemon) cleanupMountsFromReader(reader io.Reader, unmount func(target string) error) error {
-	if daemon.repository == "" {
+	if daemon.root == "" {
 		return nil
 	}
 	sc := bufio.NewScanner(reader)
@@ -37,7 +95,7 @@ func (daemon *Daemon) cleanupMountsFromReader(reader io.Reader, unmount func(tar
 			logrus.Debugf("Mount base: %v", fields[4])
 			mnt := fields[4]
 			mountBase := filepath.Base(mnt)
-			if mountBase == "mqueue" || mountBase == "shm" || mountBase == "merged" {
+			if mountBase == "shm" || mountBase == "merged" {
 				logrus.Debugf("Unmounting %v", mnt)
 				if err := unmount(mnt); err != nil {
 					logrus.Error(err)
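
Both scanners key on fields[4] of a /proc/self/mountinfo line, which is the mount point. A self-contained illustration using one line from the test fixture below:

// Sketch only: fields[4] of a mountinfo line is the mount point.
line := "168 144 0:39 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k"
fields := strings.Fields(line)
fmt.Println(fields[4]) // prints: /dev/shm
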

+ 65 - 35
daemon/daemon_linux_test.go

@@ -7,53 +7,83 @@ import (
 	"testing"
 )
 
+const mountsFixture = `142 78 0:38 / / rw,relatime - aufs none rw,si=573b861da0b3a05b,dio
+143 142 0:60 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw
+144 142 0:67 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755
+145 144 0:78 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666
+146 144 0:49 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw
+147 142 0:84 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
+148 147 0:86 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755
+149 148 0:22 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset
+150 148 0:25 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpu rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu
+151 148 0:27 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuacct
+152 148 0:28 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
+153 148 0:29 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
+154 148 0:30 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
+155 148 0:31 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
+156 148 0:32 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event
+157 148 0:33 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,hugetlb
+158 148 0:35 /docker/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup systemd rw,name=systemd
+159 142 8:4 /home/mlaventure/gopath /home/mlaventure/gopath rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
+160 142 8:4 /var/lib/docker/volumes/9a428b651ee4c538130143cad8d87f603a4bf31b928afe7ff3ecd65480692b35/_data /var/lib/docker rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
+164 142 8:4 /home/mlaventure/gopath/src/github.com/docker/docker /go/src/github.com/docker/docker rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
+165 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/resolv.conf /etc/resolv.conf rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
+166 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/hostname /etc/hostname rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
+167 142 8:4 /var/lib/docker/containers/5425782a95e643181d8a485a2bab3c0bb21f51d7dfc03511f0e6fbf3f3aa356a/hosts /etc/hosts rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
+168 144 0:39 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
+169 144 0:12 /14 /dev/console rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
+83 147 0:10 / /sys/kernel/security rw,relatime - securityfs none rw
+89 142 0:87 / /tmp rw,relatime - tmpfs none rw
+97 142 0:60 / /run/docker/netns/default rw,nosuid,nodev,noexec,relatime - proc proc rw
+100 160 8:4 /var/lib/docker/volumes/9a428b651ee4c538130143cad8d87f603a4bf31b928afe7ff3ecd65480692b35/_data/aufs /var/lib/docker/aufs rw,relatime - ext4 /dev/disk/by-uuid/d99e196c-1fc4-4b4f-bab9-9962b2b34e99 rw,errors=remount-ro,data=ordered
+115 100 0:102 / /var/lib/docker/aufs/mnt/0ecda1c63e5b58b3d89ff380bf646c95cc980252cf0b52466d43619aec7c8432 rw,relatime - aufs none rw,si=573b861dbc01905b,dio
+116 160 0:107 / /var/lib/docker/containers/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
+118 142 0:102 / /run/docker/libcontainerd/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/rootfs rw,relatime - aufs none rw,si=573b861dbc01905b,dio
+242 142 0:60 / /run/docker/netns/c3664df2a0f7 rw,nosuid,nodev,noexec,relatime - proc proc rw
+120 100 0:122 / /var/lib/docker/aufs/mnt/03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d rw,relatime - aufs none rw,si=573b861eb147805b,dio
+171 142 0:122 / /run/docker/libcontainerd/e406ff6f3e18516d50e03dbca4de54767a69a403a6f7ec1edc2762812824521e/rootfs rw,relatime - aufs none rw,si=573b861eb147805b,dio
+310 142 0:60 / /run/docker/netns/71a18572176b rw,nosuid,nodev,noexec,relatime - proc proc rw
+`
+
 func TestCleanupMounts(t *testing.T) {
-	fixture := `230 138 0:60 / / rw,relatime - overlay overlay rw,lowerdir=/var/lib/docker/overlay/0ef9f93d5d365c1385b09d54bbee6afff3d92002c16f22eccb6e1549b2ff97d8/root,upperdir=/var/lib/docker/overlay/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/upper,workdir=/var/lib/docker/overlay/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/work
-231 230 0:56 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw
-232 230 0:57 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755
-233 232 0:58 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666
-234 232 0:59 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
-235 232 0:55 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw
-236 230 0:61 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
-237 236 0:62 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw
-238 237 0:21 /system.slice/docker.service /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
-239 237 0:23 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event
-240 237 0:24 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset,clone_children
-241 237 0:25 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
-242 237 0:26 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
-243 237 0:27 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu,cpuacct
-244 237 0:28 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
-245 237 0:29 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls,net_prio
-246 237 0:30 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,hugetlb
-247 237 0:31 /docker/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
-248 230 253:1 /var/lib/docker/volumes/510cc41ac68c48bd4eac932e3e09711673876287abf1b185312cfbfe6261a111/_data /var/lib/docker rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered
-250 230 253:1 /var/lib/docker/containers/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/hostname /etc/hostname rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered
-251 230 253:1 /var/lib/docker/containers/dfac036ce135a8914e292cb2f6fea114f7339983c186366aa26d0051e93162cb/hosts /etc/hosts rw,relatime - ext4 /dev/disk/by-uuid/ba70ea0c-1a8f-4ee4-9687-cb393730e2b5 rw,errors=remount-ro,data=ordered
-252 232 0:13 /1 /dev/console rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
-139 236 0:11 / /sys/kernel/security rw,relatime - securityfs none rw
-140 230 0:54 / /tmp rw,relatime - tmpfs none rw
-145 230 0:3 / /run/docker/netns/default rw - nsfs nsfs rw
-130 140 0:45 / /tmp/docker_recursive_mount_test312125472/tmpfs rw,relatime - tmpfs tmpfs rw
-131 230 0:3 / /run/docker/netns/47903e2e6701 rw - nsfs nsfs rw
-133 230 0:55 / /go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/47903e2e67014246eba27607809d5f5c2437c3bf84c2986393448f84093cc40b/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw`
+	d := &Daemon{
+		root: "/var/lib/docker/",
+	}
+
+	expected := "/var/lib/docker/containers/d045dc441d2e2e1d5b3e328d47e5943811a40819fb47497c5f5a5df2d6d13c37/shm"
+	var unmounted int
+	unmount := func(target string) error {
+		if target == expected {
+			unmounted++
+		}
+		return nil
+	}
 
+	d.cleanupMountsFromReader(strings.NewReader(mountsFixture), unmount)
+
+	if unmounted != 1 {
+		t.Fatalf("Expected to unmount the shm (and the shm only)")
+	}
+}
+
+func TestCleanupMountsByID(t *testing.T) {
 	d := &Daemon{
-		repository: "/go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/",
+		root: "/var/lib/docker/",
 	}
 
-	expected := "/go/src/github.com/docker/docker/bundles/1.9.0-dev/test-integration-cli/d45526097/graph/containers/47903e2e67014246eba27607809d5f5c2437c3bf84c2986393448f84093cc40b/mqueue"
-	var unmounted bool
+	expected := "/var/lib/docker/aufs/mnt/03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d"
+	var unmounted int
 	unmount := func(target string) error {
 		if target == expected {
-			unmounted = true
+			unmounted++
 		}
 		return nil
 	}
 
-	d.cleanupMountsFromReader(strings.NewReader(fixture), unmount)
+	d.cleanupMountsFromReaderByID(strings.NewReader(mountsFixture), "03ca4b49e71f1e49a41108829f4d5c70ac95934526e2af8984a1f65f1de0715d", unmount)
 
-	if !unmounted {
-		t.Fatalf("Expected to unmount the mqueue")
+	if unmounted != 1 {
+		t.Fatalf("Expected to unmount the aufs root (and that only)")
 	}
 }
 

+ 163 - 26
daemon/daemon_unix.go

@@ -13,6 +13,7 @@ import (
 	"strconv"
 	"strings"
 	"syscall"
+	"time"
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
@@ -25,6 +26,7 @@ import (
 	"github.com/docker/docker/reference"
 	"github.com/docker/docker/runconfig"
 	runconfigopts "github.com/docker/docker/runconfig/opts"
+	"github.com/docker/engine-api/types"
 	pblkiodev "github.com/docker/engine-api/types/blkiodev"
 	containertypes "github.com/docker/engine-api/types/container"
 	"github.com/docker/libnetwork"
@@ -33,10 +35,10 @@ import (
 	"github.com/docker/libnetwork/ipamutils"
 	"github.com/docker/libnetwork/netlabel"
 	"github.com/docker/libnetwork/options"
-	"github.com/docker/libnetwork/types"
-	blkiodev "github.com/opencontainers/runc/libcontainer/configs"
+	lntypes "github.com/docker/libnetwork/types"
 	"github.com/opencontainers/runc/libcontainer/label"
 	"github.com/opencontainers/runc/libcontainer/user"
+	"github.com/opencontainers/specs/specs-go"
 )
 
 const (
@@ -51,16 +53,81 @@ const (
 	defaultRemappedID  string = "dockremap"
 )
 
-func getBlkioWeightDevices(config *containertypes.HostConfig) ([]*blkiodev.WeightDevice, error) {
+func getMemoryResources(config containertypes.Resources) *specs.Memory {
+	memory := specs.Memory{}
+
+	if config.Memory > 0 {
+		limit := uint64(config.Memory)
+		memory.Limit = &limit
+	}
+
+	if config.MemoryReservation > 0 {
+		reservation := uint64(config.MemoryReservation)
+		memory.Reservation = &reservation
+	}
+
+	if config.MemorySwap != 0 {
+		swap := uint64(config.MemorySwap)
+		memory.Swap = &swap
+	}
+
+	if config.MemorySwappiness != nil {
+		swappiness := uint64(*config.MemorySwappiness)
+		memory.Swappiness = &swappiness
+	}
+
+	if config.KernelMemory != 0 {
+		kernelMemory := uint64(config.KernelMemory)
+		memory.Kernel = &kernelMemory
+	}
+
+	return &memory
+}
+
+func getCPUResources(config containertypes.Resources) *specs.CPU {
+	cpu := specs.CPU{}
+
+	if config.CPUShares != 0 {
+		shares := uint64(config.CPUShares)
+		cpu.Shares = &shares
+	}
+
+	if config.CpusetCpus != "" {
+		cpuset := config.CpusetCpus
+		cpu.Cpus = &cpuset
+	}
+
+	if config.CpusetMems != "" {
+		cpuset := config.CpusetMems
+		cpu.Mems = &cpuset
+	}
+
+	if config.CPUPeriod != 0 {
+		period := uint64(config.CPUPeriod)
+		cpu.Period = &period
+	}
+
+	if config.CPUQuota != 0 {
+		quota := uint64(config.CPUQuota)
+		cpu.Quota = &quota
+	}
+
+	return &cpu
+}
+
+func getBlkioWeightDevices(config containertypes.Resources) ([]specs.WeightDevice, error) {
 	var stat syscall.Stat_t
-	var blkioWeightDevices []*blkiodev.WeightDevice
+	var blkioWeightDevices []specs.WeightDevice
 
 	for _, weightDevice := range config.BlkioWeightDevice {
 		if err := syscall.Stat(weightDevice.Path, &stat); err != nil {
 			return nil, err
 		}
-		weightDevice := blkiodev.NewWeightDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), weightDevice.Weight, 0)
-		blkioWeightDevices = append(blkioWeightDevices, weightDevice)
+		weight := weightDevice.Weight
+		d := specs.WeightDevice{Weight: &weight}
+		d.Major = int64(stat.Rdev / 256)
+		d.Minor = int64(stat.Rdev % 256)
+		blkioWeightDevices = append(blkioWeightDevices, d)
 	}
 
 	return blkioWeightDevices, nil
@@ -105,61 +172,73 @@ func parseSecurityOpt(container *container.Container, config *containertypes.Hos
 	return err
 }
 
-func getBlkioReadIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
-	var blkioReadIOpsDevice []*blkiodev.ThrottleDevice
+func getBlkioReadIOpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
+	var blkioReadIOpsDevice []specs.ThrottleDevice
 	var stat syscall.Stat_t
 
 	for _, iopsDevice := range config.BlkioDeviceReadIOps {
 		if err := syscall.Stat(iopsDevice.Path, &stat); err != nil {
 			return nil, err
 		}
-		readIOpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), iopsDevice.Rate)
-		blkioReadIOpsDevice = append(blkioReadIOpsDevice, readIOpsDevice)
+		rate := iopsDevice.Rate
+		d := specs.ThrottleDevice{Rate: &rate}
+		d.Major = int64(stat.Rdev / 256)
+		d.Minor = int64(stat.Rdev % 256)
+		blkioReadIOpsDevice = append(blkioReadIOpsDevice, d)
 	}
 
 	return blkioReadIOpsDevice, nil
 }
 
-func getBlkioWriteIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
-	var blkioWriteIOpsDevice []*blkiodev.ThrottleDevice
+func getBlkioWriteIOpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
+	var blkioWriteIOpsDevice []specs.ThrottleDevice
 	var stat syscall.Stat_t
 
 	for _, iopsDevice := range config.BlkioDeviceWriteIOps {
 		if err := syscall.Stat(iopsDevice.Path, &stat); err != nil {
 			return nil, err
 		}
-		writeIOpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), iopsDevice.Rate)
-		blkioWriteIOpsDevice = append(blkioWriteIOpsDevice, writeIOpsDevice)
+		rate := iopsDevice.Rate
+		d := specs.ThrottleDevice{Rate: &rate}
+		d.Major = int64(stat.Rdev / 256)
+		d.Minor = int64(stat.Rdev % 256)
+		blkioWriteIOpsDevice = append(blkioWriteIOpsDevice, d)
 	}
 
 	return blkioWriteIOpsDevice, nil
 }
 
-func getBlkioReadBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
-	var blkioReadBpsDevice []*blkiodev.ThrottleDevice
+func getBlkioReadBpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
+	var blkioReadBpsDevice []specs.ThrottleDevice
 	var stat syscall.Stat_t
 
 	for _, bpsDevice := range config.BlkioDeviceReadBps {
 		if err := syscall.Stat(bpsDevice.Path, &stat); err != nil {
 			return nil, err
 		}
-		readBpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), bpsDevice.Rate)
-		blkioReadBpsDevice = append(blkioReadBpsDevice, readBpsDevice)
+		rate := bpsDevice.Rate
+		d := specs.ThrottleDevice{Rate: &rate}
+		d.Major = int64(stat.Rdev / 256)
+		d.Minor = int64(stat.Rdev % 256)
+		blkioReadBpsDevice = append(blkioReadBpsDevice, d)
 	}
 
 	return blkioReadBpsDevice, nil
 }
 
-func getBlkioWriteBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
-	var blkioWriteBpsDevice []*blkiodev.ThrottleDevice
+func getBlkioWriteBpsDevices(config containertypes.Resources) ([]specs.ThrottleDevice, error) {
+	var blkioWriteBpsDevice []specs.ThrottleDevice
 	var stat syscall.Stat_t
 
 	for _, bpsDevice := range config.BlkioDeviceWriteBps {
 		if err := syscall.Stat(bpsDevice.Path, &stat); err != nil {
 			return nil, err
 		}
-		writeBpsDevice := blkiodev.NewThrottleDevice(int64(stat.Rdev/256), int64(stat.Rdev%256), bpsDevice.Rate)
-		blkioWriteBpsDevice = append(blkioWriteBpsDevice, writeBpsDevice)
+		rate := bpsDevice.Rate
+		d := specs.ThrottleDevice{Rate: &rate}
+		d.Major = int64(stat.Rdev / 256)
+		d.Minor = int64(stat.Rdev % 256)
+		blkioWriteBpsDevice = append(blkioWriteBpsDevice, d)
 	}
 
 	return blkioWriteBpsDevice, nil
@@ -600,8 +679,8 @@ func initBridgeDriver(controller libnetwork.NetworkController, config *Config) e
 
 	nw, nw6List, err := ipamutils.ElectInterfaceAddresses(bridgeName)
 	if err == nil {
-		ipamV4Conf.PreferredPool = types.GetIPNetCanonical(nw).String()
-		hip, _ := types.GetHostPartIP(nw.IP, nw.Mask)
+		ipamV4Conf.PreferredPool = lntypes.GetIPNetCanonical(nw).String()
+		hip, _ := lntypes.GetHostPartIP(nw.IP, nw.Mask)
 		if hip.IsGlobalUnicast() {
 			ipamV4Conf.Gateway = nw.IP.String()
 		}
@@ -953,11 +1032,69 @@ func (daemon *Daemon) conditionalMountOnStart(container *container.Container) er
 
 // conditionalUnmountOnCleanup is a platform specific helper function called
 // during the cleanup of a container to unmount.
-func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) {
-	daemon.Unmount(container)
+func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
+	return daemon.Unmount(container)
 }
 
 func restoreCustomImage(is image.Store, ls layer.Store, rs reference.Store) error {
 	// Unix has no custom images to register
 	return nil
 }
+
+func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
+	if !c.IsRunning() {
+		return nil, errNotRunning{c.ID}
+	}
+	stats, err := daemon.containerd.Stats(c.ID)
+	if err != nil {
+		return nil, err
+	}
+	s := &types.StatsJSON{}
+	cgs := stats.CgroupStats
+	if cgs != nil {
+		s.BlkioStats = types.BlkioStats{
+			IoServiceBytesRecursive: copyBlkioEntry(cgs.BlkioStats.IoServiceBytesRecursive),
+			IoServicedRecursive:     copyBlkioEntry(cgs.BlkioStats.IoServicedRecursive),
+			IoQueuedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoQueuedRecursive),
+			IoServiceTimeRecursive:  copyBlkioEntry(cgs.BlkioStats.IoServiceTimeRecursive),
+			IoWaitTimeRecursive:     copyBlkioEntry(cgs.BlkioStats.IoWaitTimeRecursive),
+			IoMergedRecursive:       copyBlkioEntry(cgs.BlkioStats.IoMergedRecursive),
+			IoTimeRecursive:         copyBlkioEntry(cgs.BlkioStats.IoTimeRecursive),
+			SectorsRecursive:        copyBlkioEntry(cgs.BlkioStats.SectorsRecursive),
+		}
+		cpu := cgs.CpuStats
+		s.CPUStats = types.CPUStats{
+			CPUUsage: types.CPUUsage{
+				TotalUsage:        cpu.CpuUsage.TotalUsage,
+				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
+				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
+				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
+			},
+			ThrottlingData: types.ThrottlingData{
+				Periods:          cpu.ThrottlingData.Periods,
+				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
+				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
+			},
+		}
+		mem := cgs.MemoryStats.Usage
+		s.MemoryStats = types.MemoryStats{
+			Usage:    mem.Usage,
+			MaxUsage: mem.MaxUsage,
+			Stats:    cgs.MemoryStats.Stats,
+			Failcnt:  mem.Failcnt,
+		}
+		if cgs.PidsStats != nil {
+			s.PidsStats = types.PidsStats{
+				Current: cgs.PidsStats.Current,
+			}
+		}
+	}
+	s.Read = time.Unix(int64(stats.Timestamp), 0)
+	return s, nil
+}
+
+// setDefaultIsolation determines the default isolation mode for the
+// daemon to run in. This is only applicable on Windows.
+func (daemon *Daemon) setDefaultIsolation() error {
+	return nil
+}
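
The new getMemoryResources and getCPUResources helpers return OCI structs whose fields are pointers, so limits that were never set stay nil and can be omitted from the generated spec. A short usage sketch, using only names introduced in this file:

// Sketch only: unset limits come back as nil pointers.
mem := getMemoryResources(c.HostConfig.Resources)
if mem.Limit != nil {
	logrus.Debugf("memory limit: %d bytes", *mem.Limit)
}
cpu := getCPUResources(c.HostConfig.Resources)
if cpu.Shares != nil {
	logrus.Debugf("cpu shares: %d", *cpu.Shares)
}
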

+ 59 - 9
daemon/daemon_windows.go

@@ -18,11 +18,13 @@ import (
 	"github.com/docker/docker/layer"
 	"github.com/docker/docker/reference"
 	"github.com/docker/docker/runconfig"
-	containertypes "github.com/docker/engine-api/types/container"
 	// register the windows graph driver
 	"github.com/docker/docker/daemon/graphdriver/windows"
 	"github.com/docker/docker/pkg/idtools"
+	"github.com/docker/docker/pkg/parsers"
 	"github.com/docker/docker/pkg/system"
+	"github.com/docker/engine-api/types"
+	containertypes "github.com/docker/engine-api/types/container"
 	"github.com/docker/libnetwork"
 	nwconfig "github.com/docker/libnetwork/config"
 	winlibnetwork "github.com/docker/libnetwork/drivers/windows"
@@ -39,7 +41,7 @@ const (
 	windowsMaxCPUShares  = 10000
 )
 
-func getBlkioWeightDevices(config *containertypes.HostConfig) ([]*blkiodev.WeightDevice, error) {
+func getBlkioWeightDevices(config *containertypes.HostConfig) ([]blkiodev.WeightDevice, error) {
 	return nil, nil
 }
 
@@ -47,19 +49,19 @@ func parseSecurityOpt(container *container.Container, config *containertypes.Hos
 	return nil
 }
 
-func getBlkioReadIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
+func getBlkioReadIOpsDevices(config *containertypes.HostConfig) ([]blkiodev.ThrottleDevice, error) {
 	return nil, nil
 }
 
-func getBlkioWriteIOpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
+func getBlkioWriteIOpsDevices(config *containertypes.HostConfig) ([]blkiodev.ThrottleDevice, error) {
 	return nil, nil
 }
 
-func getBlkioReadBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
+func getBlkioReadBpsDevices(config *containertypes.HostConfig) ([]blkiodev.ThrottleDevice, error) {
 	return nil, nil
 }
 
-func getBlkioWriteBpsDevices(config *containertypes.HostConfig) ([]*blkiodev.ThrottleDevice, error) {
+func getBlkioWriteBpsDevices(config *containertypes.HostConfig) ([]blkiodev.ThrottleDevice, error) {
 	return nil, nil
 }
 
@@ -287,6 +289,10 @@ func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *
 	return nil
 }
 
+func (daemon *Daemon) cleanupMountsByID(in string) error {
+	return nil
+}
+
 func (daemon *Daemon) cleanupMounts() error {
 	return nil
 }
@@ -307,8 +313,19 @@ func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error
 // conditionalMountOnStart is a platform specific helper function during the
 // container start to call mount.
 func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error {
+
+	// Are we going to run as a Hyper-V container?
+	hv := false
+	if container.HostConfig.Isolation.IsDefault() {
+		// Container is set to use the default, so take the default from the daemon configuration
+		hv = daemon.defaultIsolation.IsHyperV()
+	} else {
+		// Container is requesting an isolation mode. Honour it.
+		hv = container.HostConfig.Isolation.IsHyperV()
+	}
+
 	// We do not mount if a Hyper-V container
-	if !container.HostConfig.Isolation.IsHyperV() {
+	if !hv {
 		if err := daemon.Mount(container); err != nil {
 			return err
 		}
@@ -318,11 +335,12 @@ func (daemon *Daemon) conditionalMountOnStart(container *container.Container) er
 
 // conditionalUnmountOnCleanup is a platform specific helper function called
 // during the cleanup of a container to unmount.
-func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) {
+func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container) error {
 	// We do not unmount if a Hyper-V container
 	if !container.HostConfig.Isolation.IsHyperV() {
-		daemon.Unmount(container)
+		return daemon.Unmount(container)
 	}
+	return nil
 }
 
 func restoreCustomImage(is image.Store, ls layer.Store, rs reference.Store) error {
@@ -404,3 +422,35 @@ func restoreCustomImage(is image.Store, ls layer.Store, rs reference.Store) erro
 func driverOptions(config *Config) []nwconfig.Option {
 	return []nwconfig.Option{}
 }
+
+func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
+	return nil, nil
+}
+
+// setDefaultIsolation determines the default isolation mode for the
+// daemon to run in. This is only applicable on Windows.
+func (daemon *Daemon) setDefaultIsolation() error {
+	daemon.defaultIsolation = containertypes.Isolation("process")
+	for _, option := range daemon.configStore.ExecOptions {
+		key, val, err := parsers.ParseKeyValueOpt(option)
+		if err != nil {
+			return err
+		}
+		key = strings.ToLower(key)
+		switch key {
+
+		case "isolation":
+			if !containertypes.Isolation(val).IsValid() {
+				return fmt.Errorf("Invalid exec-opt value for 'isolation':'%s'", val)
+			}
+			if containertypes.Isolation(val).IsHyperV() {
+				daemon.defaultIsolation = containertypes.Isolation("hyperv")
+			}
+		default:
+			return fmt.Errorf("Unrecognised exec-opt '%s'\n", key)
+		}
+	}
+
+	logrus.Infof("Windows default isolation mode: %s", daemon.defaultIsolation)
+	return nil
+}
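
setDefaultIsolation reads the daemon's exec options, each of which is a key=value string (for example, starting the daemon with --exec-opt isolation=hyperv should make Hyper-V the default). A minimal sketch of the parsing step, using the helpers referenced above:

// Sketch only: how a single exec-opt string is split and checked.
key, val, err := parsers.ParseKeyValueOpt("isolation=hyperv")
if err == nil && strings.ToLower(key) == "isolation" {
	fmt.Println(containertypes.Isolation(val).IsHyperV()) // prints: true
}
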

+ 0 - 3
daemon/delete.go

@@ -129,9 +129,6 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, forceRemo
 		return fmt.Errorf("Driver %s failed to remove root filesystem %s: %s", daemon.GraphDriverName(), container.ID, err)
 	}
 
-	if err = daemon.execDriver.Clean(container.ID); err != nil {
-		return fmt.Errorf("Unable to remove execdriver data for %s: %s", container.ID, err)
-	}
 	return nil
 }
 

+ 32 - 107
daemon/exec.go

@@ -11,10 +11,9 @@ import (
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/daemon/exec"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/errors"
+	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/pkg/pools"
-	"github.com/docker/docker/pkg/promise"
 	"github.com/docker/docker/pkg/term"
 	"github.com/docker/engine-api/types"
 	"github.com/docker/engine-api/types/strslice"
@@ -106,33 +105,31 @@ func (d *Daemon) ContainerExecCreate(config *types.ExecConfig) (string, error) {
 		}
 	}
 
-	processConfig := &execdriver.ProcessConfig{
-		CommonProcessConfig: execdriver.CommonProcessConfig{
-			Tty:        config.Tty,
-			Entrypoint: entrypoint,
-			Arguments:  args,
-		},
-	}
-	setPlatformSpecificExecProcessConfig(config, container, processConfig)
-
 	execConfig := exec.NewConfig()
 	execConfig.OpenStdin = config.AttachStdin
 	execConfig.OpenStdout = config.AttachStdout
 	execConfig.OpenStderr = config.AttachStderr
-	execConfig.ProcessConfig = processConfig
 	execConfig.ContainerID = container.ID
 	execConfig.DetachKeys = keys
+	execConfig.Entrypoint = entrypoint
+	execConfig.Args = args
+	execConfig.Tty = config.Tty
+	execConfig.Privileged = config.Privileged
+	execConfig.User = config.User
+	if len(execConfig.User) == 0 {
+		execConfig.User = container.Config.User
+	}
 
 	d.registerExecCommand(container, execConfig)
 
-	d.LogContainerEvent(container, "exec_create: "+execConfig.ProcessConfig.Entrypoint+" "+strings.Join(execConfig.ProcessConfig.Arguments, " "))
+	d.LogContainerEvent(container, "exec_create: "+execConfig.Entrypoint+" "+strings.Join(execConfig.Args, " "))
 
 	return execConfig.ID, nil
 }
 
 // ContainerExecStart starts a previously set up exec instance. The
 // std streams are set up.
-func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) error {
+func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) (err error) {
 	var (
 		cStdin           io.ReadCloser
 		cStdout, cStderr io.Writer
@@ -155,11 +152,18 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.
 		return fmt.Errorf("Error: Exec command %s is already running", ec.ID)
 	}
 	ec.Running = true
+	defer func() {
+		if err != nil {
+			ec.Running = false
+			exitCode := 126
+			ec.ExitCode = &exitCode
+		}
+	}()
 	ec.Unlock()
 
 	c := d.containers.Get(ec.ContainerID)
 	logrus.Debugf("starting exec command %s in container %s", ec.ID, c.ID)
-	d.LogContainerEvent(c, "exec_start: "+ec.ProcessConfig.Entrypoint+" "+strings.Join(ec.ProcessConfig.Arguments, " "))
+	d.LogContainerEvent(c, "exec_start: "+ec.Entrypoint+" "+strings.Join(ec.Args, " "))
 
 	if ec.OpenStdin && stdin != nil {
 		r, w := io.Pipe()
@@ -183,56 +187,26 @@ func (d *Daemon) ContainerExecStart(name string, stdin io.ReadCloser, stdout io.
 		ec.NewNopInputPipe()
 	}
 
-	attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.ProcessConfig.Tty, cStdin, cStdout, cStderr, ec.DetachKeys)
-
-	execErr := make(chan error)
-
-	// Note, the ExecConfig data will be removed when the container
-	// itself is deleted.  This allows us to query it (for things like
-	// the exitStatus) even after the cmd is done running.
-
-	go func() {
-		execErr <- d.containerExec(c, ec)
-	}()
+	p := libcontainerd.Process{
+		Args:     append([]string{ec.Entrypoint}, ec.Args...),
+		Terminal: ec.Tty,
+	}
 
-	select {
-	case err := <-attachErr:
-		if err != nil {
-			return fmt.Errorf("attach failed with error: %v", err)
-		}
+	if err := execSetPlatformOpt(c, ec, &p); err != nil {
 		return nil
-	case err := <-execErr:
-		if aErr := <-attachErr; aErr != nil && err == nil {
-			return fmt.Errorf("attach failed with error: %v", aErr)
-		}
-		if err == nil {
-			return nil
-		}
-
-		// Maybe the container stopped while we were trying to exec
-		if !c.IsRunning() {
-			return fmt.Errorf("container stopped while running exec: %s", c.ID)
-		}
-		return fmt.Errorf("Cannot run exec command %s in container %s: %s", ec.ID, c.ID, err)
 	}
-}
 
-// Exec calls the underlying exec driver to run
-func (d *Daemon) Exec(c *container.Container, execConfig *exec.Config, pipes *execdriver.Pipes, startCallback execdriver.DriverCallback) (int, error) {
-	hooks := execdriver.Hooks{
-		Start: startCallback,
-	}
-	exitStatus, err := d.execDriver.Exec(c.Command, execConfig.ProcessConfig, pipes, hooks)
+	attachErr := container.AttachStreams(context.Background(), ec.StreamConfig, ec.OpenStdin, true, ec.Tty, cStdin, cStdout, cStderr, ec.DetachKeys)
 
-	// On err, make sure we don't leave ExitCode at zero
-	if err != nil && exitStatus == 0 {
-		exitStatus = 128
+	if err := d.containerd.AddProcess(c.ID, name, p); err != nil {
+		return err
 	}
 
-	execConfig.ExitCode = &exitStatus
-	execConfig.Running = false
-
-	return exitStatus, err
+	err = <-attachErr
+	if err != nil {
+		return fmt.Errorf("attach failed with error: %v", err)
+	}
+	return nil
 }
 
 // execCommandGC runs a ticker to clean up the daemon references
@@ -270,52 +244,3 @@ func (d *Daemon) containerExecIds() map[string]struct{} {
 	}
 	return ids
 }
-
-func (d *Daemon) containerExec(container *container.Container, ec *exec.Config) error {
-	container.Lock()
-	defer container.Unlock()
-
-	callback := func(processConfig *execdriver.ProcessConfig, pid int, chOOM <-chan struct{}) error {
-		if processConfig.Tty {
-			// The callback is called after the process Start()
-			// so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
-			// which we close here.
-			if c, ok := processConfig.Stdout.(io.Closer); ok {
-				c.Close()
-			}
-		}
-		ec.Close()
-		return nil
-	}
-
-	// We use a callback here instead of a goroutine and an chan for
-	// synchronization purposes
-	cErr := promise.Go(func() error { return d.monitorExec(container, ec, callback) })
-	return ec.Wait(cErr)
-}
-
-func (d *Daemon) monitorExec(container *container.Container, execConfig *exec.Config, callback execdriver.DriverCallback) error {
-	pipes := execdriver.NewPipes(execConfig.Stdin(), execConfig.Stdout(), execConfig.Stderr(), execConfig.OpenStdin)
-	exitCode, err := d.Exec(container, execConfig, pipes, callback)
-	if err != nil {
-		logrus.Errorf("Error running command in existing container %s: %s", container.ID, err)
-	}
-	logrus.Debugf("Exec task in container %s exited with code %d", container.ID, exitCode)
-
-	if err := execConfig.CloseStreams(); err != nil {
-		logrus.Errorf("%s: %s", container.ID, err)
-	}
-
-	if execConfig.ProcessConfig.Terminal != nil {
-		if err := execConfig.WaitResize(); err != nil {
-			logrus.Errorf("Error waiting for resize: %v", err)
-		}
-		if err := execConfig.ProcessConfig.Terminal.Close(); err != nil {
-			logrus.Errorf("Error closing terminal while running in container %s: %s", container.ID, err)
-		}
-	}
-	// remove the exec command from the container's store only and not the
-	// daemon's store so that the exec command can be inspected.
-	container.ExecCommands.Delete(execConfig.ID)
-	return err
-}

+ 14 - 63
daemon/exec/exec.go

@@ -1,11 +1,8 @@
 package exec
 
 import (
-	"fmt"
 	"sync"
-	"time"
 
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/stringid"
 	"github.com/docker/docker/runconfig"
 )
@@ -16,22 +13,20 @@ import (
 type Config struct {
 	sync.Mutex
 	*runconfig.StreamConfig
-	ID            string
-	Running       bool
-	ExitCode      *int
-	ProcessConfig *execdriver.ProcessConfig
-	OpenStdin     bool
-	OpenStderr    bool
-	OpenStdout    bool
-	CanRemove     bool
-	ContainerID   string
-	DetachKeys    []byte
-
-	// waitStart will be closed immediately after the exec is really started.
-	waitStart chan struct{}
-
-	// waitResize will be closed after Resize is finished.
-	waitResize chan struct{}
+	ID          string
+	Running     bool
+	ExitCode    *int
+	OpenStdin   bool
+	OpenStderr  bool
+	OpenStdout  bool
+	CanRemove   bool
+	ContainerID string
+	DetachKeys  []byte
+	Entrypoint  string
+	Args        []string
+	Tty         bool
+	Privileged  bool
+	User        string
 }
 
 // NewConfig initializes a new exec configuration
@@ -39,8 +34,6 @@ func NewConfig() *Config {
 	return &Config{
 		ID:           stringid.GenerateNonCryptoID(),
 		StreamConfig: runconfig.NewStreamConfig(),
-		waitStart:    make(chan struct{}),
-		waitResize:   make(chan struct{}),
 	}
 }
 
@@ -98,45 +91,3 @@ func (e *Store) List() []string {
 	e.RUnlock()
 	return IDs
 }
-
-// Wait waits until the exec process finishes or there is an error in the error channel.
-func (c *Config) Wait(cErr chan error) error {
-	// Exec should not return until the process is actually running
-	select {
-	case <-c.waitStart:
-	case err := <-cErr:
-		return err
-	}
-	return nil
-}
-
-// WaitResize waits until terminal resize finishes or time out.
-func (c *Config) WaitResize() error {
-	select {
-	case <-c.waitResize:
-	case <-time.After(time.Second):
-		return fmt.Errorf("Terminal resize for exec %s time out.", c.ID)
-	}
-	return nil
-}
-
-// Close closes the wait channel for the progress.
-func (c *Config) Close() {
-	close(c.waitStart)
-}
-
-// CloseResize closes the wait channel for resizing terminal.
-func (c *Config) CloseResize() {
-	close(c.waitResize)
-}
-
-// Resize changes the size of the terminal for the exec process.
-func (c *Config) Resize(h, w int) error {
-	defer c.CloseResize()
-	select {
-	case <-c.waitStart:
-	case <-time.After(time.Second):
-		return fmt.Errorf("Exec %s is not running, so it can not be resized.", c.ID)
-	}
-	return c.ProcessConfig.Terminal.Resize(h, w)
-}
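
With ProcessConfig and the wait channels removed, an exec request is now described entirely by plain fields on exec.Config. Below is a minimal sketch of how a caller might fill it in; newExecConfig is a hypothetical helper, not daemon code, and it uses only the constructor and fields added in the hunk above.

    package exectest // hypothetical package, illustration only

    import "github.com/docker/docker/daemon/exec"

    // newExecConfig fills in the slimmed-down Config using only the
    // fields introduced above.
    func newExecConfig(containerID string, cmd []string, tty, privileged bool, user string) *exec.Config {
        if len(cmd) == 0 {
            return nil // callers validate the command; guard kept for the sketch
        }
        ec := exec.NewConfig() // allocates the exec ID and the stream config
        ec.ContainerID = containerID
        ec.Entrypoint = cmd[0]
        ec.Args = cmd[1:]
        ec.Tty = tty
        ec.Privileged = privileged
        ec.User = user
        return ec
    }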

+ 26 - 0
daemon/exec_linux.go

@@ -0,0 +1,26 @@
+package daemon
+
+import (
+	"github.com/docker/docker/container"
+	"github.com/docker/docker/daemon/caps"
+	"github.com/docker/docker/daemon/exec"
+	"github.com/docker/docker/libcontainerd"
+)
+
+func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
+	if len(ec.User) > 0 {
+		uid, gid, additionalGids, err := getUser(c, ec.User)
+		if err != nil {
+			return err
+		}
+		p.User = &libcontainerd.User{
+			UID:            uid,
+			GID:            gid,
+			AdditionalGids: additionalGids,
+		}
+	}
+	if ec.Privileged {
+		p.Capabilities = caps.GetAllCapabilities()
+	}
+	return nil
+}
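
The new execSetPlatformOpt hooks the Linux-only pieces (user lookup and the full capability set for privileged execs) into the generic exec path. A hypothetical call site follows, assumed to live next to execSetPlatformOpt in package daemon with the same imports as the file above; only the shape of p.Args is taken from the diff (compare the Windows variant below), the helper name is illustrative.

    // buildExecProcess is a hypothetical helper showing where the platform
    // hook slots in; stream wiring and error logging are elided.
    func buildExecProcess(c *container.Container, ec *exec.Config) (*libcontainerd.Process, error) {
        p := &libcontainerd.Process{}
        // Entrypoint first, then the remaining arguments.
        p.Args = append([]string{ec.Entrypoint}, ec.Args...)
        if err := execSetPlatformOpt(c, ec, p); err != nil {
            return nil, err
        }
        // On Linux, p.User and p.Capabilities are now populated for
        // user-scoped or privileged execs.
        return p, nil
    }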

+ 0 - 21
daemon/exec_unix.go

@@ -1,21 +0,0 @@
-// +build linux freebsd
-
-package daemon
-
-import (
-	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/engine-api/types"
-)
-
-// setPlatformSpecificExecProcessConfig sets platform-specific fields in the
-// ProcessConfig structure.
-func setPlatformSpecificExecProcessConfig(config *types.ExecConfig, container *container.Container, pc *execdriver.ProcessConfig) {
-	user := config.User
-	if len(user) == 0 {
-		user = container.Config.User
-	}
-
-	pc.User = user
-	pc.Privileged = config.Privileged
-}

+ 7 - 5
daemon/exec_windows.go

@@ -2,11 +2,13 @@ package daemon
 
 import (
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/engine-api/types"
+	"github.com/docker/docker/daemon/exec"
+	"github.com/docker/docker/libcontainerd"
 )
 
-// setPlatformSpecificExecProcessConfig sets platform-specific fields in the
-// ProcessConfig structure. This is a no-op on Windows
-func setPlatformSpecificExecProcessConfig(config *types.ExecConfig, container *container.Container, pc *execdriver.ProcessConfig) {
+func execSetPlatformOpt(c *container.Container, ec *exec.Config, p *libcontainerd.Process) error {
+	// Process arguments need to be escaped before sending to OCI.
+	// TODO (jstarks): escape the entrypoint too once the tests are fixed to not rely on this behavior
+	p.Args = append([]string{p.Args[0]}, escapeArgs(p.Args[1:])...)
+	return nil
 }
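
escapeArgs exists because a Windows process receives a single command line rather than an argv array, so each argument must be quoted before the strings are re-joined. The helper below is only a naive illustration of that quoting idea; it is not the daemon's escapeArgs, which follows the full Windows quoting rules.

    import "strings"

    // quoteArg is a deliberately simplified sketch: wrap an argument in
    // double quotes when it contains whitespace or quotes, escaping any
    // embedded quotes. Real Windows quoting also has to handle trailing
    // backslashes, which this sketch ignores.
    func quoteArg(a string) string {
        if !strings.ContainsAny(a, " \t\"") {
            return a
        }
        return `"` + strings.Replace(a, `"`, `\"`, -1) + `"`
    }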

+ 0 - 133
daemon/execdriver/driver.go

@@ -1,133 +0,0 @@
-package execdriver
-
-import (
-	"errors"
-	"io"
-	"os/exec"
-	"time"
-
-	"github.com/opencontainers/runc/libcontainer"
-)
-
-// Context is a generic key value pair that allows
-// arbitrary data to be sent
-type Context map[string]string
-
-// Define error messages
-var (
-	ErrNotRunning              = errors.New("Container is not running")
-	ErrWaitTimeoutReached      = errors.New("Wait timeout reached")
-	ErrDriverAlreadyRegistered = errors.New("A driver already registered this docker init function")
-	ErrDriverNotFound          = errors.New("The requested docker init has not been found")
-)
-
-// DriverCallback defines a callback function which is used in "Run" and "Exec".
-// This allows work to be done in the parent process when the child is passing
-// through PreStart, Start and PostStop events.
-// Callbacks are provided a processConfig pointer and the pid of the child.
-// The channel will be used to notify the OOM events.
-type DriverCallback func(processConfig *ProcessConfig, pid int, chOOM <-chan struct{}) error
-
-// Hooks is a struct containing function pointers to callbacks
-// used by any execdriver implementation exploiting hooks capabilities
-type Hooks struct {
-	// PreStart is called before container's CMD/ENTRYPOINT is executed
-	PreStart []DriverCallback
-	// Start is called after the container's process is full started
-	Start DriverCallback
-	// PostStop is called after the container process exits
-	PostStop []DriverCallback
-}
-
-// Terminal represents a pseudo TTY, it is for when
-// using a container interactively.
-type Terminal interface {
-	io.Closer
-	Resize(height, width int) error
-}
-
-// Driver is an interface for drivers to implement
-// including all basic functions a driver should have
-type Driver interface {
-	// Run executes the process, blocks until the process exits and returns
-	// the exit code. It's the last stage on Docker side for running a container.
-	Run(c *Command, pipes *Pipes, hooks Hooks) (ExitStatus, error)
-
-	// Exec executes the process in an existing container, blocks until the
-	// process exits and returns the exit code.
-	Exec(c *Command, processConfig *ProcessConfig, pipes *Pipes, hooks Hooks) (int, error)
-
-	// Kill sends signals to process in container.
-	Kill(c *Command, sig int) error
-
-	// Pause pauses a container.
-	Pause(c *Command) error
-
-	// Unpause unpauses a container.
-	Unpause(c *Command) error
-
-	// Name returns the name of the driver.
-	Name() string
-
-	// GetPidsForContainer returns a list of pid for the processes running in a container.
-	GetPidsForContainer(id string) ([]int, error)
-
-	// Terminate kills a container by sending signal SIGKILL.
-	Terminate(c *Command) error
-
-	// Clean removes all traces of container exec.
-	Clean(id string) error
-
-	// Stats returns resource stats for a running container
-	Stats(id string) (*ResourceStats, error)
-
-	// Update updates resource configs for a container
-	Update(c *Command) error
-
-	// SupportsHooks refers to the driver capability to exploit pre/post hook functionality
-	SupportsHooks() bool
-}
-
-// CommonResources contains the resource configs for a driver that are
-// common across platforms.
-type CommonResources struct {
-	Memory            int64  `json:"memory"`
-	MemoryReservation int64  `json:"memory_reservation"`
-	CPUShares         int64  `json:"cpu_shares"`
-	BlkioWeight       uint16 `json:"blkio_weight"`
-}
-
-// ResourceStats contains information about resource usage by a container.
-type ResourceStats struct {
-	*libcontainer.Stats
-	Read        time.Time `json:"read"`
-	MemoryLimit int64     `json:"memory_limit"`
-	SystemUsage uint64    `json:"system_usage"`
-}
-
-// CommonProcessConfig is the common platform agnostic part of the ProcessConfig
-// structure that describes a process that will be run inside a container.
-type CommonProcessConfig struct {
-	exec.Cmd `json:"-"`
-
-	Tty        bool     `json:"tty"`
-	Entrypoint string   `json:"entrypoint"`
-	Arguments  []string `json:"arguments"`
-	Terminal   Terminal `json:"-"` // standard or tty terminal
-}
-
-// CommonCommand is the common platform agnostic part of the Command structure
-// which wraps an os/exec.Cmd to add more metadata
-type CommonCommand struct {
-	ContainerPid  int           `json:"container_pid"` // the pid for the process inside a container
-	ID            string        `json:"id"`
-	MountLabel    string        `json:"mount_label"` // TODO Windows. More involved, but can be factored out
-	Mounts        []Mount       `json:"mounts"`
-	Network       *Network      `json:"network"`
-	ProcessConfig ProcessConfig `json:"process_config"` // Describes the init process of the container.
-	ProcessLabel  string        `json:"process_label"`  // TODO Windows. More involved, but can be factored out
-	Resources     *Resources    `json:"resources"`
-	Rootfs        string        `json:"rootfs"` // root fs of the container
-	WorkingDir    string        `json:"working_dir"`
-	TmpDir        string        `json:"tmpdir"` // Directory used to store docker tmpdirs.
-}

+ 0 - 323
daemon/execdriver/driver_unix.go

@@ -1,323 +0,0 @@
-// +build !windows
-
-package execdriver
-
-import (
-	"encoding/json"
-	"io/ioutil"
-	"os"
-	"path/filepath"
-	"strconv"
-	"strings"
-	"time"
-
-	"github.com/docker/docker/daemon/execdriver/native/template"
-	"github.com/docker/docker/pkg/idtools"
-	"github.com/docker/docker/pkg/mount"
-	"github.com/docker/go-units"
-	"github.com/opencontainers/runc/libcontainer"
-	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
-	"github.com/opencontainers/runc/libcontainer/configs"
-	blkiodev "github.com/opencontainers/runc/libcontainer/configs"
-)
-
-// Mount contains information for a mount operation.
-type Mount struct {
-	Source      string `json:"source"`
-	Destination string `json:"destination"`
-	Writable    bool   `json:"writable"`
-	Data        string `json:"data"`
-	Propagation string `json:"mountpropagation"`
-}
-
-// Resources contains all resource configs for a driver.
-// Currently these are all for cgroup configs.
-type Resources struct {
-	CommonResources
-
-	// Fields below here are platform specific
-
-	BlkioWeightDevice            []*blkiodev.WeightDevice   `json:"blkio_weight_device"`
-	BlkioThrottleReadBpsDevice   []*blkiodev.ThrottleDevice `json:"blkio_throttle_read_bps_device"`
-	BlkioThrottleWriteBpsDevice  []*blkiodev.ThrottleDevice `json:"blkio_throttle_write_bps_device"`
-	BlkioThrottleReadIOpsDevice  []*blkiodev.ThrottleDevice `json:"blkio_throttle_read_iops_device"`
-	BlkioThrottleWriteIOpsDevice []*blkiodev.ThrottleDevice `json:"blkio_throttle_write_iops_device"`
-	MemorySwap                   int64                      `json:"memory_swap"`
-	KernelMemory                 int64                      `json:"kernel_memory"`
-	CPUQuota                     int64                      `json:"cpu_quota"`
-	CpusetCpus                   string                     `json:"cpuset_cpus"`
-	CpusetMems                   string                     `json:"cpuset_mems"`
-	CPUPeriod                    int64                      `json:"cpu_period"`
-	Rlimits                      []*units.Rlimit            `json:"rlimits"`
-	OomKillDisable               bool                       `json:"oom_kill_disable"`
-	PidsLimit                    int64                      `json:"pids_limit"`
-	MemorySwappiness             int64                      `json:"memory_swappiness"`
-}
-
-// ProcessConfig is the platform specific structure that describes a process
-// that will be run inside a container.
-type ProcessConfig struct {
-	CommonProcessConfig
-
-	// Fields below here are platform specific
-	Privileged bool   `json:"privileged"`
-	User       string `json:"user"`
-	Console    string `json:"-"` // dev/console path
-}
-
-// Ipc settings of the container
-// It is for IPC namespace setting. Usually different containers
-// have their own IPC namespace, however this specifies to use
-// an existing IPC namespace.
-// You can join the host's or a container's IPC namespace.
-type Ipc struct {
-	ContainerID string `json:"container_id"` // id of the container to join ipc.
-	HostIpc     bool   `json:"host_ipc"`
-}
-
-// Pid settings of the container
-// It is for PID namespace setting. Usually different containers
-// have their own PID namespace, however this specifies to use
-// an existing PID namespace.
-// Joining the host's PID namespace is currently the only supported
-// option.
-type Pid struct {
-	HostPid bool `json:"host_pid"`
-}
-
-// UTS settings of the container
-// It is for UTS namespace setting. Usually different containers
-// have their own UTS namespace, however this specifies to use
-// an existing UTS namespace.
-// Joining the host's UTS namespace is currently the only supported
-// option.
-type UTS struct {
-	HostUTS bool `json:"host_uts"`
-}
-
-// Network settings of the container
-type Network struct {
-	Mtu            int    `json:"mtu"`
-	ContainerID    string `json:"container_id"` // id of the container to join network.
-	NamespacePath  string `json:"namespace_path"`
-	HostNetworking bool   `json:"host_networking"`
-}
-
-// Command wraps an os/exec.Cmd to add more metadata
-type Command struct {
-	CommonCommand
-
-	// Fields below here are platform specific
-
-	AllowedDevices     []*configs.Device `json:"allowed_devices"`
-	AppArmorProfile    string            `json:"apparmor_profile"`
-	AutoCreatedDevices []*configs.Device `json:"autocreated_devices"`
-	CapAdd             []string          `json:"cap_add"`
-	CapDrop            []string          `json:"cap_drop"`
-	CgroupParent       string            `json:"cgroup_parent"` // The parent cgroup for this command.
-	GIDMapping         []idtools.IDMap   `json:"gidmapping"`
-	GroupAdd           []string          `json:"group_add"`
-	Ipc                *Ipc              `json:"ipc"`
-	OomScoreAdj        int               `json:"oom_score_adj"`
-	Pid                *Pid              `json:"pid"`
-	ReadonlyRootfs     bool              `json:"readonly_rootfs"`
-	RemappedRoot       *User             `json:"remap_root"`
-	SeccompProfile     string            `json:"seccomp_profile"`
-	UIDMapping         []idtools.IDMap   `json:"uidmapping"`
-	UTS                *UTS              `json:"uts"`
-	NoNewPrivileges    bool              `json:"no_new_privileges"`
-}
-
-// SetRootPropagation sets the root mount propagation mode.
-func SetRootPropagation(config *configs.Config, propagation int) {
-	config.RootPropagation = propagation
-}
-
-// InitContainer is the initialization of a container config.
-// It returns the initial configs for a container. It's mostly
-// defined by the default template.
-func InitContainer(c *Command) *configs.Config {
-	container := template.New()
-
-	container.Hostname = getEnv("HOSTNAME", c.ProcessConfig.Env)
-	container.Cgroups.Name = c.ID
-	container.Cgroups.Resources.AllowedDevices = c.AllowedDevices
-	container.Devices = filterDevices(c.AutoCreatedDevices, (c.RemappedRoot.UID != 0))
-	container.Rootfs = c.Rootfs
-	container.Readonlyfs = c.ReadonlyRootfs
-	// This can be overridden later by driver during mount setup based
-	// on volume options
-	SetRootPropagation(container, mount.RPRIVATE)
-	container.Cgroups.Parent = c.CgroupParent
-
-	// check to see if we are running in ramdisk to disable pivot root
-	container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
-
-	return container
-}
-
-func filterDevices(devices []*configs.Device, userNamespacesEnabled bool) []*configs.Device {
-	if !userNamespacesEnabled {
-		return devices
-	}
-
-	filtered := []*configs.Device{}
-	// if we have user namespaces enabled, these devices will not be created
-	// because of the mknod limitation in the kernel for an unprivileged process.
-	// Rather, they will be bind-mounted, which will only work if they exist;
-	// check for existence and remove non-existent entries from the list
-	for _, device := range devices {
-		if _, err := os.Stat(device.Path); err == nil {
-			filtered = append(filtered, device)
-		}
-	}
-	return filtered
-}
-
-func getEnv(key string, env []string) string {
-	for _, pair := range env {
-		parts := strings.SplitN(pair, "=", 2)
-		if parts[0] == key {
-			return parts[1]
-		}
-	}
-	return ""
-}
-
-// SetupCgroups setups cgroup resources for a container.
-func SetupCgroups(container *configs.Config, c *Command) error {
-	if c.Resources != nil {
-		container.Cgroups.Resources.CpuShares = c.Resources.CPUShares
-		container.Cgroups.Resources.Memory = c.Resources.Memory
-		container.Cgroups.Resources.MemoryReservation = c.Resources.MemoryReservation
-		container.Cgroups.Resources.MemorySwap = c.Resources.MemorySwap
-		container.Cgroups.Resources.KernelMemory = c.Resources.KernelMemory
-		container.Cgroups.Resources.CpusetCpus = c.Resources.CpusetCpus
-		container.Cgroups.Resources.CpusetMems = c.Resources.CpusetMems
-		container.Cgroups.Resources.CpuPeriod = c.Resources.CPUPeriod
-		container.Cgroups.Resources.CpuQuota = c.Resources.CPUQuota
-		container.Cgroups.Resources.BlkioWeight = c.Resources.BlkioWeight
-		container.Cgroups.Resources.BlkioWeightDevice = c.Resources.BlkioWeightDevice
-		container.Cgroups.Resources.BlkioThrottleReadBpsDevice = c.Resources.BlkioThrottleReadBpsDevice
-		container.Cgroups.Resources.BlkioThrottleWriteBpsDevice = c.Resources.BlkioThrottleWriteBpsDevice
-		container.Cgroups.Resources.BlkioThrottleReadIOPSDevice = c.Resources.BlkioThrottleReadIOpsDevice
-		container.Cgroups.Resources.BlkioThrottleWriteIOPSDevice = c.Resources.BlkioThrottleWriteIOpsDevice
-		container.Cgroups.Resources.OomKillDisable = c.Resources.OomKillDisable
-		container.Cgroups.Resources.PidsLimit = c.Resources.PidsLimit
-		container.Cgroups.Resources.MemorySwappiness = c.Resources.MemorySwappiness
-	}
-
-	return nil
-}
-
-// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
-func getNetworkInterfaceStats(interfaceName string) (*libcontainer.NetworkInterface, error) {
-	out := &libcontainer.NetworkInterface{Name: interfaceName}
-	// This can happen if the network runtime information is missing - possible if the
-	// container was created by an old version of libcontainer.
-	if interfaceName == "" {
-		return out, nil
-	}
-	type netStatsPair struct {
-		// Where to write the output.
-		Out *uint64
-		// The network stats file to read.
-		File string
-	}
-	// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
-	netStats := []netStatsPair{
-		{Out: &out.RxBytes, File: "tx_bytes"},
-		{Out: &out.RxPackets, File: "tx_packets"},
-		{Out: &out.RxErrors, File: "tx_errors"},
-		{Out: &out.RxDropped, File: "tx_dropped"},
-
-		{Out: &out.TxBytes, File: "rx_bytes"},
-		{Out: &out.TxPackets, File: "rx_packets"},
-		{Out: &out.TxErrors, File: "rx_errors"},
-		{Out: &out.TxDropped, File: "rx_dropped"},
-	}
-	for _, netStat := range netStats {
-		data, err := readSysfsNetworkStats(interfaceName, netStat.File)
-		if err != nil {
-			return nil, err
-		}
-		*(netStat.Out) = data
-	}
-	return out, nil
-}
-
-// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
-func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
-	data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
-	if err != nil {
-		return 0, err
-	}
-	return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
-}
-
-// Stats collects all the resource usage information from a container.
-func Stats(containerDir string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) {
-	f, err := os.Open(filepath.Join(containerDir, "state.json"))
-	if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-
-	type network struct {
-		Type              string
-		HostInterfaceName string
-	}
-
-	state := struct {
-		CgroupPaths map[string]string `json:"cgroup_paths"`
-		Networks    []network
-	}{}
-
-	if err := json.NewDecoder(f).Decode(&state); err != nil {
-		return nil, err
-	}
-	now := time.Now()
-
-	mgr := fs.Manager{Paths: state.CgroupPaths}
-	cstats, err := mgr.GetStats()
-	if err != nil {
-		return nil, err
-	}
-	stats := &libcontainer.Stats{CgroupStats: cstats}
-	// if the container does not have any memory limit specified set the
-	// limit to the machines memory
-	memoryLimit := containerMemoryLimit
-	if memoryLimit == 0 {
-		memoryLimit = machineMemory
-	}
-	for _, iface := range state.Networks {
-		switch iface.Type {
-		case "veth":
-			istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
-			if err != nil {
-				return nil, err
-			}
-			stats.Interfaces = append(stats.Interfaces, istats)
-		}
-	}
-	return &ResourceStats{
-		Stats:       stats,
-		Read:        now,
-		MemoryLimit: memoryLimit,
-	}, nil
-}
-
-// User contains the uid and gid representing a Unix user
-type User struct {
-	UID int `json:"root_uid"`
-	GID int `json:"root_gid"`
-}
-
-// ExitStatus provides exit reasons for a container.
-type ExitStatus struct {
-	// The exit code with which the container exited.
-	ExitCode int
-
-	// Whether the container encountered an OOM.
-	OOMKilled bool
-}

+ 0 - 66
daemon/execdriver/driver_windows.go

@@ -1,66 +0,0 @@
-package execdriver
-
-import "github.com/docker/go-connections/nat"
-
-// Mount contains information for a mount operation.
-type Mount struct {
-	Source      string `json:"source"`
-	Destination string `json:"destination"`
-	Writable    bool   `json:"writable"`
-}
-
-// Resources contains all resource configs for a driver.
-// Currently these are all for cgroup configs.
-type Resources struct {
-	CommonResources
-
-	// Fields below here are platform specific
-}
-
-// ProcessConfig is the platform specific structure that describes a process
-// that will be run inside a container.
-type ProcessConfig struct {
-	CommonProcessConfig
-
-	// Fields below here are platform specific
-	ConsoleSize [2]int `json:"-"` // h,w of initial console size
-}
-
-// Network settings of the container
-type Network struct {
-	Interface   *NetworkInterface `json:"interface"`
-	ContainerID string            `json:"container_id"` // id of the container to join network.
-}
-
-// NetworkInterface contains network configs for a driver
-type NetworkInterface struct {
-	MacAddress string `json:"mac"`
-	Bridge     string `json:"bridge"`
-	IPAddress  string `json:"ip"`
-
-	// PortBindings is the port mapping between the exposed port in the
-	// container and the port on the host.
-	PortBindings nat.PortMap `json:"port_bindings"`
-}
-
-// Command wraps an os/exec.Cmd to add more metadata
-type Command struct {
-	CommonCommand
-
-	// Fields below here are platform specific
-
-	FirstStart  bool     `json:"first_start"`  // Optimization for first boot of Windows
-	Hostname    string   `json:"hostname"`     // Windows sets the hostname in the execdriver
-	LayerFolder string   `json:"layer_folder"` // Layer folder for a command
-	LayerPaths  []string `json:"layer_paths"`  // Layer paths for a command
-	Isolation   string   `json:"isolation"`    // Isolation technology for the container
-	ArgsEscaped bool     `json:"args_escaped"` // True if args are already escaped
-	HvPartition bool     `json:"hv_partition"` // True if it's an hypervisor partition
-	EpList      []string `json:"endpoints"`    // List of network endpoints for HNS
-}
-
-// ExitStatus provides exit reasons for a container.
-type ExitStatus struct {
-	// The exit code with which the container exited.
-	ExitCode int
-}

+ 0 - 15
daemon/execdriver/execdrivers/execdrivers_freebsd.go

@@ -1,15 +0,0 @@
-// +build freebsd
-
-package execdrivers
-
-import (
-	"fmt"
-
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/pkg/sysinfo"
-)
-
-// NewDriver returns a new execdriver.Driver from the given name configured with the provided options.
-func NewDriver(options []string, root, libPath string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, error) {
-	return nil, fmt.Errorf("jail driver not yet supported on FreeBSD")
-}

+ 0 - 16
daemon/execdriver/execdrivers/execdrivers_linux.go

@@ -1,16 +0,0 @@
-// +build linux
-
-package execdrivers
-
-import (
-	"path"
-
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/daemon/execdriver/native"
-	"github.com/docker/docker/pkg/sysinfo"
-)
-
-// NewDriver returns a new execdriver.Driver from the given name configured with the provided options.
-func NewDriver(options []string, root, libPath string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, error) {
-	return native.NewDriver(path.Join(root, "execdriver", "native"), options)
-}

+ 0 - 14
daemon/execdriver/execdrivers/execdrivers_windows.go

@@ -1,14 +0,0 @@
-// +build windows
-
-package execdrivers
-
-import (
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/daemon/execdriver/windows"
-	"github.com/docker/docker/pkg/sysinfo"
-)
-
-// NewDriver returns a new execdriver.Driver from the given name configured with the provided options.
-func NewDriver(options []string, root, libPath string, sysInfo *sysinfo.SysInfo) (execdriver.Driver, error) {
-	return windows.NewDriver(root, options)
-}

+ 0 - 514
daemon/execdriver/native/create.go

@@ -1,514 +0,0 @@
-// +build linux,cgo
-
-package native
-
-import (
-	"fmt"
-	"path/filepath"
-	"strings"
-	"syscall"
-
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/pkg/mount"
-	"github.com/docker/docker/profiles/seccomp"
-
-	"github.com/docker/docker/volume"
-	"github.com/opencontainers/runc/libcontainer/apparmor"
-	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/opencontainers/runc/libcontainer/devices"
-)
-
-// createContainer populates and configures the container type with the
-// data provided by the execdriver.Command
-func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) {
-	container = execdriver.InitContainer(c)
-
-	if err := d.createIpc(container, c); err != nil {
-		return nil, err
-	}
-
-	if err := d.createPid(container, c); err != nil {
-		return nil, err
-	}
-
-	if err := d.createUTS(container, c); err != nil {
-		return nil, err
-	}
-
-	if err := d.setupRemappedRoot(container, c); err != nil {
-		return nil, err
-	}
-
-	if err := d.createNetwork(container, c, hooks); err != nil {
-		return nil, err
-	}
-
-	if c.ProcessConfig.Privileged {
-		if !container.Readonlyfs {
-			// clear readonly for /sys
-			for i := range container.Mounts {
-				if container.Mounts[i].Destination == "/sys" {
-					container.Mounts[i].Flags &= ^syscall.MS_RDONLY
-				}
-			}
-			container.ReadonlyPaths = nil
-		}
-
-		// clear readonly for cgroup
-		for i := range container.Mounts {
-			if container.Mounts[i].Device == "cgroup" {
-				container.Mounts[i].Flags &= ^syscall.MS_RDONLY
-			}
-		}
-
-		container.MaskPaths = nil
-		if err := d.setPrivileged(container); err != nil {
-			return nil, err
-		}
-	} else {
-		if err := d.setCapabilities(container, c); err != nil {
-			return nil, err
-		}
-
-		if c.SeccompProfile == "" {
-			container.Seccomp, err = seccomp.GetDefaultProfile()
-			if err != nil {
-				return nil, err
-			}
-		}
-	}
-	// add CAP_ prefix to all caps for new libcontainer update to match
-	// the spec format.
-	for i, s := range container.Capabilities {
-		if !strings.HasPrefix(s, "CAP_") {
-			container.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
-		}
-	}
-	container.AdditionalGroups = c.GroupAdd
-
-	if c.AppArmorProfile != "" {
-		container.AppArmorProfile = c.AppArmorProfile
-	}
-
-	if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" {
-		container.Seccomp, err = seccomp.LoadProfile(c.SeccompProfile)
-		if err != nil {
-			return nil, err
-		}
-	}
-
-	if err := execdriver.SetupCgroups(container, c); err != nil {
-		return nil, err
-	}
-
-	container.OomScoreAdj = c.OomScoreAdj
-
-	if container.Readonlyfs {
-		for i := range container.Mounts {
-			switch container.Mounts[i].Destination {
-			case "/proc", "/dev", "/dev/pts", "/dev/mqueue":
-				continue
-			}
-			container.Mounts[i].Flags |= syscall.MS_RDONLY
-		}
-
-		/* These paths must be remounted as r/o */
-		container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev")
-	}
-
-	if err := d.setupMounts(container, c); err != nil {
-		return nil, err
-	}
-
-	d.setupLabels(container, c)
-	d.setupRlimits(container, c)
-
-	container.NoNewPrivileges = c.NoNewPrivileges
-	return container, nil
-}
-
-func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error {
-	if c.Network == nil {
-		return nil
-	}
-	if c.Network.ContainerID != "" {
-		d.Lock()
-		active := d.activeContainers[c.Network.ContainerID]
-		d.Unlock()
-
-		if active == nil {
-			return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
-		}
-
-		state, err := active.State()
-		if err != nil {
-			return err
-		}
-
-		container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
-		return nil
-	}
-
-	if c.Network.NamespacePath != "" {
-		container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath)
-		return nil
-	}
-	// only set up prestart hook if the namespace path is not set (this should be
-	// all cases *except* for --net=host shared networking)
-	container.Hooks = &configs.Hooks{
-		Prestart: []configs.Hook{
-			configs.NewFunctionHook(func(s configs.HookState) error {
-				if len(hooks.PreStart) > 0 {
-					for _, fnHook := range hooks.PreStart {
-						// A closed channel for OOM is returned here as it will be
-						// non-blocking and return the correct result when read.
-						chOOM := make(chan struct{})
-						close(chOOM)
-						if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil {
-							return err
-						}
-					}
-				}
-				return nil
-			}),
-		},
-	}
-	return nil
-}
-
-func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error {
-	if c.Ipc.HostIpc {
-		container.Namespaces.Remove(configs.NEWIPC)
-		return nil
-	}
-
-	if c.Ipc.ContainerID != "" {
-		d.Lock()
-		active := d.activeContainers[c.Ipc.ContainerID]
-		d.Unlock()
-
-		if active == nil {
-			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
-		}
-
-		state, err := active.State()
-		if err != nil {
-			return err
-		}
-		container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
-	}
-
-	return nil
-}
-
-func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error {
-	if c.Pid.HostPid {
-		container.Namespaces.Remove(configs.NEWPID)
-		return nil
-	}
-
-	return nil
-}
-
-func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error {
-	if c.UTS.HostUTS {
-		container.Namespaces.Remove(configs.NEWUTS)
-		container.Hostname = ""
-		return nil
-	}
-
-	return nil
-}
-
-func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error {
-	if c.RemappedRoot.UID == 0 {
-		container.Namespaces.Remove(configs.NEWUSER)
-		return nil
-	}
-
-	// convert the Docker daemon id map to the libcontainer variant of the same struct
-	// this keeps us from having to import libcontainer code across Docker client + daemon packages
-	cuidMaps := []configs.IDMap{}
-	cgidMaps := []configs.IDMap{}
-	for _, idMap := range c.UIDMapping {
-		cuidMaps = append(cuidMaps, configs.IDMap(idMap))
-	}
-	for _, idMap := range c.GIDMapping {
-		cgidMaps = append(cgidMaps, configs.IDMap(idMap))
-	}
-	container.UidMappings = cuidMaps
-	container.GidMappings = cgidMaps
-
-	for _, node := range container.Devices {
-		node.Uid = uint32(c.RemappedRoot.UID)
-		node.Gid = uint32(c.RemappedRoot.GID)
-	}
-	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
-	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
-	for i := range container.Mounts {
-		if container.Mounts[i].Device == "cgroup" {
-			container.Mounts[i].Flags &= ^syscall.MS_RDONLY
-		}
-	}
-
-	return nil
-}
-
-func (d *Driver) setPrivileged(container *configs.Config) (err error) {
-	container.Capabilities = execdriver.GetAllCapabilities()
-	container.Cgroups.Resources.AllowAllDevices = true
-
-	hostDevices, err := devices.HostDevices()
-	if err != nil {
-		return err
-	}
-	container.Devices = hostDevices
-
-	if apparmor.IsEnabled() {
-		container.AppArmorProfile = "unconfined"
-	}
-	return nil
-}
-
-func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
-	container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
-	return err
-}
-
-func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
-	if c.Resources == nil {
-		return
-	}
-
-	for _, rlimit := range c.Resources.Rlimits {
-		container.Rlimits = append(container.Rlimits, configs.Rlimit{
-			Type: rlimit.Type,
-			Hard: rlimit.Hard,
-			Soft: rlimit.Soft,
-		})
-	}
-}
-
-// If rootfs mount propagation is RPRIVATE, that means all the volumes are
-// going to be private anyway. There is no need to apply per volume
-// propagation on top. This is just an optimization so that cost of per volume
-// propagation is paid only if user decides to make some volume non-private
-// which will force rootfs mount propagation to be non RPRIVATE.
-func checkResetVolumePropagation(container *configs.Config) {
-	if container.RootPropagation != mount.RPRIVATE {
-		return
-	}
-	for _, m := range container.Mounts {
-		m.PropagationFlags = nil
-	}
-}
-
-func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
-	for _, m := range mountinfo {
-		if m.Mountpoint == dir {
-			return m
-		}
-	}
-	return nil
-}
-
-// Get the source mount point of directory passed in as argument. Also return
-// optional fields.
-func getSourceMount(source string) (string, string, error) {
-	// Ensure any symlinks are resolved.
-	sourcePath, err := filepath.EvalSymlinks(source)
-	if err != nil {
-		return "", "", err
-	}
-
-	mountinfos, err := mount.GetMounts()
-	if err != nil {
-		return "", "", err
-	}
-
-	mountinfo := getMountInfo(mountinfos, sourcePath)
-	if mountinfo != nil {
-		return sourcePath, mountinfo.Optional, nil
-	}
-
-	path := sourcePath
-	for {
-		path = filepath.Dir(path)
-
-		mountinfo = getMountInfo(mountinfos, path)
-		if mountinfo != nil {
-			return path, mountinfo.Optional, nil
-		}
-
-		if path == "/" {
-			break
-		}
-	}
-
-	// If we are here, we did not find parent mount. Something is wrong.
-	return "", "", fmt.Errorf("Could not find source mount of %s", source)
-}
-
-// Ensure mount point on which path is mounted, is shared.
-func ensureShared(path string) error {
-	sharedMount := false
-
-	sourceMount, optionalOpts, err := getSourceMount(path)
-	if err != nil {
-		return err
-	}
-	// Make sure source mount point is shared.
-	optsSplit := strings.Split(optionalOpts, " ")
-	for _, opt := range optsSplit {
-		if strings.HasPrefix(opt, "shared:") {
-			sharedMount = true
-			break
-		}
-	}
-
-	if !sharedMount {
-		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
-	}
-	return nil
-}
-
-// Ensure mount point on which path is mounted, is either shared or slave.
-func ensureSharedOrSlave(path string) error {
-	sharedMount := false
-	slaveMount := false
-
-	sourceMount, optionalOpts, err := getSourceMount(path)
-	if err != nil {
-		return err
-	}
-	// Make sure source mount point is shared.
-	optsSplit := strings.Split(optionalOpts, " ")
-	for _, opt := range optsSplit {
-		if strings.HasPrefix(opt, "shared:") {
-			sharedMount = true
-			break
-		} else if strings.HasPrefix(opt, "master:") {
-			slaveMount = true
-			break
-		}
-	}
-
-	if !sharedMount && !slaveMount {
-		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
-	}
-	return nil
-}
-
-func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
-	userMounts := make(map[string]struct{})
-	for _, m := range c.Mounts {
-		userMounts[m.Destination] = struct{}{}
-	}
-
-	// Filter out mounts that are overridden by user supplied mounts
-	var defaultMounts []*configs.Mount
-	_, mountDev := userMounts["/dev"]
-	for _, m := range container.Mounts {
-		if _, ok := userMounts[m.Destination]; !ok {
-			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
-				container.Devices = nil
-				continue
-			}
-			defaultMounts = append(defaultMounts, m)
-		}
-	}
-	container.Mounts = defaultMounts
-
-	mountPropagationMap := map[string]int{
-		"private":  mount.PRIVATE,
-		"rprivate": mount.RPRIVATE,
-		"shared":   mount.SHARED,
-		"rshared":  mount.RSHARED,
-		"slave":    mount.SLAVE,
-		"rslave":   mount.RSLAVE,
-	}
-
-	for _, m := range c.Mounts {
-		for _, cm := range container.Mounts {
-			if cm.Destination == m.Destination {
-				return fmt.Errorf("Duplicate mount point '%s'", m.Destination)
-			}
-		}
-
-		if m.Source == "tmpfs" {
-			var (
-				data  = "size=65536k"
-				flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
-				err   error
-			)
-			if m.Data != "" {
-				flags, data, err = mount.ParseTmpfsOptions(m.Data)
-				if err != nil {
-					return err
-				}
-			}
-			container.Mounts = append(container.Mounts, &configs.Mount{
-				Source:           m.Source,
-				Destination:      m.Destination,
-				Data:             data,
-				Device:           "tmpfs",
-				Flags:            flags,
-				PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]},
-			})
-			continue
-		}
-		flags := syscall.MS_BIND | syscall.MS_REC
-		var pFlag int
-		if !m.Writable {
-			flags |= syscall.MS_RDONLY
-		}
-
-		// Determine property of RootPropagation based on volume
-		// properties. If a volume is shared, then keep root propagation
-		// shared. This should work for slave and private volumes too.
-		//
-		// For slave volumes, it can be either [r]shared/[r]slave.
-		//
-		// For private volumes any root propagation value should work.
-
-		pFlag = mountPropagationMap[m.Propagation]
-		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
-			if err := ensureShared(m.Source); err != nil {
-				return err
-			}
-			rootpg := container.RootPropagation
-			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
-				execdriver.SetRootPropagation(container, mount.SHARED)
-			}
-		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
-			if err := ensureSharedOrSlave(m.Source); err != nil {
-				return err
-			}
-			rootpg := container.RootPropagation
-			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
-				execdriver.SetRootPropagation(container, mount.RSLAVE)
-			}
-		}
-
-		mount := &configs.Mount{
-			Source:      m.Source,
-			Destination: m.Destination,
-			Device:      "bind",
-			Flags:       flags,
-		}
-
-		if pFlag != 0 {
-			mount.PropagationFlags = []int{pFlag}
-		}
-
-		container.Mounts = append(container.Mounts, mount)
-	}
-
-	checkResetVolumePropagation(container)
-	return nil
-}
-
-func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) {
-	container.ProcessLabel = c.ProcessLabel
-	container.MountLabel = c.MountLabel
-}

+ 0 - 606
daemon/execdriver/native/driver.go

@@ -1,606 +0,0 @@
-// +build linux,cgo
-
-package native
-
-import (
-	"fmt"
-	"io"
-	"io/ioutil"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"strings"
-	"sync"
-	"syscall"
-	"time"
-
-	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/pkg/parsers"
-	"github.com/docker/docker/pkg/pools"
-	"github.com/docker/docker/pkg/reexec"
-	sysinfo "github.com/docker/docker/pkg/system"
-	"github.com/docker/docker/pkg/term"
-	aaprofile "github.com/docker/docker/profiles/apparmor"
-	"github.com/opencontainers/runc/libcontainer"
-	"github.com/opencontainers/runc/libcontainer/apparmor"
-	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
-	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/opencontainers/runc/libcontainer/system"
-	"github.com/opencontainers/runc/libcontainer/utils"
-)
-
-// Define constants for native driver
-const (
-	DriverName = "native"
-	Version    = "0.2"
-
-	defaultApparmorProfile = "docker-default"
-)
-
-// Driver contains all information for native driver,
-// it implements execdriver.Driver.
-type Driver struct {
-	root             string
-	activeContainers map[string]libcontainer.Container
-	machineMemory    int64
-	factory          libcontainer.Factory
-	sync.Mutex
-}
-
-// NewDriver returns a new native driver, called from NewDriver of execdriver.
-func NewDriver(root string, options []string) (*Driver, error) {
-	meminfo, err := sysinfo.ReadMemInfo()
-	if err != nil {
-		return nil, err
-	}
-
-	if err := sysinfo.MkdirAll(root, 0700); err != nil {
-		return nil, err
-	}
-
-	if apparmor.IsEnabled() {
-		if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil {
-			apparmorProfiles := []string{defaultApparmorProfile}
-
-			// Allow daemon to run if loading failed, but are active
-			// (possibly through another run, manually, or via system startup)
-			for _, policy := range apparmorProfiles {
-				if err := aaprofile.IsLoaded(policy); err != nil {
-					return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
-				}
-			}
-		}
-	}
-
-	// choose cgroup manager
-	// this makes sure there are no breaking changes to people
-	// who upgrade from versions without native.cgroupdriver opt
-	cgm := libcontainer.Cgroupfs
-
-	// parse the options
-	for _, option := range options {
-		key, val, err := parsers.ParseKeyValueOpt(option)
-		if err != nil {
-			return nil, err
-		}
-		key = strings.ToLower(key)
-		switch key {
-		case "native.cgroupdriver":
-			// override the default if they set options
-			switch val {
-			case "systemd":
-				if systemd.UseSystemd() {
-					cgm = libcontainer.SystemdCgroups
-				} else {
-					// warn them that they chose the wrong driver
-					logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
-				}
-			case "cgroupfs":
-				cgm = libcontainer.Cgroupfs
-			default:
-				return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
-			}
-		default:
-			return nil, fmt.Errorf("Unknown option %s\n", key)
-		}
-	}
-
-	f, err := libcontainer.New(
-		root,
-		cgm,
-		libcontainer.InitPath(reexec.Self(), DriverName),
-	)
-	if err != nil {
-		return nil, err
-	}
-
-	return &Driver{
-		root:             root,
-		activeContainers: make(map[string]libcontainer.Container),
-		machineMemory:    meminfo.MemTotal,
-		factory:          f,
-	}, nil
-}
-
-// Run implements the exec driver Driver interface,
-// it calls libcontainer APIs to run a container.
-func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
-	destroyed := false
-	var err error
-	c.TmpDir, err = ioutil.TempDir("", c.ID)
-	if err != nil {
-		return execdriver.ExitStatus{ExitCode: -1}, err
-	}
-	defer os.RemoveAll(c.TmpDir)
-
-	// take the Command and populate the libcontainer.Config from it
-	container, err := d.createContainer(c, hooks)
-	if err != nil {
-		return execdriver.ExitStatus{ExitCode: -1}, err
-	}
-
-	p := &libcontainer.Process{
-		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
-		Env:  c.ProcessConfig.Env,
-		Cwd:  c.WorkingDir,
-		User: c.ProcessConfig.User,
-	}
-
-	wg := sync.WaitGroup{}
-	writers, err := setupPipes(container, &c.ProcessConfig, p, pipes, &wg)
-	if err != nil {
-		return execdriver.ExitStatus{ExitCode: -1}, err
-	}
-
-	cont, err := d.factory.Create(c.ID, container)
-	if err != nil {
-		return execdriver.ExitStatus{ExitCode: -1}, err
-	}
-
-	if err := cont.Start(p); err != nil {
-		return execdriver.ExitStatus{ExitCode: -1}, err
-	}
-	d.Lock()
-	d.activeContainers[c.ID] = cont
-	d.Unlock()
-	defer func() {
-		if !destroyed {
-			cont.Destroy()
-		}
-		d.cleanContainer(c.ID)
-	}()
-
-	//close the write end of any opened pipes now that they are dup'ed into the container
-	for _, writer := range writers {
-		writer.Close()
-	}
-	// 'oom' is used to emit 'oom' events to the eventstream, 'oomKilled' is used
-	// to set the 'OOMKilled' flag in state
-	oom := notifyOnOOM(cont)
-	oomKilled := notifyOnOOM(cont)
-	if hooks.Start != nil {
-		pid, err := p.Pid()
-		if err != nil {
-			p.Signal(os.Kill)
-			p.Wait()
-			return execdriver.ExitStatus{ExitCode: -1}, err
-		}
-		hooks.Start(&c.ProcessConfig, pid, oom)
-	}
-
-	waitF := p.Wait
-	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
-		// we need such hack for tracking processes with inherited fds,
-		// because cmd.Wait() waiting for all streams to be copied
-		waitF = waitInPIDHost(p, cont)
-	}
-	ps, err := waitF()
-	if err != nil {
-		execErr, ok := err.(*exec.ExitError)
-		if !ok {
-			return execdriver.ExitStatus{ExitCode: -1}, err
-		}
-		ps = execErr.ProcessState
-	}
-	// wait for all IO goroutine copiers to finish
-	wg.Wait()
-
-	cont.Destroy()
-	destroyed = true
-	// oomKilled will have an oom event if any process within the container was
-	// OOM killed at any time, not only if the init process OOMed.
-	//
-	// Perhaps we only want the OOMKilled flag to be set if the OOM
-	// resulted in a container death, but there isn't a good way to do this
-	// because the kernel's cgroup oom notification does not provide information
-	// such as the PID. This could be heuristically done by checking that the OOM
-	// happened within some very small time slice for the container dying (and
-	// optionally exit-code 137), but I don't think the cgroup oom notification
-	// can be used to reliably determine this
-	//
-	// Even if there were multiple OOMs, it's sufficient to read one value
-	// because libcontainer's oom notify will discard the channel after the
-	// cgroup is destroyed
-	_, oomKill := <-oomKilled
-	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
-}
-
-// notifyOnOOM returns a channel that signals if the container received an OOM notification
-// for any process. If it is unable to subscribe to OOM notifications then a closed
-// channel is returned as it will be non-blocking and return the correct result when read.
-func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
-	oom, err := container.NotifyOOM()
-	if err != nil {
-		logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
-		c := make(chan struct{})
-		close(c)
-		return c
-	}
-	return oom
-}
-
-func killCgroupProcs(c libcontainer.Container) {
-	var procs []*os.Process
-	if err := c.Pause(); err != nil {
-		logrus.Warn(err)
-	}
-	pids, err := c.Processes()
-	if err != nil {
-		// don't care about childs if we can't get them, this is mostly because cgroup already deleted
-		logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
-	}
-	for _, pid := range pids {
-		if p, err := os.FindProcess(pid); err == nil {
-			procs = append(procs, p)
-			if err := p.Kill(); err != nil {
-				logrus.Warn(err)
-			}
-		}
-	}
-	if err := c.Resume(); err != nil {
-		logrus.Warn(err)
-	}
-	for _, p := range procs {
-		if _, err := p.Wait(); err != nil {
-			logrus.Warn(err)
-		}
-	}
-}
-
-func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
-	return func() (*os.ProcessState, error) {
-		pid, err := p.Pid()
-		if err != nil {
-			return nil, err
-		}
-
-		process, err := os.FindProcess(pid)
-		s, err := process.Wait()
-		if err != nil {
-			execErr, ok := err.(*exec.ExitError)
-			if !ok {
-				return s, err
-			}
-			s = execErr.ProcessState
-		}
-		killCgroupProcs(c)
-		p.Wait()
-		return s, err
-	}
-}
-
-// Kill implements the exec driver Driver interface.
-func (d *Driver) Kill(c *execdriver.Command, sig int) error {
-	d.Lock()
-	active := d.activeContainers[c.ID]
-	d.Unlock()
-	if active == nil {
-		return fmt.Errorf("active container for %s does not exist", c.ID)
-	}
-	state, err := active.State()
-	if err != nil {
-		return err
-	}
-	if state.InitProcessPid == -1 {
-		return fmt.Errorf("avoid sending signal %d to container %s with pid -1", sig, c.ID)
-	}
-	return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
-}
-
-// Pause implements the exec driver Driver interface,
-// it calls libcontainer API to pause a container.
-func (d *Driver) Pause(c *execdriver.Command) error {
-	d.Lock()
-	active := d.activeContainers[c.ID]
-	d.Unlock()
-	if active == nil {
-		return fmt.Errorf("active container for %s does not exist", c.ID)
-	}
-	return active.Pause()
-}
-
-// Unpause implements the exec driver Driver interface,
-// it calls libcontainer API to unpause a container.
-func (d *Driver) Unpause(c *execdriver.Command) error {
-	d.Lock()
-	active := d.activeContainers[c.ID]
-	d.Unlock()
-	if active == nil {
-		return fmt.Errorf("active container for %s does not exist", c.ID)
-	}
-	return active.Resume()
-}
-
-// Terminate implements the exec driver Driver interface.
-func (d *Driver) Terminate(c *execdriver.Command) error {
-	defer d.cleanContainer(c.ID)
-	container, err := d.factory.Load(c.ID)
-	if err != nil {
-		return err
-	}
-	defer container.Destroy()
-	state, err := container.State()
-	if err != nil {
-		return err
-	}
-	pid := state.InitProcessPid
-	currentStartTime, err := system.GetProcessStartTime(pid)
-	if err != nil {
-		return err
-	}
-	if state.InitProcessStartTime == currentStartTime {
-		err = syscall.Kill(pid, 9)
-		syscall.Wait4(pid, nil, 0, nil)
-	}
-	return err
-}
-
-// Name implements the exec driver Driver interface.
-func (d *Driver) Name() string {
-	return fmt.Sprintf("%s-%s", DriverName, Version)
-}
-
-// GetPidsForContainer implements the exec driver Driver interface.
-func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
-	d.Lock()
-	active := d.activeContainers[id]
-	d.Unlock()
-
-	if active == nil {
-		return nil, fmt.Errorf("active container for %s does not exist", id)
-	}
-	return active.Processes()
-}
-
-func (d *Driver) cleanContainer(id string) error {
-	d.Lock()
-	delete(d.activeContainers, id)
-	d.Unlock()
-	return os.RemoveAll(filepath.Join(d.root, id))
-}
-
-func (d *Driver) createContainerRoot(id string) error {
-	return os.MkdirAll(filepath.Join(d.root, id), 0655)
-}
-
-// Clean implements the exec driver Driver interface.
-func (d *Driver) Clean(id string) error {
-	return os.RemoveAll(filepath.Join(d.root, id))
-}
-
-// Stats implements the exec driver Driver interface.
-func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
-	d.Lock()
-	c := d.activeContainers[id]
-	d.Unlock()
-	if c == nil {
-		return nil, execdriver.ErrNotRunning
-	}
-	now := time.Now()
-	stats, err := c.Stats()
-	if err != nil {
-		return nil, err
-	}
-	memoryLimit := c.Config().Cgroups.Resources.Memory
-	// if the container does not have any memory limit specified set the
-	// limit to the machines memory
-	if memoryLimit == 0 {
-		memoryLimit = d.machineMemory
-	}
-	return &execdriver.ResourceStats{
-		Stats:       stats,
-		Read:        now,
-		MemoryLimit: memoryLimit,
-	}, nil
-}
-
-// Update updates configs for a container
-func (d *Driver) Update(c *execdriver.Command) error {
-	d.Lock()
-	cont := d.activeContainers[c.ID]
-	d.Unlock()
-	if cont == nil {
-		return execdriver.ErrNotRunning
-	}
-	config := cont.Config()
-	if err := execdriver.SetupCgroups(&config, c); err != nil {
-		return err
-	}
-
-	if err := cont.Set(config); err != nil {
-		return err
-	}
-
-	return nil
-}
-
-// TtyConsole implements the exec driver Terminal interface.
-type TtyConsole struct {
-	console libcontainer.Console
-}
-
-// NewTtyConsole returns a new TtyConsole struct.
-func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes, wg *sync.WaitGroup) (*TtyConsole, error) {
-	tty := &TtyConsole{
-		console: console,
-	}
-
-	if err := tty.AttachPipes(pipes, wg); err != nil {
-		tty.Close()
-		return nil, err
-	}
-
-	return tty, nil
-}
-
-// Resize implements Resize method of Terminal interface
-func (t *TtyConsole) Resize(h, w int) error {
-	return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
-}
-
-// AttachPipes attaches given pipes to TtyConsole
-func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes, wg *sync.WaitGroup) error {
-	wg.Add(1)
-	go func() {
-		defer wg.Done()
-		if wb, ok := pipes.Stdout.(interface {
-			CloseWriters() error
-		}); ok {
-			defer wb.CloseWriters()
-		}
-
-		pools.Copy(pipes.Stdout, t.console)
-	}()
-
-	if pipes.Stdin != nil {
-		go func() {
-			pools.Copy(t.console, pipes.Stdin)
-
-			pipes.Stdin.Close()
-		}()
-	}
-
-	return nil
-}
-
-// Close implements Close method of Terminal interface
-func (t *TtyConsole) Close() error {
-	return t.console.Close()
-}
-
-func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes, wg *sync.WaitGroup) ([]io.WriteCloser, error) {
-
-	writers := []io.WriteCloser{}
-
-	rootuid, err := container.HostUID()
-	if err != nil {
-		return writers, err
-	}
-
-	if processConfig.Tty {
-		cons, err := p.NewConsole(rootuid)
-		if err != nil {
-			return writers, err
-		}
-		term, err := NewTtyConsole(cons, pipes, wg)
-		if err != nil {
-			return writers, err
-		}
-		processConfig.Terminal = term
-		return writers, nil
-	}
-	// not a tty--set up stdio pipes
-	term := &execdriver.StdConsole{}
-	processConfig.Terminal = term
-
-	// if we are not in a user namespace, there is no reason to go through
-	// the hassle of setting up os-level pipes with proper (remapped) ownership
-	// so we will do the prior shortcut for non-userns containers
-	if rootuid == 0 {
-		p.Stdout = pipes.Stdout
-		p.Stderr = pipes.Stderr
-
-		r, w, err := os.Pipe()
-		if err != nil {
-			return writers, err
-		}
-		if pipes.Stdin != nil {
-			go func() {
-				io.Copy(w, pipes.Stdin)
-				w.Close()
-			}()
-			p.Stdin = r
-		}
-		return writers, nil
-	}
-
-	// if we have user namespaces enabled (rootuid != 0), we will set
-	// up os pipes for stderr, stdout, stdin so we can chown them to
-	// the proper ownership to allow for proper access to the underlying
-	// fds
-	var fds []uintptr
-
-	copyPipes := func(out io.Writer, in io.ReadCloser) {
-		defer wg.Done()
-		io.Copy(out, in)
-		in.Close()
-	}
-
-	//setup stdout
-	r, w, err := os.Pipe()
-	if err != nil {
-		w.Close()
-		return writers, err
-	}
-	writers = append(writers, w)
-	fds = append(fds, r.Fd(), w.Fd())
-	if pipes.Stdout != nil {
-		wg.Add(1)
-		go copyPipes(pipes.Stdout, r)
-	}
-	term.Closers = append(term.Closers, r)
-	p.Stdout = w
-
-	//setup stderr
-	r, w, err = os.Pipe()
-	if err != nil {
-		w.Close()
-		return writers, err
-	}
-	writers = append(writers, w)
-	fds = append(fds, r.Fd(), w.Fd())
-	if pipes.Stderr != nil {
-		wg.Add(1)
-		go copyPipes(pipes.Stderr, r)
-	}
-	term.Closers = append(term.Closers, r)
-	p.Stderr = w
-
-	//setup stdin
-	r, w, err = os.Pipe()
-	if err != nil {
-		r.Close()
-		return writers, err
-	}
-	fds = append(fds, r.Fd(), w.Fd())
-	if pipes.Stdin != nil {
-		go func() {
-			io.Copy(w, pipes.Stdin)
-			w.Close()
-		}()
-		p.Stdin = r
-	}
-	for _, fd := range fds {
-		if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
-			return writers, fmt.Errorf("Failed to chown pipes fd: %v", err)
-		}
-	}
-	return writers, nil
-}
-
-// SupportsHooks implements the execdriver Driver interface.
-// The libcontainer/runC-based native execdriver does exploit the hook mechanism
-func (d *Driver) SupportsHooks() bool {
-	return true
-}

+ 0 - 14
daemon/execdriver/native/driver_unsupported.go

@@ -1,14 +0,0 @@
-// +build !linux
-
-package native
-
-import (
-	"fmt"
-
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// NewDriver returns a new native driver, called from NewDriver of execdriver.
-func NewDriver(root string, options []string) (execdriver.Driver, error) {
-	return nil, fmt.Errorf("native driver not supported on non-linux")
-}

+ 0 - 14
daemon/execdriver/native/driver_unsupported_nocgo.go

@@ -1,14 +0,0 @@
-// +build linux,!cgo
-
-package native
-
-import (
-	"fmt"
-
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// NewDriver returns a new native driver, called from NewDriver of execdriver.
-func NewDriver(root string, options []string) (execdriver.Driver, error) {
-	return nil, fmt.Errorf("native driver not supported on non-linux")
-}

+ 0 - 96
daemon/execdriver/native/exec.go

@@ -1,96 +0,0 @@
-// +build linux
-
-package native
-
-import (
-	"fmt"
-	"os"
-	"os/exec"
-	"strings"
-	"sync"
-	"syscall"
-
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/opencontainers/runc/libcontainer"
-	// Blank import 'nsenter' so that init in that package will call c
-	// function 'nsexec()' to do 'setns' before Go runtime take over,
-	// it's used for join to exist ns like 'docker exec' command.
-	_ "github.com/opencontainers/runc/libcontainer/nsenter"
-	"github.com/opencontainers/runc/libcontainer/utils"
-)
-
-// Exec implements the exec driver Driver interface,
-// it calls libcontainer APIs to execute a container.
-func (d *Driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, hooks execdriver.Hooks) (int, error) {
-	active := d.activeContainers[c.ID]
-	if active == nil {
-		return -1, fmt.Errorf("No active container exists with ID %s", c.ID)
-	}
-
-	user := processConfig.User
-	if c.RemappedRoot.UID != 0 && user == "" {
-		//if user namespaces are enabled, set user explicitly so uid/gid is set to 0
-		//otherwise we end up with the overflow id and no permissions (65534)
-		user = "0"
-	}
-
-	p := &libcontainer.Process{
-		Args: append([]string{processConfig.Entrypoint}, processConfig.Arguments...),
-		Env:  c.ProcessConfig.Env,
-		Cwd:  c.WorkingDir,
-		User: user,
-	}
-
-	if processConfig.Privileged {
-		p.Capabilities = execdriver.GetAllCapabilities()
-	}
-	// add CAP_ prefix to all caps for new libcontainer update to match
-	// the spec format.
-	for i, s := range p.Capabilities {
-		if !strings.HasPrefix(s, "CAP_") {
-			p.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
-		}
-	}
-
-	config := active.Config()
-	wg := sync.WaitGroup{}
-	writers, err := setupPipes(&config, processConfig, p, pipes, &wg)
-	if err != nil {
-		return -1, err
-	}
-
-	if err := active.Start(p); err != nil {
-		return -1, err
-	}
-	//close the write end of any opened pipes now that they are dup'ed into the container
-	for _, writer := range writers {
-		writer.Close()
-	}
-
-	if hooks.Start != nil {
-		pid, err := p.Pid()
-		if err != nil {
-			p.Signal(os.Kill)
-			p.Wait()
-			return -1, err
-		}
-
-		// A closed channel for OOM is returned here as it will be
-		// non-blocking and return the correct result when read.
-		chOOM := make(chan struct{})
-		close(chOOM)
-		hooks.Start(&c.ProcessConfig, pid, chOOM)
-	}
-
-	ps, err := p.Wait()
-	if err != nil {
-		exitErr, ok := err.(*exec.ExitError)
-		if !ok {
-			return -1, err
-		}
-		ps = exitErr.ProcessState
-	}
-	// wait for all IO goroutine copiers to finish
-	wg.Wait()
-	return utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), nil
-}

+ 0 - 40
daemon/execdriver/native/init.go

@@ -1,40 +0,0 @@
-// +build linux
-
-package native
-
-import (
-	"fmt"
-	"os"
-	"runtime"
-
-	"github.com/docker/docker/pkg/reexec"
-	"github.com/opencontainers/runc/libcontainer"
-)
-
-func init() {
-	reexec.Register(DriverName, initializer)
-}
-
-func fatal(err error) {
-	if lerr, ok := err.(libcontainer.Error); ok {
-		lerr.Detail(os.Stderr)
-		os.Exit(1)
-	}
-
-	fmt.Fprintln(os.Stderr, err)
-	os.Exit(1)
-}
-
-func initializer() {
-	runtime.GOMAXPROCS(1)
-	runtime.LockOSThread()
-	factory, err := libcontainer.New("")
-	if err != nil {
-		fatal(err)
-	}
-	if err := factory.StartInitialization(); err != nil {
-		fatal(err)
-	}
-
-	panic("unreachable")
-}

+ 0 - 106
daemon/execdriver/native/template/default_template_linux.go

@@ -1,106 +0,0 @@
-package template
-
-import (
-	"syscall"
-
-	"github.com/opencontainers/runc/libcontainer/apparmor"
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
-
-// New returns the docker default configuration for libcontainer
-func New() *configs.Config {
-	container := &configs.Config{
-		Capabilities: []string{
-			"CHOWN",
-			"DAC_OVERRIDE",
-			"FSETID",
-			"FOWNER",
-			"MKNOD",
-			"NET_RAW",
-			"SETGID",
-			"SETUID",
-			"SETFCAP",
-			"SETPCAP",
-			"NET_BIND_SERVICE",
-			"SYS_CHROOT",
-			"KILL",
-			"AUDIT_WRITE",
-		},
-		Namespaces: configs.Namespaces([]configs.Namespace{
-			{Type: "NEWNS"},
-			{Type: "NEWUTS"},
-			{Type: "NEWIPC"},
-			{Type: "NEWPID"},
-			{Type: "NEWNET"},
-			{Type: "NEWUSER"},
-		}),
-		Cgroups: &configs.Cgroup{
-			ScopePrefix: "docker", // systemd only
-			Resources: &configs.Resources{
-				AllowAllDevices:  false,
-				MemorySwappiness: -1,
-			},
-		},
-		Mounts: []*configs.Mount{
-			{
-				Source:      "proc",
-				Destination: "/proc",
-				Device:      "proc",
-				Flags:       defaultMountFlags,
-			},
-			{
-				Source:      "tmpfs",
-				Destination: "/dev",
-				Device:      "tmpfs",
-				Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
-				Data:        "mode=755",
-			},
-			{
-				Source:      "devpts",
-				Destination: "/dev/pts",
-				Device:      "devpts",
-				Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
-				Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
-			},
-			{
-				Source:      "mqueue",
-				Destination: "/dev/mqueue",
-				Device:      "mqueue",
-				Flags:       defaultMountFlags,
-			},
-			{
-				Source:      "sysfs",
-				Destination: "/sys",
-				Device:      "sysfs",
-				Flags:       defaultMountFlags | syscall.MS_RDONLY,
-			},
-			{
-				Source:      "cgroup",
-				Destination: "/sys/fs/cgroup",
-				Device:      "cgroup",
-				Flags:       defaultMountFlags | syscall.MS_RDONLY,
-			},
-		},
-		MaskPaths: []string{
-			"/proc/kcore",
-			"/proc/latency_stats",
-			"/proc/timer_stats",
-		},
-		ReadonlyPaths: []string{
-			"/proc/asound",
-			"/proc/bus",
-			"/proc/fs",
-			"/proc/irq",
-			"/proc/sys",
-			"/proc/sysrq-trigger",
-		},
-	}
-
-	if apparmor.IsEnabled() {
-		container.AppArmorProfile = "docker-default"
-	}
-
-	return container
-}

+ 0 - 3
daemon/execdriver/native/template/default_template_unsupported.go

@@ -1,3 +0,0 @@
-// +build !linux
-
-package template

+ 0 - 24
daemon/execdriver/pipes.go

@@ -1,24 +0,0 @@
-package execdriver
-
-import (
-	"io"
-)
-
-// Pipes is a wrapper around a container's output for
-// stdin, stdout, stderr
-type Pipes struct {
-	Stdin          io.ReadCloser
-	Stdout, Stderr io.Writer
-}
-
-// NewPipes returns a wrapper around a container's output
-func NewPipes(stdin io.ReadCloser, stdout, stderr io.Writer, useStdin bool) *Pipes {
-	p := &Pipes{
-		Stdout: stdout,
-		Stderr: stderr,
-	}
-	if useStdin {
-		p.Stdin = stdin
-	}
-	return p
-}

+ 0 - 55
daemon/execdriver/termconsole.go

@@ -1,55 +0,0 @@
-package execdriver
-
-import (
-	"io"
-	"os/exec"
-)
-
-// StdConsole defines standard console operations for execdriver
-type StdConsole struct {
-	// Closers holds io.Closer references for closing at terminal close time
-	Closers []io.Closer
-}
-
-// NewStdConsole returns a new StdConsole struct
-func NewStdConsole(processConfig *ProcessConfig, pipes *Pipes) (*StdConsole, error) {
-	std := &StdConsole{}
-
-	if err := std.AttachPipes(&processConfig.Cmd, pipes); err != nil {
-		return nil, err
-	}
-	return std, nil
-}
-
-// AttachPipes attaches given pipes to exec.Cmd
-func (s *StdConsole) AttachPipes(command *exec.Cmd, pipes *Pipes) error {
-	command.Stdout = pipes.Stdout
-	command.Stderr = pipes.Stderr
-
-	if pipes.Stdin != nil {
-		stdin, err := command.StdinPipe()
-		if err != nil {
-			return err
-		}
-
-		go func() {
-			defer stdin.Close()
-			io.Copy(stdin, pipes.Stdin)
-		}()
-	}
-	return nil
-}
-
-// Resize implements Resize method of Terminal interface
-func (s *StdConsole) Resize(h, w int) error {
-	// we do not need to resize a non tty
-	return nil
-}
-
-// Close implements Close method of Terminal interface
-func (s *StdConsole) Close() error {
-	for _, c := range s.Closers {
-		c.Close()
-	}
-	return nil
-}

+ 0 - 8
daemon/execdriver/windows/clean.go

@@ -1,8 +0,0 @@
-// +build windows
-
-package windows
-
-// Clean implements the exec driver Driver interface.
-func (d *Driver) Clean(id string) error {
-	return nil
-}

+ 0 - 36
daemon/execdriver/windows/commandlinebuilder.go

@@ -1,36 +0,0 @@
-//+build windows
-
-package windows
-
-import (
-	"errors"
-	"syscall"
-
-	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// createCommandLine creates a command line from the Entrypoint and args
-// of the ProcessConfig. It escapes the arguments if they are not already
-// escaped
-func createCommandLine(processConfig *execdriver.ProcessConfig, alreadyEscaped bool) (commandLine string, err error) {
-	// While this should get caught earlier, just in case, validate that we
-	// have something to run.
-	if processConfig.Entrypoint == "" {
-		return "", errors.New("No entrypoint specified")
-	}
-
-	// Build the command line of the process
-	commandLine = processConfig.Entrypoint
-	logrus.Debugf("Entrypoint: %s", processConfig.Entrypoint)
-	for _, arg := range processConfig.Arguments {
-		logrus.Debugf("appending %s", arg)
-		if !alreadyEscaped {
-			arg = syscall.EscapeArg(arg)
-		}
-		commandLine += " " + arg
-	}
-
-	logrus.Debugf("commandLine: %s", commandLine)
-	return commandLine, nil
-}

+ 0 - 89
daemon/execdriver/windows/exec.go

@@ -1,89 +0,0 @@
-// +build windows
-
-package windows
-
-import (
-	"fmt"
-	"syscall"
-
-	"github.com/Microsoft/hcsshim"
-	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// Exec implements the exec driver Driver interface.
-func (d *Driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, hooks execdriver.Hooks) (int, error) {
-
-	var (
-		term     execdriver.Terminal
-		err      error
-		exitCode int32
-	)
-
-	active := d.activeContainers[c.ID]
-	if active == nil {
-		return -1, fmt.Errorf("Exec - No active container exists with ID %s", c.ID)
-	}
-
-	createProcessParms := hcsshim.CreateProcessParams{
-		EmulateConsole:   processConfig.Tty, // Note NOT c.ProcessConfig.Tty
-		WorkingDirectory: c.WorkingDir,
-	}
-
-	// Configure the environment for the process // Note NOT c.ProcessConfig.Env
-	createProcessParms.Environment = setupEnvironmentVariables(processConfig.Env)
-
-	// Create the commandline for the process // Note NOT c.ProcessConfig
-	createProcessParms.CommandLine, err = createCommandLine(processConfig, false)
-
-	if err != nil {
-		return -1, err
-	}
-
-	// Start the command running in the container.
-	pid, stdin, stdout, stderr, err := hcsshim.CreateProcessInComputeSystem(c.ID, pipes.Stdin != nil, true, !processConfig.Tty, createProcessParms)
-	if err != nil {
-		// TODO Windows: TP4 Workaround. In Hyper-V containers, there is a limitation
-		// of one exec per container. This should be fixed post TP4. CreateProcessInComputeSystem
-		// will return a specific error which we handle here to give a good error message
-		// back to the user instead of an inactionable "An invalid argument was supplied"
-		if herr, ok := err.(*hcsshim.HcsError); ok && herr.Err == hcsshim.WSAEINVAL {
-			return -1, fmt.Errorf("The limit of docker execs per Hyper-V container has been exceeded")
-		}
-		logrus.Errorf("CreateProcessInComputeSystem() failed %s", err)
-		return -1, err
-	}
-
-	// Now that the process has been launched, begin copying data to and from
-	// the named pipes for the std handles.
-	setupPipes(stdin, stdout, stderr, pipes)
-
-	// Note NOT c.ProcessConfig.Tty
-	if processConfig.Tty {
-		term = NewTtyConsole(c.ID, pid)
-	} else {
-		term = NewStdConsole()
-	}
-	processConfig.Terminal = term
-
-	// Invoke the start callback
-	if hooks.Start != nil {
-		// A closed channel for OOM is returned here as it will be
-		// non-blocking and return the correct result when read.
-		chOOM := make(chan struct{})
-		close(chOOM)
-		hooks.Start(&c.ProcessConfig, int(pid), chOOM)
-	}
-
-	if exitCode, err = hcsshim.WaitForProcessInComputeSystem(c.ID, pid, hcsshim.TimeoutInfinite); err != nil {
-		if herr, ok := err.(*hcsshim.HcsError); ok && herr.Err == syscall.ERROR_BROKEN_PIPE {
-			logrus.Debugf("Exiting Run() after WaitForProcessInComputeSystem failed with recognised error %s", err)
-			return hcsshim.WaitErrExecFailed, nil
-		}
-		logrus.Warnf("WaitForProcessInComputeSystem failed (container may have been killed): %s", err)
-		return -1, err
-	}
-
-	logrus.Debugln("Exiting Run()", c.ID)
-	return int(exitCode), nil
-}

+ 0 - 11
daemon/execdriver/windows/getpids.go

@@ -1,11 +0,0 @@
-// +build windows
-
-package windows
-
-import "fmt"
-
-// GetPidsForContainer implements the exec driver Driver interface.
-func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
-	// TODO Windows: Implementation required.
-	return nil, fmt.Errorf("GetPidsForContainer: GetPidsForContainer() not implemented")
-}

+ 0 - 63
daemon/execdriver/windows/namedpipes.go

@@ -1,63 +0,0 @@
-// +build windows
-
-package windows
-
-import (
-	"fmt"
-	"io"
-
-	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// General comment. Handling I/O for a container is very different to Linux.
-// We use a named pipe to HCS to copy I/O both in and out of the container,
-// very similar to how docker daemon communicates with a CLI.
-
-// startStdinCopy asynchronously copies an io.Reader to the container's
-// process's stdin pipe and closes the pipe when there is no more data to copy.
-func startStdinCopy(dst io.WriteCloser, src io.Reader) {
-
-	// Anything that comes from the client stdin should be copied
-	// across to the stdin named pipe of the container.
-	go func() {
-		defer dst.Close()
-		bytes, err := io.Copy(dst, src)
-		log := fmt.Sprintf("Copied %d bytes from stdin.", bytes)
-		if err != nil {
-			log = log + " err=" + err.Error()
-		}
-		logrus.Debugf(log)
-	}()
-}
-
-// startStdouterrCopy asynchronously copies data from the container's process's
-// stdout or stderr pipe to an io.Writer and closes the pipe when there is no
-// more data to copy.
-func startStdouterrCopy(dst io.Writer, src io.ReadCloser, name string) {
-	// Anything that comes from the container named pipe stdout/err should be copied
-	// across to the stdout/err of the client
-	go func() {
-		defer src.Close()
-		bytes, err := io.Copy(dst, src)
-		log := fmt.Sprintf("Copied %d bytes from %s.", bytes, name)
-		if err != nil {
-			log = log + " err=" + err.Error()
-		}
-		logrus.Debugf(log)
-	}()
-}
-
-// setupPipes starts the asynchronous copying of data to and from the named
-// pipes used by the HCS for the std handles.
-func setupPipes(stdin io.WriteCloser, stdout, stderr io.ReadCloser, pipes *execdriver.Pipes) {
-	if stdin != nil {
-		startStdinCopy(stdin, pipes.Stdin)
-	}
-	if stdout != nil {
-		startStdouterrCopy(pipes.Stdout, stdout, "stdout")
-	}
-	if stderr != nil {
-		startStdouterrCopy(pipes.Stderr, stderr, "stderr")
-	}
-}

+ 0 - 19
daemon/execdriver/windows/pauseunpause.go

@@ -1,19 +0,0 @@
-// +build windows
-
-package windows
-
-import (
-	"fmt"
-
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// Pause implements the exec driver Driver interface.
-func (d *Driver) Pause(c *execdriver.Command) error {
-	return fmt.Errorf("Windows: Containers cannot be paused")
-}
-
-// Unpause implements the exec driver Driver interface.
-func (d *Driver) Unpause(c *execdriver.Command) error {
-	return fmt.Errorf("Windows: Containers cannot be paused")
-}

+ 0 - 366
daemon/execdriver/windows/run.go

@@ -1,366 +0,0 @@
-// +build windows
-
-package windows
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"strconv"
-	"strings"
-	"syscall"
-	"time"
-
-	"github.com/Microsoft/hcsshim"
-	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// defaultContainerNAT is the default name of the container NAT device that is
-// preconfigured on the server.
-const defaultContainerNAT = "ContainerNAT"
-
-// Win32 error codes that are used for various workarounds
-// These really should be ALL_CAPS to match golang's syscall library and standard
-// Win32 error conventions, but golint insists on CamelCase.
-const (
-	CoEClassstring     = syscall.Errno(0x800401F3) // Invalid class string
-	ErrorNoNetwork     = syscall.Errno(1222)       // The network is not present or not started
-	ErrorBadPathname   = syscall.Errno(161)        // The specified path is invalid
-	ErrorInvalidObject = syscall.Errno(0x800710D8) // The object identifier does not represent a valid object
-)
-
-type layer struct {
-	ID   string
-	Path string
-}
-
-type portBinding struct {
-	Protocol     string
-	InternalPort int
-	ExternalPort int
-}
-
-type natSettings struct {
-	Name         string
-	PortBindings []portBinding
-}
-
-type networkConnection struct {
-	NetworkName string
-	// TODO Windows: Add Ip4Address string to this structure when hooked up in
-	// docker CLI. This is present in the HCS JSON handler.
-	EnableNat bool
-	Nat       natSettings
-}
-type networkSettings struct {
-	MacAddress string
-}
-
-type device struct {
-	DeviceType string
-	Connection interface{}
-	Settings   interface{}
-}
-
-type mappedDir struct {
-	HostPath      string
-	ContainerPath string
-	ReadOnly      bool
-}
-
-type containerInit struct {
-	SystemType              string      // HCS requires this to be hard-coded to "Container"
-	Name                    string      // Name of the container. We use the docker ID.
-	Owner                   string      // The management platform that created this container
-	IsDummy                 bool        // Used for development purposes.
-	VolumePath              string      // Windows volume path for scratch space
-	Devices                 []device    // Devices used by the container
-	IgnoreFlushesDuringBoot bool        // Optimization hint for container startup in Windows
-	LayerFolderPath         string      // Where the layer folders are located
-	Layers                  []layer     // List of storage layers
-	ProcessorWeight         int64       `json:",omitempty"` // CPU Shares 0..10000 on Windows; where 0 will be omitted and HCS will default.
-	HostName                string      // Hostname
-	MappedDirectories       []mappedDir // List of mapped directories (volumes/mounts)
-	SandboxPath             string      // Location of unmounted sandbox (used for Hyper-V containers, not Windows Server containers)
-	HvPartition             bool        // True if it a Hyper-V Container
-	EndpointList            []string    // List of endpoints to be attached to container
-}
-
-// defaultOwner is a tag passed to HCS to allow it to differentiate between
-// container creator management stacks. We hard code "docker" in the case
-// of docker.
-const defaultOwner = "docker"
-
-// Run implements the exec driver Driver interface
-func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
-
-	var (
-		term execdriver.Terminal
-		err  error
-	)
-
-	// Allocate Network only if there is no network interface
-	cu := &containerInit{
-		SystemType:              "Container",
-		Name:                    c.ID,
-		Owner:                   defaultOwner,
-		IsDummy:                 dummyMode,
-		VolumePath:              c.Rootfs,
-		IgnoreFlushesDuringBoot: c.FirstStart,
-		LayerFolderPath:         c.LayerFolder,
-		ProcessorWeight:         c.Resources.CPUShares,
-		HostName:                c.Hostname,
-		EndpointList:            c.EpList,
-	}
-
-	cu.HvPartition = c.HvPartition
-
-	if cu.HvPartition {
-		cu.SandboxPath = filepath.Dir(c.LayerFolder)
-	} else {
-		cu.VolumePath = c.Rootfs
-		cu.LayerFolderPath = c.LayerFolder
-	}
-
-	for _, layerPath := range c.LayerPaths {
-		_, filename := filepath.Split(layerPath)
-		g, err := hcsshim.NameToGuid(filename)
-		if err != nil {
-			return execdriver.ExitStatus{ExitCode: -1}, err
-		}
-		cu.Layers = append(cu.Layers, layer{
-			ID:   g.ToString(),
-			Path: layerPath,
-		})
-	}
-
-	// Add the mounts (volumes, bind mounts etc) to the structure
-	mds := make([]mappedDir, len(c.Mounts))
-	for i, mount := range c.Mounts {
-		mds[i] = mappedDir{
-			HostPath:      mount.Source,
-			ContainerPath: mount.Destination,
-			ReadOnly:      !mount.Writable}
-	}
-	cu.MappedDirectories = mds
-
-	// TODO Windows. At some point, when there is CLI on docker run to
-	// enable the IP Address of the container to be passed into docker run,
-	// the IP Address needs to be wired through to HCS in the JSON. It
-	// would be present in c.Network.Interface.IPAddress. See matching
-	// TODO in daemon\container_windows.go, function populateCommand.
-
-	if c.Network.Interface != nil {
-
-		var pbs []portBinding
-
-		// Enumerate through the port bindings specified by the user and convert
-		// them into the internal structure matching the JSON blob that can be
-		// understood by the HCS.
-		for i, v := range c.Network.Interface.PortBindings {
-			proto := strings.ToUpper(i.Proto())
-			if proto != "TCP" && proto != "UDP" {
-				return execdriver.ExitStatus{ExitCode: -1}, fmt.Errorf("invalid protocol %s", i.Proto())
-			}
-
-			if len(v) > 1 {
-				return execdriver.ExitStatus{ExitCode: -1}, fmt.Errorf("Windows does not support more than one host port in NAT settings")
-			}
-
-			for _, v2 := range v {
-				var (
-					iPort, ePort int
-					err          error
-				)
-				if len(v2.HostIP) != 0 {
-					return execdriver.ExitStatus{ExitCode: -1}, fmt.Errorf("Windows does not support host IP addresses in NAT settings")
-				}
-				if ePort, err = strconv.Atoi(v2.HostPort); err != nil {
-					return execdriver.ExitStatus{ExitCode: -1}, fmt.Errorf("invalid container port %s: %s", v2.HostPort, err)
-				}
-				if iPort, err = strconv.Atoi(i.Port()); err != nil {
-					return execdriver.ExitStatus{ExitCode: -1}, fmt.Errorf("invalid internal port %s: %s", i.Port(), err)
-				}
-				if iPort < 0 || iPort > 65535 || ePort < 0 || ePort > 65535 {
-					return execdriver.ExitStatus{ExitCode: -1}, fmt.Errorf("specified NAT port is not in allowed range")
-				}
-				pbs = append(pbs,
-					portBinding{ExternalPort: ePort,
-						InternalPort: iPort,
-						Protocol:     proto})
-			}
-		}
-
-		// TODO Windows: TP3 workaround. Allow the user to override the name of
-		// the Container NAT device through an environment variable. This will
-		// ultimately be a global daemon parameter on Windows, similar to -b
-		// for the name of the virtual switch (aka bridge).
-		cn := os.Getenv("DOCKER_CONTAINER_NAT")
-		if len(cn) == 0 {
-			cn = defaultContainerNAT
-		}
-
-		dev := device{
-			DeviceType: "Network",
-			Connection: &networkConnection{
-				NetworkName: c.Network.Interface.Bridge,
-				// TODO Windows: Fixme, next line. Needs HCS fix.
-				EnableNat: false,
-				Nat: natSettings{
-					Name:         cn,
-					PortBindings: pbs,
-				},
-			},
-		}
-
-		if c.Network.Interface.MacAddress != "" {
-			windowsStyleMAC := strings.Replace(
-				c.Network.Interface.MacAddress, ":", "-", -1)
-			dev.Settings = networkSettings{
-				MacAddress: windowsStyleMAC,
-			}
-		}
-		cu.Devices = append(cu.Devices, dev)
-	} else {
-		logrus.Debugln("No network interface")
-	}
-
-	configurationb, err := json.Marshal(cu)
-	if err != nil {
-		return execdriver.ExitStatus{ExitCode: -1}, err
-	}
-
-	configuration := string(configurationb)
-
-	// TODO Windows TP5 timeframe. Remove when TP4 is no longer supported.
-	// The following a workaround for Windows TP4 which has a networking
-	// bug which fairly frequently returns an error. Back off and retry.
-	maxAttempts := 5
-	for i := 0; i < maxAttempts; i++ {
-		err = hcsshim.CreateComputeSystem(c.ID, configuration)
-		if err == nil {
-			break
-		}
-
-		if !TP4RetryHack {
-			return execdriver.ExitStatus{ExitCode: -1}, err
-		}
-
-		if herr, ok := err.(*hcsshim.HcsError); ok {
-			if herr.Err != syscall.ERROR_NOT_FOUND && // Element not found
-				herr.Err != syscall.ERROR_FILE_NOT_FOUND && // The system cannot find the file specified
-				herr.Err != ErrorNoNetwork && // The network is not present or not started
-				herr.Err != ErrorBadPathname && // The specified path is invalid
-				herr.Err != CoEClassstring && // Invalid class string
-				herr.Err != ErrorInvalidObject { // The object identifier does not represent a valid object
-				logrus.Debugln("Failed to create temporary container ", err)
-				return execdriver.ExitStatus{ExitCode: -1}, err
-			}
-			logrus.Warnf("Invoking Windows TP4 retry hack (%d of %d)", i, maxAttempts-1)
-			time.Sleep(50 * time.Millisecond)
-		}
-	}
-
-	// Start the container
-	logrus.Debugln("Starting container ", c.ID)
-	err = hcsshim.StartComputeSystem(c.ID)
-	if err != nil {
-		logrus.Errorf("Failed to start compute system: %s", err)
-		return execdriver.ExitStatus{ExitCode: -1}, err
-	}
-	defer func() {
-		// Stop the container
-		if forceKill {
-			logrus.Debugf("Forcibly terminating container %s", c.ID)
-			if err := hcsshim.TerminateComputeSystem(c.ID, hcsshim.TimeoutInfinite, "exec-run-defer"); err != nil {
-				logrus.Warnf("Ignoring error from TerminateComputeSystem %s", err)
-			}
-		} else {
-			logrus.Debugf("Shutting down container %s", c.ID)
-			if err := hcsshim.ShutdownComputeSystem(c.ID, hcsshim.TimeoutInfinite, "exec-run-defer"); err != nil {
-				if herr, ok := err.(*hcsshim.HcsError); !ok ||
-					(herr.Err != hcsshim.ERROR_SHUTDOWN_IN_PROGRESS &&
-						herr.Err != ErrorBadPathname &&
-						herr.Err != syscall.ERROR_PATH_NOT_FOUND) {
-					logrus.Warnf("Ignoring error from ShutdownComputeSystem %s", err)
-				}
-			}
-		}
-	}()
-
-	createProcessParms := hcsshim.CreateProcessParams{
-		EmulateConsole:   c.ProcessConfig.Tty,
-		WorkingDirectory: c.WorkingDir,
-		ConsoleSize:      c.ProcessConfig.ConsoleSize,
-	}
-
-	// Configure the environment for the process
-	createProcessParms.Environment = setupEnvironmentVariables(c.ProcessConfig.Env)
-
-	createProcessParms.CommandLine, err = createCommandLine(&c.ProcessConfig, c.ArgsEscaped)
-
-	if err != nil {
-		return execdriver.ExitStatus{ExitCode: -1}, err
-	}
-
-	// Start the command running in the container.
-	pid, stdin, stdout, stderr, err := hcsshim.CreateProcessInComputeSystem(c.ID, pipes.Stdin != nil, true, !c.ProcessConfig.Tty, createProcessParms)
-	if err != nil {
-		logrus.Errorf("CreateProcessInComputeSystem() failed %s", err)
-		return execdriver.ExitStatus{ExitCode: -1}, err
-	}
-
-	// Now that the process has been launched, begin copying data to and from
-	// the named pipes for the std handles.
-	setupPipes(stdin, stdout, stderr, pipes)
-
-	//Save the PID as we'll need this in Kill()
-	logrus.Debugf("PID %d", pid)
-	c.ContainerPid = int(pid)
-
-	if c.ProcessConfig.Tty {
-		term = NewTtyConsole(c.ID, pid)
-	} else {
-		term = NewStdConsole()
-	}
-	c.ProcessConfig.Terminal = term
-
-	// Maintain our list of active containers. We'll need this later for exec
-	// and other commands.
-	d.Lock()
-	d.activeContainers[c.ID] = &activeContainer{
-		command: c,
-	}
-	d.Unlock()
-
-	if hooks.Start != nil {
-		// A closed channel for OOM is returned here as it will be
-		// non-blocking and return the correct result when read.
-		chOOM := make(chan struct{})
-		close(chOOM)
-		hooks.Start(&c.ProcessConfig, int(pid), chOOM)
-	}
-
-	exitCode, err := hcsshim.WaitForProcessInComputeSystem(c.ID, pid, hcsshim.TimeoutInfinite)
-	if err != nil {
-		if herr, ok := err.(*hcsshim.HcsError); ok && herr.Err != syscall.ERROR_BROKEN_PIPE {
-			logrus.Warnf("WaitForProcessInComputeSystem failed (container may have been killed): %s", err)
-		}
-		// Do NOT return err here as the container would have
-		// started, otherwise docker will deadlock. It's perfectly legitimate
-		// for WaitForProcessInComputeSystem to fail in situations such
-		// as the container being killed on another thread.
-		return execdriver.ExitStatus{ExitCode: hcsshim.WaitErrExecFailed}, nil
-	}
-
-	logrus.Debugf("Exiting Run() exitCode %d id=%s", exitCode, c.ID)
-	return execdriver.ExitStatus{ExitCode: int(exitCode)}, nil
-}
-
-// SupportsHooks implements the execdriver Driver interface.
-// The windows driver does not support the hook mechanism
-func (d *Driver) SupportsHooks() bool {
-	return false
-}
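
The CreateComputeSystem call in the removed Run was wrapped in a bounded retry loop to work around flaky TP4 behaviour, retrying only on specific Win32 error codes. Stripped of the HCS specifics, the shape is a plain bounded retry with a fixed delay; a minimal sketch with a stand-in fallible operation:

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// retry calls op up to maxAttempts times, sleeping between attempts,
// and returns nil on the first success or the last error otherwise.
func retry(maxAttempts int, delay time.Duration, op func() error) error {
	var err error
	for i := 0; i < maxAttempts; i++ {
		if err = op(); err == nil {
			return nil
		}
		time.Sleep(delay)
	}
	return err
}

func main() {
	attempts := 0
	err := retry(5, 50*time.Millisecond, func() error {
		attempts++
		if attempts < 3 {
			return errors.New("transient failure")
		}
		return nil
	})
	fmt.Printf("attempts=%d err=%v\n", attempts, err)
}
```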

+ 0 - 14
daemon/execdriver/windows/stats.go

@@ -1,14 +0,0 @@
-// +build windows
-
-package windows
-
-import (
-	"fmt"
-
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// Stats implements the exec driver Driver interface.
-func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
-	return nil, fmt.Errorf("Windows: Stats not implemented")
-}

+ 0 - 24
daemon/execdriver/windows/stdconsole.go

@@ -1,24 +0,0 @@
-// +build windows
-
-package windows
-
-// StdConsole is for when using a container non-interactively
-type StdConsole struct {
-}
-
-// NewStdConsole returns a new StdConsole struct.
-func NewStdConsole() *StdConsole {
-	return &StdConsole{}
-}
-
-// Resize implements Resize method of Terminal interface.
-func (s *StdConsole) Resize(h, w int) error {
-	// we do not need to resize a non tty
-	return nil
-}
-
-// Close implements Close method of Terminal interface.
-func (s *StdConsole) Close() error {
-	// nothing to close here
-	return nil
-}

+ 0 - 49
daemon/execdriver/windows/terminatekill.go

@@ -1,49 +0,0 @@
-// +build windows
-
-package windows
-
-import (
-	"fmt"
-	"syscall"
-
-	"github.com/Microsoft/hcsshim"
-	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// Terminate implements the exec driver Driver interface.
-func (d *Driver) Terminate(p *execdriver.Command) error {
-	return kill(p.ID, p.ContainerPid, syscall.SIGTERM)
-}
-
-// Kill implements the exec driver Driver interface.
-func (d *Driver) Kill(p *execdriver.Command, sig int) error {
-	return kill(p.ID, p.ContainerPid, syscall.Signal(sig))
-}
-
-func kill(id string, pid int, sig syscall.Signal) error {
-	logrus.Debugf("WindowsExec: kill() id=%s pid=%d sig=%d", id, pid, sig)
-	var err error
-	context := fmt.Sprintf("kill: sig=%d pid=%d", sig, pid)
-
-	if sig == syscall.SIGKILL || forceKill {
-		// Terminate the compute system
-		if err := hcsshim.TerminateComputeSystem(id, hcsshim.TimeoutInfinite, context); err != nil {
-			logrus.Errorf("Failed to terminate %s - %q", id, err)
-		}
-
-	} else {
-		// Terminate Process
-		if err = hcsshim.TerminateProcessInComputeSystem(id, uint32(pid)); err != nil {
-			logrus.Warnf("Failed to terminate pid %d in %s: %q", pid, id, err)
-			// Ignore errors
-			err = nil
-		}
-
-		// Shutdown the compute system
-		if err := hcsshim.ShutdownComputeSystem(id, hcsshim.TimeoutInfinite, context); err != nil {
-			logrus.Errorf("Failed to shutdown %s - %q", id, err)
-		}
-	}
-	return err
-}

+ 0 - 32
daemon/execdriver/windows/ttyconsole.go

@@ -1,32 +0,0 @@
-// +build windows
-
-package windows
-
-import (
-	"github.com/Microsoft/hcsshim"
-)
-
-// TtyConsole implements the exec driver Terminal interface.
-type TtyConsole struct {
-	id        string
-	processid uint32
-}
-
-// NewTtyConsole returns a new TtyConsole struct.
-func NewTtyConsole(id string, processid uint32) *TtyConsole {
-	tty := &TtyConsole{
-		id:        id,
-		processid: processid,
-	}
-	return tty
-}
-
-// Resize implements Resize method of Terminal interface.
-func (t *TtyConsole) Resize(h, w int) error {
-	return hcsshim.ResizeConsoleInComputeSystem(t.id, t.processid, h, w)
-}
-
-// Close implements Close method of Terminal interface.
-func (t *TtyConsole) Close() error {
-	return nil
-}

+ 0 - 14
daemon/execdriver/windows/unsupported.go

@@ -1,14 +0,0 @@
-// +build !windows
-
-package windows
-
-import (
-	"fmt"
-
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// NewDriver returns a new execdriver.Driver
-func NewDriver(root, initPath string) (execdriver.Driver, error) {
-	return nil, fmt.Errorf("Windows driver not supported on non-Windows")
-}

+ 0 - 14
daemon/execdriver/windows/update.go

@@ -1,14 +0,0 @@
-// +build windows
-
-package windows
-
-import (
-	"github.com/docker/docker/daemon/execdriver"
-)
-
-// Update updates resource configs for a container.
-func (d *Driver) Update(c *execdriver.Command) error {
-	// Updating resource isn't supported on Windows
-	// but we should return nil for enabling updating container
-	return nil
-}

+ 0 - 123
daemon/execdriver/windows/windows.go

@@ -1,123 +0,0 @@
-// +build windows
-
-package windows
-
-import (
-	"fmt"
-	"strings"
-	"sync"
-
-	"github.com/Microsoft/hcsshim"
-	"github.com/Sirupsen/logrus"
-	"github.com/docker/docker/daemon/execdriver"
-	"github.com/docker/docker/dockerversion"
-	"github.com/docker/docker/pkg/parsers"
-	"github.com/docker/engine-api/types/container"
-)
-
-// TP4RetryHack is a hack to retry CreateComputeSystem if it fails with
-// known return codes from Windows due to bugs in TP4.
-var TP4RetryHack bool
-
-// This is a daemon development variable only and should not be
-// used for running production containers on Windows.
-var dummyMode bool
-
-// This allows the daemon to force kill (HCS terminate) containers
-// rather than shutting them down gracefully
-var forceKill bool
-
-// DefaultIsolation allows users to specify a default isolation technology for
-// when running a container on Windows. For example docker daemon -D
-// --exec-opt isolation=hyperv will cause Windows to always run containers
-// as Hyper-V containers unless otherwise specified.
-var DefaultIsolation container.Isolation = "process"
-
-// Define name and version for windows
-var (
-	DriverName = "Windows 1854"
-	Version    = dockerversion.Version + " " + dockerversion.GitCommit
-)
-
-type activeContainer struct {
-	command *execdriver.Command
-}
-
-// Driver contains all information for windows driver,
-// it implements execdriver.Driver
-type Driver struct {
-	root             string
-	activeContainers map[string]*activeContainer
-	sync.Mutex
-}
-
-// Name implements the exec driver Driver interface.
-func (d *Driver) Name() string {
-	return fmt.Sprintf("\n Name: %s\n Build: %s \n Default Isolation: %s", DriverName, Version, DefaultIsolation)
-}
-
-// NewDriver returns a new windows driver, called from NewDriver of execdriver.
-func NewDriver(root string, options []string) (*Driver, error) {
-
-	for _, option := range options {
-		key, val, err := parsers.ParseKeyValueOpt(option)
-		if err != nil {
-			return nil, err
-		}
-		key = strings.ToLower(key)
-		switch key {
-
-		case "dummy":
-			switch val {
-			case "1":
-				dummyMode = true
-				logrus.Warn("Using dummy mode in Windows exec driver. This is for development use only!")
-			}
-
-		case "forcekill":
-			switch val {
-			case "1":
-				forceKill = true
-				logrus.Warn("Using force kill mode in Windows exec driver. This is for testing purposes only.")
-			}
-
-		case "isolation":
-			if !container.Isolation(val).IsValid() {
-				return nil, fmt.Errorf("Unrecognised exec driver option 'isolation':'%s'", val)
-			}
-			if container.Isolation(val).IsHyperV() {
-				DefaultIsolation = "hyperv"
-			}
-			logrus.Infof("Windows default isolation: '%s'", val)
-		default:
-			return nil, fmt.Errorf("Unrecognised exec driver option %s\n", key)
-		}
-	}
-
-	// TODO Windows TP5 timeframe. Remove this next block of code once TP4
-	// is no longer supported. Also remove the workaround in run.go.
-	//
-	// Hack for TP4.
-	// This overcomes an issue on TP4 which causes CreateComputeSystem to
-	// intermittently fail. It's predominantly here to make Windows to Windows
-	// CI more reliable.
-	TP4RetryHack = hcsshim.IsTP4()
-
-	return &Driver{
-		root:             root,
-		activeContainers: make(map[string]*activeContainer),
-	}, nil
-}
-
-// setupEnvironmentVariables converts a string array of environment variables
-// into a map as required by the HCS. Source array is in format [k1=v1] [k2=v2] etc.
-func setupEnvironmentVariables(a []string) map[string]string {
-	r := make(map[string]string)
-	for _, s := range a {
-		arr := strings.Split(s, "=")
-		if len(arr) == 2 {
-			r[arr[0]] = arr[1]
-		}
-	}
-	return r
-}

+ 0 - 1
daemon/info.go

@@ -84,7 +84,6 @@ func (daemon *Daemon) SystemInfo() (*types.Info, error) {
 		NFd:                fileutils.GetTotalUsedFds(),
 		NGoroutines:        runtime.NumGoroutine(),
 		SystemTime:         time.Now().Format(time.RFC3339Nano),
-		ExecutionDriver:    daemon.ExecutionDriver().Name(),
 		LoggingDriver:      daemon.defaultLogConfig.Type,
 		CgroupDriver:       daemon.getCgroupDriver(),
 		NEventsListener:    daemon.EventsService.SubscribersCount(),

+ 5 - 5
daemon/inspect_unix.go

@@ -82,10 +82,10 @@ func addMountPoints(container *container.Container) []types.MountPoint {
 
 func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig {
 	return &backend.ExecProcessConfig{
-		Tty:        e.ProcessConfig.Tty,
-		Entrypoint: e.ProcessConfig.Entrypoint,
-		Arguments:  e.ProcessConfig.Arguments,
-		Privileged: &e.ProcessConfig.Privileged,
-		User:       e.ProcessConfig.User,
+		Tty:        e.Tty,
+		Entrypoint: e.Entrypoint,
+		Arguments:  e.Args,
+		Privileged: &e.Privileged,
+		User:       e.User,
 	}
 }

+ 3 - 3
daemon/inspect_windows.go

@@ -33,8 +33,8 @@ func (daemon *Daemon) containerInspectPre120(name string) (*types.ContainerJSON,
 
 func inspectExecProcessConfig(e *exec.Config) *backend.ExecProcessConfig {
 	return &backend.ExecProcessConfig{
-		Tty:        e.ProcessConfig.Tty,
-		Entrypoint: e.ProcessConfig.Entrypoint,
-		Arguments:  e.ProcessConfig.Arguments,
+		Tty:        e.Tty,
+		Entrypoint: e.Entrypoint,
+		Arguments:  e.Args,
 	}
 }

+ 4 - 0
daemon/kill.go

@@ -69,6 +69,10 @@ func (daemon *Daemon) killWithSignal(container *container.Container, sig int) er
 
 	container.ExitOnNext()
 
+	if !daemon.IsShuttingDown() {
+		container.HasBeenManuallyStopped = true
+	}
+
 	// if the container is currently restarting we do not need to send the signal
 	// to the process.  Telling the monitor that it should exit on it's next event
 	// loop is enough

+ 143 - 0
daemon/monitor.go

@@ -0,0 +1,143 @@
+package daemon
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"runtime"
+	"strconv"
+
+	"github.com/Sirupsen/logrus"
+	"github.com/docker/docker/libcontainerd"
+	"github.com/docker/docker/runconfig"
+)
+
+// StateChanged updates daemon state changes from containerd
+func (daemon *Daemon) StateChanged(id string, e libcontainerd.StateInfo) error {
+	c := daemon.containers.Get(id)
+	if c == nil {
+		return fmt.Errorf("no such container: %s", id)
+	}
+
+	switch e.State {
+	case libcontainerd.StateOOM:
+		// StateOOM is Linux specific and should never be hit on Windows
+		if runtime.GOOS == "windows" {
+			return errors.New("Received StateOOM from libcontainerd on Windows. This should never happen.")
+		}
+		daemon.LogContainerEvent(c, "oom")
+	case libcontainerd.StateExit:
+		c.Lock()
+		defer c.Unlock()
+		c.Wait()
+		c.Reset(false)
+		c.SetStopped(platformConstructExitStatus(e))
+		attributes := map[string]string{
+			"exitCode": strconv.Itoa(int(e.ExitCode)),
+		}
+		daemon.LogContainerEventWithAttributes(c, "die", attributes)
+		daemon.Cleanup(c)
+		// FIXME: there is a race condition between two RUN instructions in a Dockerfile
+		// because they share the same runconfig and change the image. Must be fixed
+		// in builder/builder.go
+		return c.ToDisk()
+	case libcontainerd.StateRestart:
+		c.Lock()
+		defer c.Unlock()
+		c.Reset(false)
+		c.RestartCount++
+		c.SetRestarting(platformConstructExitStatus(e))
+		attributes := map[string]string{
+			"exitCode": strconv.Itoa(int(e.ExitCode)),
+		}
+		daemon.LogContainerEventWithAttributes(c, "die", attributes)
+		return c.ToDisk()
+	case libcontainerd.StateExitProcess:
+		c.Lock()
+		defer c.Unlock()
+		if execConfig := c.ExecCommands.Get(e.ProcessID); execConfig != nil {
+			ec := int(e.ExitCode)
+			execConfig.ExitCode = &ec
+			execConfig.Running = false
+			execConfig.Wait()
+			if err := execConfig.CloseStreams(); err != nil {
+				logrus.Errorf("%s: %s", c.ID, err)
+			}
+
+			// remove the exec command from the container's store only and not the
+			// daemon's store so that the exec command can be inspected.
+			c.ExecCommands.Delete(execConfig.ID)
+		} else {
+			logrus.Warnf("Ignoring StateExitProcess for %v but no exec command found", e)
+		}
+	case libcontainerd.StateStart, libcontainerd.StateRestore:
+		c.SetRunning(int(e.Pid), e.State == libcontainerd.StateStart)
+		c.HasBeenManuallyStopped = false
+		if err := c.ToDisk(); err != nil {
+			c.Reset(false)
+			return err
+		}
+	case libcontainerd.StatePause:
+		c.Paused = true
+		daemon.LogContainerEvent(c, "pause")
+	case libcontainerd.StateResume:
+		c.Paused = false
+		daemon.LogContainerEvent(c, "unpause")
+	}
+
+	return nil
+}
+
+// AttachStreams is called by libcontainerd to connect the stdio.
+func (daemon *Daemon) AttachStreams(id string, iop libcontainerd.IOPipe) error {
+	var s *runconfig.StreamConfig
+	c := daemon.containers.Get(id)
+	if c == nil {
+		ec, err := daemon.getExecConfig(id)
+		if err != nil {
+			return fmt.Errorf("no such exec/container: %s", id)
+		}
+		s = ec.StreamConfig
+	} else {
+		s = c.StreamConfig
+		if err := daemon.StartLogging(c); err != nil {
+			c.Reset(false)
+			return err
+		}
+	}
+
+	if stdin := s.Stdin(); stdin != nil {
+		if iop.Stdin != nil {
+			go func() {
+				io.Copy(iop.Stdin, stdin)
+				iop.Stdin.Close()
+			}()
+		}
+	} else {
+		if c != nil && !c.Config.Tty {
+			// no stdin stream and no tty: nothing will ever write here, so close containerd's iopipe stdin (with a tty it is kept open)
+			if iop.Stdin != nil {
+				iop.Stdin.Close()
+			}
+		}
+	}
+
+	copy := func(w io.Writer, r io.Reader) {
+		s.Add(1)
+		go func() {
+			if _, err := io.Copy(w, r); err != nil {
+				logrus.Errorf("%v stream copy error: %v", id, err)
+			}
+			s.Done()
+		}()
+	}
+
+	if iop.Stdout != nil {
+		copy(s.Stdout(), iop.Stdout)
+	}
+	if iop.Stderr != nil {
+		copy(s.Stderr(), iop.Stderr)
+	}
+
+	return nil
+}
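
StateChanged above is the single entry point through which containerd events now drive container state: each libcontainerd state maps to one transition on the container plus, in most cases, an emitted event. A reduced sketch of that dispatch shape, using hypothetical stand-in types rather than the real libcontainerd and container packages:

```go
package main

import "fmt"

// stateInfo is a stand-in for libcontainerd.StateInfo.
type stateInfo struct {
	State    string
	ExitCode uint32
	Pid      uint32
}

// tracked is a stand-in for the daemon's container object.
type tracked struct {
	id      string
	running bool
	exit    int
}

// stateChanged mirrors the switch in daemon.StateChanged: each event kind
// maps to exactly one state transition on the tracked container.
func (c *tracked) stateChanged(e stateInfo) error {
	switch e.State {
	case "start", "restore":
		c.running = true
	case "exit":
		c.running = false
		c.exit = int(e.ExitCode)
	case "oom":
		fmt.Printf("%s: oom event\n", c.id)
	default:
		return fmt.Errorf("unhandled state %q", e.State)
	}
	return nil
}

func main() {
	c := &tracked{id: "abc123"}
	_ = c.stateChanged(stateInfo{State: "start", Pid: 42})
	_ = c.stateChanged(stateInfo{State: "exit", ExitCode: 1})
	fmt.Printf("running=%v exit=%d\n", c.running, c.exit)
}
```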

+ 14 - 0
daemon/monitor_linux.go

@@ -0,0 +1,14 @@
+package daemon
+
+import (
+	"github.com/docker/docker/container"
+	"github.com/docker/docker/libcontainerd"
+)
+
+// platformConstructExitStatus returns a platform specific exit status structure
+func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
+	return &container.ExitStatus{
+		ExitCode:  int(e.ExitCode),
+		OOMKilled: e.OOMKilled,
+	}
+}

+ 13 - 0
daemon/monitor_windows.go

@@ -0,0 +1,13 @@
+package daemon
+
+import (
+	"github.com/docker/docker/container"
+	"github.com/docker/docker/libcontainerd"
+)
+
+// platformConstructExitStatus returns a platform specific exit status structure
+func platformConstructExitStatus(e libcontainerd.StateInfo) *container.ExitStatus {
+	return &container.ExitStatus{
+		ExitCode: int(e.ExitCode),
+	}
+}

+ 652 - 0
daemon/oci_linux.go

@@ -0,0 +1,652 @@
+package daemon
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/docker/docker/container"
+	"github.com/docker/docker/daemon/caps"
+	"github.com/docker/docker/libcontainerd"
+	"github.com/docker/docker/oci"
+	"github.com/docker/docker/pkg/idtools"
+	"github.com/docker/docker/pkg/mount"
+	"github.com/docker/docker/pkg/stringutils"
+	"github.com/docker/docker/pkg/symlink"
+	"github.com/docker/docker/volume"
+	containertypes "github.com/docker/engine-api/types/container"
+	"github.com/opencontainers/runc/libcontainer/apparmor"
+	"github.com/opencontainers/runc/libcontainer/devices"
+	"github.com/opencontainers/runc/libcontainer/user"
+	"github.com/opencontainers/specs/specs-go"
+)
+
+func setResources(s *specs.Spec, r containertypes.Resources) error {
+	weightDevices, err := getBlkioWeightDevices(r)
+	if err != nil {
+		return err
+	}
+	readBpsDevice, err := getBlkioReadBpsDevices(r)
+	if err != nil {
+		return err
+	}
+	writeBpsDevice, err := getBlkioWriteBpsDevices(r)
+	if err != nil {
+		return err
+	}
+	readIOpsDevice, err := getBlkioReadIOpsDevices(r)
+	if err != nil {
+		return err
+	}
+	writeIOpsDevice, err := getBlkioWriteIOpsDevices(r)
+	if err != nil {
+		return err
+	}
+
+	memoryRes := getMemoryResources(r)
+	cpuRes := getCPUResources(r)
+	blkioWeight := r.BlkioWeight
+
+	specResources := &specs.Resources{
+		Memory: memoryRes,
+		CPU:    cpuRes,
+		BlockIO: &specs.BlockIO{
+			Weight:                  &blkioWeight,
+			WeightDevice:            weightDevices,
+			ThrottleReadBpsDevice:   readBpsDevice,
+			ThrottleWriteBpsDevice:  writeBpsDevice,
+			ThrottleReadIOPSDevice:  readIOpsDevice,
+			ThrottleWriteIOPSDevice: writeIOpsDevice,
+		},
+		DisableOOMKiller: r.OomKillDisable,
+		Pids: &specs.Pids{
+			Limit: &r.PidsLimit,
+		},
+	}
+
+	if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
+		specResources.Devices = s.Linux.Resources.Devices
+	}
+
+	s.Linux.Resources = specResources
+	return nil
+}
+
+func setDevices(s *specs.Spec, c *container.Container) error {
+	// Build lists of devices allowed and created within the container.
+	var devs []specs.Device
+	if c.HostConfig.Privileged {
+		hostDevices, err := devices.HostDevices()
+		if err != nil {
+			return err
+		}
+		for _, d := range hostDevices {
+			devs = append(devs, specDevice(d))
+		}
+	} else {
+		for _, deviceMapping := range c.HostConfig.Devices {
+			d, err := getDevicesFromPath(deviceMapping)
+			if err != nil {
+				return err
+			}
+
+			devs = append(devs, d...)
+		}
+	}
+
+	s.Linux.Devices = append(s.Linux.Devices, devs...)
+	return nil
+}
+
+func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
+	var rlimits []specs.Rlimit
+
+	ulimits := c.HostConfig.Ulimits
+	// Merge ulimits with daemon defaults
+	ulIdx := make(map[string]struct{})
+	for _, ul := range ulimits {
+		ulIdx[ul.Name] = struct{}{}
+	}
+	for name, ul := range daemon.configStore.Ulimits {
+		if _, exists := ulIdx[name]; !exists {
+			ulimits = append(ulimits, ul)
+		}
+	}
+
+	for _, ul := range ulimits {
+		rlimits = append(rlimits, specs.Rlimit{
+			Type: "RLIMIT_" + strings.ToUpper(ul.Name),
+			Soft: uint64(ul.Soft),
+			Hard: uint64(ul.Hard),
+		})
+	}
+
+	s.Process.Rlimits = rlimits
+	return nil
+}
+
+func setUser(s *specs.Spec, c *container.Container) error {
+	uid, gid, additionalGids, err := getUser(c, c.Config.User)
+	if err != nil {
+		return err
+	}
+	s.Process.User.UID = uid
+	s.Process.User.GID = gid
+	s.Process.User.AdditionalGids = additionalGids
+	return nil
+}
+
+func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
+	fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS)
+	if err != nil {
+		return nil, err
+	}
+	return os.Open(fp)
+}
+
+func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
+	passwdPath, err := user.GetPasswdPath()
+	if err != nil {
+		return 0, 0, nil, err
+	}
+	groupPath, err := user.GetGroupPath()
+	if err != nil {
+		return 0, 0, nil, err
+	}
+	passwdFile, err := readUserFile(c, passwdPath)
+	if err == nil {
+		defer passwdFile.Close()
+	}
+	groupFile, err := readUserFile(c, groupPath)
+	if err == nil {
+		defer groupFile.Close()
+	}
+
+	execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
+	if err != nil {
+		return 0, 0, nil, err
+	}
+
+	// todo: fix this double read by a change to libcontainer/user pkg
+	groupFile, err = readUserFile(c, groupPath)
+	if err == nil {
+		defer groupFile.Close()
+	}
+	var addGroups []int
+	if len(c.HostConfig.GroupAdd) > 0 {
+		addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
+		if err != nil {
+			return 0, 0, nil, err
+		}
+	}
+	uid := uint32(execUser.Uid)
+	gid := uint32(execUser.Gid)
+	sgids := append(execUser.Sgids, addGroups...)
+	var additionalGids []uint32
+	for _, g := range sgids {
+		additionalGids = append(additionalGids, uint32(g))
+	}
+	return uid, gid, additionalGids, nil
+}
+
+func setNamespace(s *specs.Spec, ns specs.Namespace) {
+	for i, n := range s.Linux.Namespaces {
+		if n.Type == ns.Type {
+			s.Linux.Namespaces[i] = ns
+			return
+		}
+	}
+	s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
+}
+
+func setCapabilities(s *specs.Spec, c *container.Container) error {
+	var caplist []string
+	var err error
+	if c.HostConfig.Privileged {
+		caplist = caps.GetAllCapabilities()
+	} else {
+		caplist, err = caps.TweakCapabilities(s.Process.Capabilities, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
+		if err != nil {
+			return err
+		}
+	}
+	s.Process.Capabilities = caplist
+	return nil
+}
+
+func delNamespace(s *specs.Spec, nsType specs.NamespaceType) {
+	idx := -1
+	for i, n := range s.Linux.Namespaces {
+		if n.Type == nsType {
+			idx = i
+		}
+	}
+	if idx >= 0 {
+		s.Linux.Namespaces = append(s.Linux.Namespaces[:idx], s.Linux.Namespaces[idx+1:]...)
+	}
+}
+
+func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
+	// network
+	if !c.Config.NetworkDisabled {
+		ns := specs.Namespace{Type: "network"}
+		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
+		if parts[0] == "container" {
+			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
+			if err != nil {
+				return err
+			}
+			ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
+		} else if c.HostConfig.NetworkMode.IsHost() {
+			ns.Path = c.NetworkSettings.SandboxKey
+		}
+		setNamespace(s, ns)
+	}
+	// ipc
+	if c.HostConfig.IpcMode.IsContainer() {
+		ns := specs.Namespace{Type: "ipc"}
+		ic, err := daemon.getIpcContainer(c)
+		if err != nil {
+			return err
+		}
+		ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
+		setNamespace(s, ns)
+	} else if c.HostConfig.IpcMode.IsHost() {
+		delNamespace(s, specs.NamespaceType("ipc"))
+	} else {
+		ns := specs.Namespace{Type: "ipc"}
+		setNamespace(s, ns)
+	}
+	// pid
+	if c.HostConfig.PidMode.IsHost() {
+		delNamespace(s, specs.NamespaceType("pid"))
+	}
+	// uts
+	if c.HostConfig.UTSMode.IsHost() {
+		delNamespace(s, specs.NamespaceType("uts"))
+		s.Hostname = ""
+	}
+	// user
+	if c.HostConfig.UsernsMode.IsPrivate() {
+		uidMap, gidMap := daemon.GetUIDGIDMaps()
+		if uidMap != nil {
+			ns := specs.Namespace{Type: "user"}
+			setNamespace(s, ns)
+			s.Linux.UIDMappings = specMapping(uidMap)
+			s.Linux.GIDMappings = specMapping(gidMap)
+		}
+	}
+
+	return nil
+}
+
+func specMapping(s []idtools.IDMap) []specs.IDMapping {
+	var ids []specs.IDMapping
+	for _, item := range s {
+		ids = append(ids, specs.IDMapping{
+			HostID:      uint32(item.HostID),
+			ContainerID: uint32(item.ContainerID),
+			Size:        uint32(item.Size),
+		})
+	}
+	return ids
+}
+
+func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
+	for _, m := range mountinfo {
+		if m.Mountpoint == dir {
+			return m
+		}
+	}
+	return nil
+}
+
+// Get the source mount point of directory passed in as argument. Also return
+// optional fields.
+func getSourceMount(source string) (string, string, error) {
+	// Ensure any symlinks are resolved.
+	sourcePath, err := filepath.EvalSymlinks(source)
+	if err != nil {
+		return "", "", err
+	}
+
+	mountinfos, err := mount.GetMounts()
+	if err != nil {
+		return "", "", err
+	}
+
+	mountinfo := getMountInfo(mountinfos, sourcePath)
+	if mountinfo != nil {
+		return sourcePath, mountinfo.Optional, nil
+	}
+
+	path := sourcePath
+	for {
+		path = filepath.Dir(path)
+
+		mountinfo = getMountInfo(mountinfos, path)
+		if mountinfo != nil {
+			return path, mountinfo.Optional, nil
+		}
+
+		if path == "/" {
+			break
+		}
+	}
+
+	// If we are here, we did not find a parent mount. Something is wrong.
+	return "", "", fmt.Errorf("Could not find source mount of %s", source)
+}
+
+// Ensure mount point on which path is mounted, is shared.
+func ensureShared(path string) error {
+	sharedMount := false
+
+	sourceMount, optionalOpts, err := getSourceMount(path)
+	if err != nil {
+		return err
+	}
+	// Make sure source mount point is shared.
+	optsSplit := strings.Split(optionalOpts, " ")
+	for _, opt := range optsSplit {
+		if strings.HasPrefix(opt, "shared:") {
+			sharedMount = true
+			break
+		}
+	}
+
+	if !sharedMount {
+		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
+	}
+	return nil
+}
+
+// Ensure mount point on which path is mounted, is either shared or slave.
+func ensureSharedOrSlave(path string) error {
+	sharedMount := false
+	slaveMount := false
+
+	sourceMount, optionalOpts, err := getSourceMount(path)
+	if err != nil {
+		return err
+	}
+	// Make sure source mount point is shared or a slave.
+	optsSplit := strings.Split(optionalOpts, " ")
+	for _, opt := range optsSplit {
+		if strings.HasPrefix(opt, "shared:") {
+			sharedMount = true
+			break
+		} else if strings.HasPrefix(opt, "master:") {
+			slaveMount = true
+			break
+		}
+	}
+
+	if !sharedMount && !slaveMount {
+		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
+	}
+	return nil
+}
+
+var (
+	mountPropagationMap = map[string]int{
+		"private":  mount.PRIVATE,
+		"rprivate": mount.RPRIVATE,
+		"shared":   mount.SHARED,
+		"rshared":  mount.RSHARED,
+		"slave":    mount.SLAVE,
+		"rslave":   mount.RSLAVE,
+	}
+
+	mountPropagationReverseMap = map[int]string{
+		mount.PRIVATE:  "private",
+		mount.RPRIVATE: "rprivate",
+		mount.SHARED:   "shared",
+		mount.RSHARED:  "rshared",
+		mount.SLAVE:    "slave",
+		mount.RSLAVE:   "rslave",
+	}
+)
+
+func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
+	userMounts := make(map[string]struct{})
+	for _, m := range mounts {
+		userMounts[m.Destination] = struct{}{}
+	}
+
+	// Filter out mounts that are overridden by user-supplied mounts
+	var defaultMounts []specs.Mount
+	_, mountDev := userMounts["/dev"]
+	for _, m := range s.Mounts {
+		if _, ok := userMounts[m.Destination]; !ok {
+			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
+				continue
+			}
+			defaultMounts = append(defaultMounts, m)
+		}
+	}
+
+	s.Mounts = defaultMounts
+	for _, m := range mounts {
+		for _, cm := range s.Mounts {
+			if cm.Destination == m.Destination {
+				return fmt.Errorf("Duplicate mount point '%s'", m.Destination)
+			}
+		}
+
+		if m.Source == "tmpfs" {
+			opt := []string{"noexec", "nosuid", "nodev", volume.DefaultPropagationMode}
+			if m.Data != "" {
+				opt = append(opt, strings.Split(m.Data, ",")...)
+			} else {
+				opt = append(opt, "size=65536k")
+			}
+
+			s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: opt})
+			continue
+		}
+
+		mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}
+
+		// Determine property of RootPropagation based on volume
+		// properties. If a volume is shared, then keep root propagation
+		// shared. This should work for slave and private volumes too.
+		//
+		// For slave volumes, it can be either [r]shared/[r]slave.
+		//
+		// For private volumes any root propagation value should work.
+		pFlag := mountPropagationMap[m.Propagation]
+		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
+			if err := ensureShared(m.Source); err != nil {
+				return err
+			}
+			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
+			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
+				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
+			}
+		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
+			if err := ensureSharedOrSlave(m.Source); err != nil {
+				return err
+			}
+			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
+			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
+				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
+			}
+		}
+
+		opts := []string{"rbind"}
+		if !m.Writable {
+			opts = append(opts, "ro")
+		}
+		if pFlag != 0 {
+			opts = append(opts, mountPropagationReverseMap[pFlag])
+		}
+
+		mt.Options = opts
+		s.Mounts = append(s.Mounts, mt)
+	}
+
+	if s.Root.Readonly {
+		for i, m := range s.Mounts {
+			switch m.Destination {
+			case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
+				continue
+			}
+			if _, ok := userMounts[m.Destination]; !ok {
+				if !stringutils.InSlice(m.Options, "ro") {
+					s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
+				}
+			}
+		}
+	}
+
+	if c.HostConfig.Privileged {
+		if !s.Root.Readonly {
+			// clear readonly for /sys
+			for i := range s.Mounts {
+				if s.Mounts[i].Destination == "/sys" {
+					clearReadOnly(&s.Mounts[i])
+				}
+			}
+		}
+	}
+
+	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
+	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
+	if uidMap, _ := daemon.GetUIDGIDMaps(); uidMap != nil || c.HostConfig.Privileged {
+		for i, m := range s.Mounts {
+			if m.Type == "cgroup" {
+				clearReadOnly(&s.Mounts[i])
+			}
+		}
+	}
+
+	return nil
+}
+
+func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
+	linkedEnv, err := daemon.setupLinkedContainers(c)
+	if err != nil {
+		return err
+	}
+	s.Root = specs.Root{
+		Path:     c.BaseFS,
+		Readonly: c.HostConfig.ReadonlyRootfs,
+	}
+	rootUID, rootGID := daemon.GetRemappedUIDGID()
+	if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil {
+		return err
+	}
+	cwd := c.Config.WorkingDir
+	if len(cwd) == 0 {
+		cwd = "/"
+	}
+	s.Process.Args = append([]string{c.Path}, c.Args...)
+	s.Process.Cwd = cwd
+	s.Process.Env = c.CreateDaemonEnvironment(linkedEnv)
+	s.Process.Terminal = c.Config.Tty
+	s.Hostname = c.FullHostname()
+
+	return nil
+}
+
+func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, error) {
+	s := oci.DefaultSpec()
+	if err := daemon.populateCommonSpec(&s, c); err != nil {
+		return nil, err
+	}
+
+	var cgroupsPath string
+	if c.HostConfig.CgroupParent != "" {
+		cgroupsPath = filepath.Join(c.HostConfig.CgroupParent, c.ID)
+	} else {
+		defaultCgroupParent := "/docker"
+		if daemon.configStore.CgroupParent != "" {
+			defaultCgroupParent = daemon.configStore.CgroupParent
+		} else if daemon.usingSystemd() {
+			defaultCgroupParent = "system.slice"
+		}
+		cgroupsPath = filepath.Join(defaultCgroupParent, c.ID)
+	}
+	s.Linux.CgroupsPath = &cgroupsPath
+
+	if err := setResources(&s, c.HostConfig.Resources); err != nil {
+		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
+	}
+	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
+	if err := setDevices(&s, c); err != nil {
+		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
+	}
+	if err := setRlimits(daemon, &s, c); err != nil {
+		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
+	}
+	if err := setUser(&s, c); err != nil {
+		return nil, fmt.Errorf("linux spec user: %v", err)
+	}
+	if err := setNamespaces(daemon, &s, c); err != nil {
+		return nil, fmt.Errorf("linux spec namespaces: %v", err)
+	}
+	if err := setCapabilities(&s, c); err != nil {
+		return nil, fmt.Errorf("linux spec capabilities: %v", err)
+	}
+	if err := setSeccomp(daemon, &s, c); err != nil {
+		return nil, fmt.Errorf("linux seccomp: %v", err)
+	}
+
+	if err := daemon.setupIpcDirs(c); err != nil {
+		return nil, err
+	}
+
+	mounts, err := daemon.setupMounts(c)
+	if err != nil {
+		return nil, err
+	}
+	mounts = append(mounts, c.IpcMounts()...)
+	mounts = append(mounts, c.TmpfsMounts()...)
+	if err := setMounts(daemon, &s, c, mounts); err != nil {
+		return nil, fmt.Errorf("linux mounts: %v", err)
+	}
+
+	for _, ns := range s.Linux.Namespaces {
+		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
+			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
+			if err != nil {
+				return nil, err
+			}
+
+			s.Hooks = specs.Hooks{
+				Prestart: []specs.Hook{{
+					Path: target, // FIXME: cross-platform
+					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
+				}},
+			}
+		}
+	}
+
+	if apparmor.IsEnabled() {
+		appArmorProfile := "docker-default"
+		if c.HostConfig.Privileged {
+			appArmorProfile = "unconfined"
+		} else if len(c.AppArmorProfile) > 0 {
+			appArmorProfile = c.AppArmorProfile
+		}
+		s.Process.ApparmorProfile = appArmorProfile
+	}
+	s.Process.SelinuxLabel = c.GetProcessLabel()
+	s.Process.NoNewPrivileges = c.NoNewPrivileges
+
+	return (*libcontainerd.Spec)(&s), nil
+}
+
+func clearReadOnly(m *specs.Mount) {
+	var opt []string
+	for _, o := range m.Options {
+		if o != "ro" {
+			opt = append(opt, o)
+		}
+	}
+	m.Options = opt
+}
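Editor's sketch of clearReadOnly's effect, using a hypothetical cgroup mount; specs.Mount is the same type imported at the top of this file, and only the "ro" option is dropped:

func exampleClearReadOnly() []string {
	m := specs.Mount{
		Destination: "/sys/fs/cgroup",
		Options:     []string{"nosuid", "noexec", "nodev", "ro"},
	}
	clearReadOnly(&m)
	return m.Options // ["nosuid", "noexec", "nodev"]; every other option is preserved
}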

+ 204 - 0
daemon/oci_windows.go

@@ -0,0 +1,204 @@
+package daemon
+
+import (
+	"fmt"
+	"strings"
+	"syscall"
+
+	"github.com/docker/docker/container"
+	"github.com/docker/docker/layer"
+	"github.com/docker/docker/libcontainerd"
+	"github.com/docker/docker/libcontainerd/windowsoci"
+	"github.com/docker/docker/oci"
+)
+
+func (daemon *Daemon) createSpec(c *container.Container) (*libcontainerd.Spec, error) {
+	s := oci.DefaultSpec()
+
+	linkedEnv, err := daemon.setupLinkedContainers(c)
+	if err != nil {
+		return nil, err
+	}
+
+	// TODO Windows - this can be removed. Not used (UID/GID)
+	rootUID, rootGID := daemon.GetRemappedUIDGID()
+	if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil {
+		return nil, err
+	}
+
+	img, err := daemon.imageStore.Get(c.ImageID)
+	if err != nil {
+		return nil, fmt.Errorf("Failed to graph.Get on ImageID %s - %s", c.ImageID, err)
+	}
+
+	// In base spec
+	s.Hostname = c.FullHostname()
+
+	// In s.Mounts
+	mounts, err := daemon.setupMounts(c)
+	if err != nil {
+		return nil, err
+	}
+	for _, mount := range mounts {
+		s.Mounts = append(s.Mounts, windowsoci.Mount{
+			Source:      mount.Source,
+			Destination: mount.Destination,
+			Readonly:    !mount.Writable,
+		})
+	}
+
+	// Are we going to run as a Hyper-V container?
+	hv := false
+	if c.HostConfig.Isolation.IsDefault() {
+		// Container is set to use the default, so take the default from the daemon configuration
+		hv = daemon.defaultIsolation.IsHyperV()
+	} else {
+		// Container is requesting an isolation mode. Honour it.
+		hv = c.HostConfig.Isolation.IsHyperV()
+	}
+	if hv {
+		// TODO We don't yet have the ImagePath hooked up. But set to
+		// something non-nil so that libcontainerd picks it up.
+		s.Windows.HvRuntime = &windowsoci.HvRuntime{}
+	}
+
+	// In s.Process
+	if c.Config.ArgsEscaped {
+		s.Process.Args = append([]string{c.Path}, c.Args...)
+	} else {
+		// TODO (jstarks): escape the entrypoint too once the tests are fixed to not rely on this behavior
+		s.Process.Args = append([]string{c.Path}, escapeArgs(c.Args)...)
+	}
+	s.Process.Cwd = c.Config.WorkingDir
+	s.Process.Env = c.CreateDaemonEnvironment(linkedEnv)
+	s.Process.InitialConsoleSize = c.HostConfig.ConsoleSize
+	s.Process.Terminal = c.Config.Tty
+	s.Process.User.User = c.Config.User
+
+	// In spec.Root
+	s.Root.Path = c.BaseFS
+	s.Root.Readonly = c.HostConfig.ReadonlyRootfs
+
+	// In s.Windows
+	s.Windows.FirstStart = !c.HasBeenStartedBefore
+
+	// s.Windows.LayerFolder.
+	m, err := c.RWLayer.Metadata()
+	if err != nil {
+		return nil, fmt.Errorf("Failed to get layer metadata - %s", err)
+	}
+	s.Windows.LayerFolder = m["dir"]
+
+	// s.Windows.LayerPaths
+	var layerPaths []string
+	if img.RootFS != nil && img.RootFS.Type == "layers+base" {
+		max := len(img.RootFS.DiffIDs)
+		for i := 0; i <= max; i++ {
+			img.RootFS.DiffIDs = img.RootFS.DiffIDs[:i]
+			path, err := layer.GetLayerPath(daemon.layerStore, img.RootFS.ChainID())
+			if err != nil {
+				return nil, fmt.Errorf("Failed to get layer path from graphdriver %s for ImageID %s - %s", daemon.layerStore, img.RootFS.ChainID(), err)
+			}
+			// Reverse order, expecting parent most first
+			layerPaths = append([]string{path}, layerPaths...)
+		}
+	}
+	s.Windows.LayerPaths = layerPaths
+
+	// In s.Windows.Networking (TP5+ libnetwork way of doing things)
+	// Connect all the libnetwork allocated networks to the container
+	var epList []string
+	if c.NetworkSettings != nil {
+		for n := range c.NetworkSettings.Networks {
+			sn, err := daemon.FindNetwork(n)
+			if err != nil {
+				continue
+			}
+
+			ep, err := c.GetEndpointInNetwork(sn)
+			if err != nil {
+				continue
+			}
+
+			data, err := ep.DriverInfo()
+			if err != nil {
+				continue
+			}
+			if data["hnsid"] != nil {
+				epList = append(epList, data["hnsid"].(string))
+			}
+		}
+	}
+	s.Windows.Networking = &windowsoci.Networking{
+		EndpointList: epList,
+	}
+
+	// In s.Windows.Networking (TP4 back compat)
+	// TODO Windows: Post TP4 - Remove this along with definitions from spec
+	// and changes to libcontainerd to not read these fields.
+	if daemon.netController == nil {
+		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
+		switch parts[0] {
+		case "none":
+		case "default", "": // empty string to support existing containers
+			if !c.Config.NetworkDisabled {
+				s.Windows.Networking = &windowsoci.Networking{
+					MacAddress:   c.Config.MacAddress,
+					Bridge:       daemon.configStore.bridgeConfig.Iface,
+					PortBindings: c.HostConfig.PortBindings,
+				}
+			}
+		default:
+			return nil, fmt.Errorf("invalid network mode: %s", c.HostConfig.NetworkMode)
+		}
+	}
+
+	// In s.Windows.Resources
+	// @darrenstahlmsft implement these resources
+	cpuShares := uint64(c.HostConfig.CPUShares)
+	s.Windows.Resources = &windowsoci.Resources{
+		CPU: &windowsoci.CPU{
+			//TODO Count: ...,
+			//TODO Percent: ...,
+			Shares: &cpuShares,
+		},
+		Memory: &windowsoci.Memory{
+		//TODO Limit: ...,
+		//TODO Reservation: ...,
+		},
+		Network: &windowsoci.Network{
+		//TODO Bandwidth: ...,
+		},
+		Storage: &windowsoci.Storage{
+		//TODO Bps: ...,
+		//TODO Iops: ...,
+		//TODO SandboxSize: ...,
+		},
+	}
+
+	// BUGBUG - Next problem. This was an exec opt. Where do we now get these?
+	// Come back to this when we add Xenon support.
+	//	var hvPartition bool
+	//	// Work out the isolation (whether it is a hypervisor partition)
+	//	if c.HostConfig.Isolation.IsDefault() {
+	//		// Not specified by caller. Take daemon default
+	//		hvPartition = windows.DefaultIsolation.IsHyperV()
+	//	} else {
+	//		// Take value specified by caller
+	//		hvPartition = c.HostConfig.Isolation.IsHyperV()
+	//	}
+
+	//		Isolation:   string(c.HostConfig.Isolation),
+	//		HvPartition: hvPartition,
+	//	}
+
+	return (*libcontainerd.Spec)(&s), nil
+}
+
+func escapeArgs(args []string) []string {
+	escapedArgs := make([]string, len(args))
+	for i, a := range args {
+		escapedArgs[i] = syscall.EscapeArg(a)
+	}
+	return escapedArgs
+}
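Editor's sketch of what escapeArgs does to a hypothetical argument list. syscall.EscapeArg is the Windows-only helper used above; it quotes arguments containing spaces or quotes according to the Windows command-line rules, so the result shown in the comment is approximate:

func exampleEscapeArgs() []string {
	// Hypothetical arguments; only the one containing spaces needs quoting.
	return escapeArgs([]string{"cmd", "/S", "/C", "echo hello world"})
	// roughly: ["cmd", "/S", "/C", "\"echo hello world\""]
}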

+ 2 - 3
daemon/pause.go

@@ -41,10 +41,9 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
 		return errContainerIsRestarting(container.ID)
 	}
 
-	if err := daemon.execDriver.Pause(container.Command); err != nil {
+	if err := daemon.containerd.Pause(container.ID); err != nil {
 		return fmt.Errorf("Cannot pause container %s: %s", container.ID, err)
 	}
-	container.Paused = true
-	daemon.LogContainerEvent(container, "pause")
+
 	return nil
 }

+ 8 - 5
daemon/resize.go

@@ -1,6 +1,10 @@
 package daemon
 
-import "fmt"
+import (
+	"fmt"
+
+	"github.com/docker/docker/libcontainerd"
+)
 
 // ContainerResize changes the size of the TTY of the process running
 // in the container with the given name to the given height and width.
@@ -14,7 +18,7 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error {
 		return errNotRunning{container.ID}
 	}
 
-	if err = container.Resize(height, width); err == nil {
+	if err = daemon.containerd.Resize(container.ID, libcontainerd.InitFriendlyName, width, height); err == nil {
 		attributes := map[string]string{
 			"height": fmt.Sprintf("%d", height),
 			"width":  fmt.Sprintf("%d", width),
@@ -28,10 +32,9 @@ func (daemon *Daemon) ContainerResize(name string, height, width int) error {
 // running in the exec with the given name to the given height and
 // width.
 func (daemon *Daemon) ContainerExecResize(name string, height, width int) error {
-	ExecConfig, err := daemon.getExecConfig(name)
+	ec, err := daemon.getExecConfig(name)
 	if err != nil {
 		return err
 	}
-
-	return ExecConfig.Resize(height, width)
+	return daemon.containerd.Resize(ec.ContainerID, ec.ID, width, height)
 }
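Editor's note: the containerd client's Resize takes width before height, the reverse of the removed container.Resize(height, width) call above, so callers must mind the argument order. A hypothetical thin wrapper that keeps the old (height, width) convention would simply swap them:

// resizer is a hypothetical subset of the libcontainerd client interface.
type resizer interface {
	Resize(containerID, processFriendlyName string, width, height int) error
}

// resizeTTY preserves the old (height, width) calling convention.
func resizeTTY(c resizer, containerID, process string, height, width int) error {
	return c.Resize(containerID, process, width, height)
}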

+ 1600 - 0
daemon/seccomp_default_linux.go

@@ -0,0 +1,1600 @@
+// +build linux,seccomp
+
+package daemon
+
+import (
+	"syscall"
+
+	"github.com/opencontainers/specs/specs-go"
+	libseccomp "github.com/seccomp/libseccomp-golang"
+)
+
+func arches() []specs.Arch {
+	var native, err = libseccomp.GetNativeArch()
+	if err != nil {
+		return []specs.Arch{}
+	}
+	var a = native.String()
+	switch a {
+	case "amd64":
+		return []specs.Arch{specs.ArchX86_64, specs.ArchX86, specs.ArchX32}
+	case "arm64":
+		return []specs.Arch{specs.ArchAARCH64, specs.ArchARM}
+	case "mips64":
+		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
+	case "mips64n32":
+		return []specs.Arch{specs.ArchMIPS, specs.ArchMIPS64, specs.ArchMIPS64N32}
+	case "mipsel64":
+		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
+	case "mipsel64n32":
+		return []specs.Arch{specs.ArchMIPSEL, specs.ArchMIPSEL64, specs.ArchMIPSEL64N32}
+	default:
+		return []specs.Arch{}
+	}
+}
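Editor's sketch of the value arches() switches on; libseccomp.GetNativeArch and ScmpArch.String are the same calls used above, and the returned string depends on the build host:

// nativeArchName returns the architecture string arches() matches against,
// e.g. "amd64" on an x86-64 host (which is why the x86 and x32 ABIs are
// whitelisted alongside it for 32-bit binaries).
func nativeArchName() (string, error) {
	native, err := libseccomp.GetNativeArch()
	if err != nil {
		return "", err
	}
	return native.String(), nil
}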
+
+var defaultSeccompProfile = specs.Seccomp{
+	DefaultAction: specs.ActErrno,
+	Architectures: arches(),
+	Syscalls: []specs.Syscall{
+		{
+			Name:   "accept",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "accept4",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "access",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "alarm",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "arch_prctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "bind",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "brk",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "capget",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "capset",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "chdir",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "chmod",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "chown",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "chown32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "chroot",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "clock_getres",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "clock_gettime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "clock_nanosleep",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "clone",
+			Action: specs.ActAllow,
+			Args: []specs.Arg{
+				{
+					Index:    0,
+					Value:    syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWUSER | syscall.CLONE_NEWPID | syscall.CLONE_NEWNET,
+					ValueTwo: 0,
+					Op:       specs.OpMaskedEqual,
+				},
+			},
+		},
+		{
+			Name:   "close",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "connect",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "creat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "dup",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "dup2",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "dup3",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_create",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_create1",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_ctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_ctl_old",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_pwait",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_wait",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "epoll_wait_old",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "eventfd",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "eventfd2",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "execve",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "execveat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "exit",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "exit_group",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "faccessat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fadvise64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fadvise64_64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fallocate",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fanotify_init",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fanotify_mark",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchdir",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchmod",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchmodat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchown",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchown32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fchownat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fcntl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fcntl64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fdatasync",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fgetxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "flistxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "flock",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fork",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fremovexattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fsetxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fstat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fstat64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fstatat64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fstatfs",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fstatfs64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "fsync",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ftruncate",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ftruncate64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "futex",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "futimesat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getcpu",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getcwd",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getdents",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getdents64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getegid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getegid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "geteuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "geteuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getgid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getgroups",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getgroups32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getitimer",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getpeername",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getpgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getpgrp",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getpid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getppid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getpriority",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getrandom",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getresgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getresgid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getresuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getresuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getrlimit",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "get_robust_list",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getrusage",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getsid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getsockname",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getsockopt",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "get_thread_area",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "gettid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "gettimeofday",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "getxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "inotify_add_watch",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "inotify_init",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "inotify_init1",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "inotify_rm_watch",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "io_cancel",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ioctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "io_destroy",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "io_getevents",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ioprio_get",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ioprio_set",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "io_setup",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "io_submit",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "kill",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lchown",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lchown32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lgetxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "link",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "linkat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "listen",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "listxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "llistxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "_llseek",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lremovexattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lseek",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lsetxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lstat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "lstat64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "madvise",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "memfd_create",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mincore",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mkdir",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mkdirat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mknod",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mknodat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mlock",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mlockall",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mmap",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mmap2",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mprotect",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_getsetattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_notify",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_open",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_timedreceive",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_timedsend",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mq_unlink",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "mremap",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "msgctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "msgget",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "msgrcv",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "msgsnd",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "msync",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "munlock",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "munlockall",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "munmap",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "nanosleep",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "newfstatat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "_newselect",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "open",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "openat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pause",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pipe",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pipe2",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "poll",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ppoll",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "prctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pread64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "preadv",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "prlimit64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pselect6",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pwrite64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "pwritev",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "read",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "readahead",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "readlink",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "readlinkat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "readv",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "recv",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "recvfrom",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "recvmmsg",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "recvmsg",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "remap_file_pages",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "removexattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rename",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "renameat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "renameat2",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rmdir",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigaction",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigpending",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigprocmask",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigqueueinfo",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigreturn",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigsuspend",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_sigtimedwait",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "rt_tgsigqueueinfo",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_getaffinity",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_getattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_getparam",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_get_priority_max",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_get_priority_min",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_getscheduler",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_rr_get_interval",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_setaffinity",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_setattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_setparam",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_setscheduler",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sched_yield",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "seccomp",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "select",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "semctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "semget",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "semop",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "semtimedop",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "send",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sendfile",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sendfile64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sendmmsg",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sendmsg",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sendto",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setdomainname",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setfsgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setfsgid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setfsuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setfsuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setgid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setgroups",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setgroups32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sethostname",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setitimer",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setpgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setpriority",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setregid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setregid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setresgid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setresgid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setresuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setresuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setreuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setreuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setrlimit",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "set_robust_list",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setsid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setsockopt",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "set_thread_area",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "set_tid_address",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setuid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setuid32",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "setxattr",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "shmat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "shmctl",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "shmdt",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "shmget",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "shutdown",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sigaltstack",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "signalfd",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "signalfd4",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sigreturn",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "socket",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "socketpair",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "splice",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "stat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "stat64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "statfs",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "statfs64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "symlink",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "symlinkat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sync",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sync_file_range",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "syncfs",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "sysinfo",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "syslog",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "tee",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "tgkill",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "time",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timer_create",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timer_delete",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timerfd_create",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timerfd_gettime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timerfd_settime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timer_getoverrun",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timer_gettime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "timer_settime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "times",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "tkill",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "truncate",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "truncate64",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "ugetrlimit",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "umask",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "uname",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "unlink",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "unlinkat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "utime",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "utimensat",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "utimes",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "vfork",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "vhangup",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "vmsplice",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "wait4",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "waitid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "waitpid",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "write",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "writev",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		// i386 specific syscalls
+		{
+			Name:   "modify_ldt",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		// arm specific syscalls
+		{
+			Name:   "breakpoint",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "cacheflush",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+		{
+			Name:   "set_tls",
+			Action: specs.ActAllow,
+			Args:   []specs.Arg{},
+		},
+	},
+}
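Editor's note on the one non-trivial rule above: clone(2) is only whitelisted when its flags contain none of the namespace-creation bits, because OpMaskedEqual requires flags&mask == 0; anything else falls through to the profile's DefaultAction (ActErrno). A small illustration with hypothetical flag values:

// cloneAllowedByProfile mirrors the masked-equal check in the clone rule.
func cloneAllowedByProfile(flags uintptr) bool {
	const nsMask = syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC |
		syscall.CLONE_NEWUSER | syscall.CLONE_NEWPID | syscall.CLONE_NEWNET
	return flags&nsMask == 0
}

// cloneAllowedByProfile(syscall.CLONE_VM|syscall.CLONE_FS) == true  (ordinary thread)
// cloneAllowedByProfile(syscall.CLONE_NEWUSER)             == false (falls to ActErrno)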

+ 12 - 0
daemon/seccomp_disabled.go

@@ -0,0 +1,12 @@
+// +build !seccomp,!windows
+
+package daemon
+
+import (
+	"github.com/docker/docker/container"
+	"github.com/opencontainers/specs/specs-go"
+)
+
+func setSeccomp(daemon *Daemon, rs *specs.Spec, c *container.Container) error {
+	return nil
+}

+ 100 - 0
daemon/seccomp_linux.go

@@ -0,0 +1,100 @@
+// +build linux,seccomp
+
+package daemon
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/Sirupsen/logrus"
+	"github.com/docker/docker/container"
+	"github.com/docker/engine-api/types"
+	"github.com/opencontainers/specs/specs-go"
+)
+
+func setSeccomp(daemon *Daemon, rs *specs.Spec, c *container.Container) error {
+	var seccomp *specs.Seccomp
+	var err error
+
+	if c.HostConfig.Privileged {
+		return nil
+	}
+
+	if !daemon.seccompEnabled {
+		if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" {
+			return fmt.Errorf("Seccomp is not enabled in your kernel, cannot run a custom seccomp profile.")
+		}
+		logrus.Warn("Seccomp is not enabled in your kernel, running container without default profile.")
+		c.SeccompProfile = "unconfined"
+	}
+	if c.SeccompProfile == "unconfined" {
+		return nil
+	}
+	if c.SeccompProfile != "" {
+		seccomp, err = loadSeccompProfile(c.SeccompProfile)
+		if err != nil {
+			return err
+		}
+	} else {
+		seccomp = &defaultSeccompProfile
+	}
+
+	rs.Linux.Seccomp = seccomp
+	return nil
+}
+
+func loadSeccompProfile(body string) (*specs.Seccomp, error) {
+	var config types.Seccomp
+	if err := json.Unmarshal([]byte(body), &config); err != nil {
+		return nil, fmt.Errorf("Decoding seccomp profile failed: %v", err)
+	}
+
+	return setupSeccomp(&config)
+}
+
+func setupSeccomp(config *types.Seccomp) (newConfig *specs.Seccomp, err error) {
+	if config == nil {
+		return nil, nil
+	}
+
+	// No default action specified, no syscalls listed, assume seccomp disabled
+	if config.DefaultAction == "" && len(config.Syscalls) == 0 {
+		return nil, nil
+	}
+
+	newConfig = &specs.Seccomp{}
+
+	// if len(config.Architectures) == 0 then libseccomp will figure out the architecture to use
+	if len(config.Architectures) > 0 {
+		// newConfig.Architectures = []string{}
+		for _, arch := range config.Architectures {
+			newConfig.Architectures = append(newConfig.Architectures, specs.Arch(arch))
+		}
+	}
+
+	newConfig.DefaultAction = specs.Action(config.DefaultAction)
+
+	// Loop through all syscall blocks and convert them to the OCI runtime spec format
+	for _, call := range config.Syscalls {
+		newCall := specs.Syscall{
+			Name:   call.Name,
+			Action: specs.Action(call.Action),
+		}
+
+		// Loop through all the arguments of the syscall and convert them
+		for _, arg := range call.Args {
+			newArg := specs.Arg{
+				Index:    arg.Index,
+				Value:    arg.Value,
+				ValueTwo: arg.ValueTwo,
+				Op:       specs.Operator(arg.Op),
+			}
+
+			newCall.Args = append(newCall.Args, newArg)
+		}
+
+		newConfig.Syscalls = append(newConfig.Syscalls, newCall)
+	}
+
+	return newConfig, nil
+}
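Editor's sketch of the input loadSeccompProfile expects: a JSON document in the same shape as the stock profiles, decoded into engine-api's types.Seccomp. The exact field names below are an assumption based on that type's JSON tags:

// exampleProfile is a hypothetical minimal custom profile: deny everything
// except read and write.
var exampleProfile = `{
	"defaultAction": "SCMP_ACT_ERRNO",
	"architectures": ["SCMP_ARCH_X86_64"],
	"syscalls": [
		{"name": "read",  "action": "SCMP_ACT_ALLOW", "args": []},
		{"name": "write", "action": "SCMP_ACT_ALLOW", "args": []}
	]
}`

// loadSeccompProfile(exampleProfile) then yields a *specs.Seccomp with
// DefaultAction specs.ActErrno and the two whitelisted syscalls.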

+ 28 - 27
daemon/start.go

@@ -4,10 +4,13 @@ import (
 	"fmt"
 	"net/http"
 	"runtime"
+	"strings"
+	"syscall"
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/errors"
+	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/runconfig"
 	containertypes "github.com/docker/engine-api/types/container"
 )
@@ -122,44 +125,36 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error)
 	if err := daemon.initializeNetworking(container); err != nil {
 		return err
 	}
-	linkedEnv, err := daemon.setupLinkedContainers(container)
+
+	spec, err := daemon.createSpec(container)
 	if err != nil {
 		return err
 	}
-	rootUID, rootGID := daemon.GetRemappedUIDGID()
-	if err := container.SetupWorkingDirectory(rootUID, rootGID); err != nil {
-		return err
-	}
-	env := container.CreateDaemonEnvironment(linkedEnv)
-	if err := daemon.populateCommand(container, env); err != nil {
-		return err
-	}
 
-	if !container.HostConfig.IpcMode.IsContainer() && !container.HostConfig.IpcMode.IsHost() {
-		if err := daemon.setupIpcDirs(container); err != nil {
-			return err
+	defer daemon.LogContainerEvent(container, "start") // this is logged even on error
+	if err := daemon.containerd.Create(container.ID, *spec, libcontainerd.WithRestartManager(container.RestartManager(true))); err != nil {
+		// if we receive an internal error from the initial start of a container then let's
+		// return it instead of entering the restart loop
+		// set to 127 for container cmd not found/does not exist
+		if strings.Contains(err.Error(), "executable file not found") ||
+			strings.Contains(err.Error(), "no such file or directory") ||
+			strings.Contains(err.Error(), "system cannot find the file specified") {
+			container.ExitCode = 127
+			err = fmt.Errorf("Container command not found or does not exist.")
+		}
+		// set to 126 for errors where the container cmd can't be invoked
+		if strings.Contains(err.Error(), syscall.EACCES.Error()) {
+			container.ExitCode = 126
+			err = fmt.Errorf("Container command could not be invoked.")
 		}
-	}
 
-	mounts, err := daemon.setupMounts(container)
-	if err != nil {
+		container.Reset(false)
 		return err
 	}
-	mounts = append(mounts, container.IpcMounts()...)
-	mounts = append(mounts, container.TmpfsMounts()...)
 
-	container.Command.Mounts = mounts
-	if err := daemon.waitForStart(container); err != nil {
-		return err
-	}
-	container.HasBeenStartedBefore = true
 	return nil
 }
 
-func (daemon *Daemon) waitForStart(container *container.Container) error {
-	return container.StartMonitor(daemon)
-}
-
 // Cleanup releases any network resources allocated to the container along with any rules
 // around how containers are linked together.  It also unmounts the container's root filesystem.
 func (daemon *Daemon) Cleanup(container *container.Container) {
@@ -167,7 +162,13 @@ func (daemon *Daemon) Cleanup(container *container.Container) {
 
 	container.UnmountIpcMounts(detachMounted)
 
-	daemon.conditionalUnmountOnCleanup(container)
+	if err := daemon.conditionalUnmountOnCleanup(container); err != nil {
+		// FIXME: remove once reference counting for graphdrivers has been refactored
+		// Ensure that all the mounts are gone
+		if mountid, err := daemon.layerStore.GetMountID(container.ID); err == nil {
+			daemon.cleanupMountsByID(mountid)
+		}
+	}
 
 	for _, eConfig := range container.ExecCommands.Commands() {
 		daemon.unregisterExecCommand(container, eConfig)
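Editor's sketch of the exit-code classification applied in containerStart above when containerd rejects the initial start; the substrings are the ones matched in the diff, and 127/126 follow the usual shell conventions for "command not found" and "command not executable":

// classifyStartError is a hypothetical distillation of the checks above.
func classifyStartError(err error) (exitCode int, msg string) {
	s := err.Error()
	switch {
	case strings.Contains(s, "executable file not found"),
		strings.Contains(s, "no such file or directory"),
		strings.Contains(s, "system cannot find the file specified"):
		return 127, "Container command not found or does not exist."
	case strings.Contains(s, syscall.EACCES.Error()):
		return 126, "Container command could not be invoked."
	}
	return 0, s
}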

+ 2 - 6
daemon/stats.go

@@ -6,7 +6,6 @@ import (
 	"runtime"
 
 	"github.com/docker/docker/api/types/backend"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/ioutils"
 	"github.com/docker/docker/pkg/version"
 	"github.com/docker/engine-api/types"
@@ -42,12 +41,9 @@ func (daemon *Daemon) ContainerStats(prefixOrName string, config *backend.Contai
 
 	var preCPUStats types.CPUStats
 	getStatJSON := func(v interface{}) *types.StatsJSON {
-		update := v.(*execdriver.ResourceStats)
-		ss := convertStatsToAPITypes(update.Stats)
+		ss := v.(*types.StatsJSON)
 		ss.PreCPUStats = preCPUStats
-		ss.MemoryStats.Limit = uint64(update.MemoryLimit)
-		ss.Read = update.Read
-		ss.CPUStats.SystemUsage = update.SystemUsage
+		// ss.MemoryStats.Limit = uint64(update.MemoryLimit)
 		preCPUStats = ss.CPUStats
 		return ss
 	}

+ 5 - 4
daemon/stats_collector_unix.go

@@ -13,14 +13,14 @@ import (
 
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/pkg/pubsub"
+	"github.com/docker/engine-api/types"
 	"github.com/opencontainers/runc/libcontainer/system"
 )
 
 type statsSupervisor interface {
 	// GetContainerStats collects all the stats related to a container
-	GetContainerStats(container *container.Container) (*execdriver.ResourceStats, error)
+	GetContainerStats(container *container.Container) (*types.StatsJSON, error)
 }
 
 // newStatsCollector returns a new statsCollector that collects
@@ -120,12 +120,13 @@ func (s *statsCollector) run() {
 		for _, pair := range pairs {
 			stats, err := s.supervisor.GetContainerStats(pair.container)
 			if err != nil {
-				if err != execdriver.ErrNotRunning {
+				if err, ok := err.(errNotRunning); ok {
 					logrus.Errorf("collecting stats for %s: %v", pair.container.ID, err)
 				}
 				continue
 			}
-			stats.SystemUsage = systemUsage
+			// FIXME: move to containerd
+			stats.CPUStats.SystemUsage = systemUsage
 
 			pair.publisher.Publish(stats)
 		}

+ 0 - 84
daemon/stats_linux.go

@@ -1,84 +0,0 @@
-package daemon
-
-import (
-	"github.com/docker/engine-api/types"
-	"github.com/opencontainers/runc/libcontainer"
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-)
-
-// convertStatsToAPITypes converts the libcontainer.Stats to the api specific
-// structs. This is done to preserve API compatibility and versioning.
-func convertStatsToAPITypes(ls *libcontainer.Stats) *types.StatsJSON {
-	s := &types.StatsJSON{}
-	if ls.Interfaces != nil {
-		s.Networks = make(map[string]types.NetworkStats)
-		for _, iface := range ls.Interfaces {
-			// For API Version >= 1.21, the original data of network will
-			// be returned.
-			s.Networks[iface.Name] = types.NetworkStats{
-				RxBytes:   iface.RxBytes,
-				RxPackets: iface.RxPackets,
-				RxErrors:  iface.RxErrors,
-				RxDropped: iface.RxDropped,
-				TxBytes:   iface.TxBytes,
-				TxPackets: iface.TxPackets,
-				TxErrors:  iface.TxErrors,
-				TxDropped: iface.TxDropped,
-			}
-		}
-	}
-
-	cs := ls.CgroupStats
-	if cs != nil {
-		s.BlkioStats = types.BlkioStats{
-			IoServiceBytesRecursive: copyBlkioEntry(cs.BlkioStats.IoServiceBytesRecursive),
-			IoServicedRecursive:     copyBlkioEntry(cs.BlkioStats.IoServicedRecursive),
-			IoQueuedRecursive:       copyBlkioEntry(cs.BlkioStats.IoQueuedRecursive),
-			IoServiceTimeRecursive:  copyBlkioEntry(cs.BlkioStats.IoServiceTimeRecursive),
-			IoWaitTimeRecursive:     copyBlkioEntry(cs.BlkioStats.IoWaitTimeRecursive),
-			IoMergedRecursive:       copyBlkioEntry(cs.BlkioStats.IoMergedRecursive),
-			IoTimeRecursive:         copyBlkioEntry(cs.BlkioStats.IoTimeRecursive),
-			SectorsRecursive:        copyBlkioEntry(cs.BlkioStats.SectorsRecursive),
-		}
-		cpu := cs.CpuStats
-		s.CPUStats = types.CPUStats{
-			CPUUsage: types.CPUUsage{
-				TotalUsage:        cpu.CpuUsage.TotalUsage,
-				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
-				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
-				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
-			},
-			ThrottlingData: types.ThrottlingData{
-				Periods:          cpu.ThrottlingData.Periods,
-				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
-				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
-			},
-		}
-		mem := cs.MemoryStats
-		s.MemoryStats = types.MemoryStats{
-			Usage:    mem.Usage.Usage,
-			MaxUsage: mem.Usage.MaxUsage,
-			Stats:    mem.Stats,
-			Failcnt:  mem.Usage.Failcnt,
-		}
-		pids := cs.PidsStats
-		s.PidsStats = types.PidsStats{
-			Current: pids.Current,
-		}
-	}
-
-	return s
-}
-
-func copyBlkioEntry(entries []cgroups.BlkioStatEntry) []types.BlkioStatEntry {
-	out := make([]types.BlkioStatEntry, len(entries))
-	for i, re := range entries {
-		out[i] = types.BlkioStatEntry{
-			Major: re.Major,
-			Minor: re.Minor,
-			Op:    re.Op,
-			Value: re.Value,
-		}
-	}
-	return out
-}

+ 0 - 14
daemon/stats_windows.go

@@ -1,14 +0,0 @@
-package daemon
-
-import (
-	"github.com/docker/engine-api/types"
-	"github.com/opencontainers/runc/libcontainer"
-)
-
-// convertStatsToAPITypes converts the libcontainer.Stats to the api specific
-// structs. This is done to preserve API compatibility and versioning.
-func convertStatsToAPITypes(ls *libcontainer.Stats) *types.StatsJSON {
-	// TODO Windows. Refactor accordingly to fill in stats.
-	s := &types.StatsJSON{}
-	return s
-}

+ 2 - 1
daemon/top_unix.go

@@ -33,7 +33,8 @@ func (daemon *Daemon) ContainerTop(name string, psArgs string) (*types.Container
 	if container.IsRestarting() {
 		return nil, errContainerIsRestarting(container.ID)
 	}
-	pids, err := daemon.ExecutionDriver().GetPidsForContainer(container.ID)
+
+	pids, err := daemon.containerd.GetPidsForContainer(container.ID)
 	if err != nil {
 		return nil, err
 	}

+ 1 - 3
daemon/unpause.go

@@ -35,11 +35,9 @@ func (daemon *Daemon) containerUnpause(container *container.Container) error {
 		return fmt.Errorf("Container %s is not paused", container.ID)
 	}
 
-	if err := daemon.execDriver.Unpause(container.Command); err != nil {
+	if err := daemon.containerd.Resume(container.ID); err != nil {
 		return fmt.Errorf("Cannot unpause container %s: %s", container.ID, err)
 	}
 
-	container.Paused = false
-	daemon.LogContainerEvent(container, "unpause")
 	return nil
 }

+ 1 - 1
daemon/update.go

@@ -84,7 +84,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
 	// If container is running (including paused), we need to update configs
 	// to the real world.
 	if container.IsRunning() && !container.IsRestarting() {
-		if err := daemon.execDriver.Update(container.Command); err != nil {
+		if err := daemon.containerd.UpdateResources(container.ID, toContainerdResources(hostConfig.Resources)); err != nil {
 			restoreConfig = true
 			return errCannotUpdate(container.ID, err)
 		}

+ 25 - 0
daemon/update_linux.go

@@ -0,0 +1,25 @@
+// +build linux
+
+package daemon
+
+import (
+	"github.com/docker/docker/libcontainerd"
+	"github.com/docker/engine-api/types/container"
+)
+
+func toContainerdResources(resources container.Resources) libcontainerd.Resources {
+	var r libcontainerd.Resources
+	r.BlkioWeight = uint32(resources.BlkioWeight)
+	r.CpuShares = uint32(resources.CPUShares)
+	r.CpuPeriod = uint32(resources.CPUPeriod)
+	r.CpuQuota = uint32(resources.CPUQuota)
+	r.CpusetCpus = resources.CpusetCpus
+	r.CpusetMems = resources.CpusetMems
+	r.MemoryLimit = uint32(resources.Memory)
+	if resources.MemorySwap > 0 {
+		r.MemorySwap = uint32(resources.MemorySwap)
+	}
+	r.MemoryReservation = uint32(resources.MemoryReservation)
+	r.KernelMemoryLimit = uint32(resources.KernelMemory)
+	return r
+}
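Editor's sketch of the mapping above with hypothetical values; note that every numeric field is narrowed to uint32 on the libcontainerd side, and MemorySwap is only carried over when positive:

// exampleResources shows a 512 MB / 512-share container being converted.
func exampleResources() libcontainerd.Resources {
	return toContainerdResources(container.Resources{
		Memory:     512 * 1024 * 1024, // -> MemoryLimit 536870912
		CPUShares:  512,               // -> CpuShares 512
		CpusetCpus: "0-1",             // copied as-is
	})
}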

+ 13 - 0
daemon/update_windows.go

@@ -0,0 +1,13 @@
+// +build windows
+
+package daemon
+
+import (
+	"github.com/docker/docker/libcontainerd"
+	"github.com/docker/engine-api/types/container"
+)
+
+func toContainerdResources(resources container.Resources) libcontainerd.Resources {
+	var r libcontainerd.Resources
+	return r
+}

+ 1 - 2
daemon/volumes.go

@@ -8,7 +8,6 @@ import (
 	"strings"
 
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/volume"
 	"github.com/docker/engine-api/types"
 	containertypes "github.com/docker/engine-api/types/container"
@@ -21,7 +20,7 @@ var (
 	ErrVolumeReadonly = errors.New("mounted volume is marked read-only")
 )
 
-type mounts []execdriver.Mount
+type mounts []container.Mount
 
 // volumeToAPIType converts a volume.Volume to the type used by the remote API
 func volumeToAPIType(v volume.Volume) *types.Volume {

+ 9 - 10
daemon/volumes_unix.go

@@ -8,25 +8,24 @@ import (
 	"strconv"
 
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/volume"
 )
 
 // setupMounts iterates through each of the mount points for a container and
 // calls Setup() on each. It also looks to see if is a network mount such as
 // /etc/resolv.conf, and if it is not, appends it to the array of mounts.
-func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.Mount, error) {
-	var mounts []execdriver.Mount
-	for _, m := range container.MountPoints {
-		if err := daemon.lazyInitializeVolume(container.ID, m); err != nil {
+func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) {
+	var mounts []container.Mount
+	for _, m := range c.MountPoints {
+		if err := daemon.lazyInitializeVolume(c.ID, m); err != nil {
 			return nil, err
 		}
 		path, err := m.Setup()
 		if err != nil {
 			return nil, err
 		}
-		if !container.TrySetNetworkMount(m.Destination, path) {
-			mnt := execdriver.Mount{
+		if !c.TrySetNetworkMount(m.Destination, path) {
+			mnt := container.Mount{
 				Source:      path,
 				Destination: m.Destination,
 				Writable:    m.RW,
@@ -35,7 +34,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
 			if m.Volume != nil {
 				attributes := map[string]string{
 					"driver":      m.Volume.DriverName(),
-					"container":   container.ID,
+					"container":   c.ID,
 					"destination": m.Destination,
 					"read/write":  strconv.FormatBool(m.RW),
 					"propagation": m.Propagation,
@@ -47,7 +46,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
 	}
 
 	mounts = sortMounts(mounts)
-	netMounts := container.NetworkMounts()
+	netMounts := c.NetworkMounts()
 	// if we are going to mount any of the network files from container
 	// metadata, the ownership must be set properly for potential container
 	// remapped root (user namespaces)
@@ -63,7 +62,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
 // sortMounts sorts an array of mounts in lexicographic order. This ensure that
 // when mounting, the mounts don't shadow other mounts. For example, if mounting
 // /etc and /etc/resolv.conf, /etc/resolv.conf must not be mounted first.
-func sortMounts(m []execdriver.Mount) []execdriver.Mount {
+func sortMounts(m []container.Mount) []container.Mount {
 	sort.Sort(mounts(m))
 	return m
 }
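The sort.Interface implementation behind sort.Sort(mounts(m)) is declared elsewhere in the package and does not appear in this diff. Given the "lexicographic order" contract documented on sortMounts, it amounts to something like the sketch below; the real comparison may differ in detail.

// Sketch of the sort.Interface methods that sortMounts relies on; the real
// implementation lives elsewhere in package daemon and may differ.
func (m mounts) Len() int      { return len(m) }
func (m mounts) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
func (m mounts) Less(i, j int) bool {
	// Lexicographic by destination: "/etc" sorts before "/etc/resolv.conf",
	// so a parent mount is always applied before anything nested under it.
	return m[i].Destination < m[j].Destination
}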

+ 11 - 7
daemon/volumes_windows.go

@@ -7,18 +7,22 @@ import (
 	"sort"
 
 	"github.com/docker/docker/container"
-	"github.com/docker/docker/daemon/execdriver"
 	"github.com/docker/docker/volume"
 )
 
 // setupMounts configures the mount points for a container by appending each
-// of the configured mounts on the container to the execdriver mount structure
+// of the configured mounts on the container to the oci mount structure
 // which will ultimately be passed into the exec driver during container creation.
 // It also ensures each of the mounts are lexographically sorted.
-func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.Mount, error) {
-	var mnts []execdriver.Mount
-	for _, mount := range container.MountPoints { // type is volume.MountPoint
-		if err := daemon.lazyInitializeVolume(container.ID, mount); err != nil {
+
+// BUGBUG TODO Windows containerd. This would be much better if it returned
+// an array of windowsoci mounts, not container mounts. Then no need to
+// do multiple transitions.
+
+func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) {
+	var mnts []container.Mount
+	for _, mount := range c.MountPoints { // type is volume.MountPoint
+		if err := daemon.lazyInitializeVolume(c.ID, mount); err != nil {
 			return nil, err
 		}
 		// If there is no source, take it from the volume path
@@ -29,7 +33,7 @@ func (daemon *Daemon) setupMounts(container *container.Container) ([]execdriver.
 		if s == "" {
 			return nil, fmt.Errorf("No source for mount name '%s' driver %q destination '%s'", mount.Name, mount.Driver, mount.Destination)
 		}
-		mnts = append(mnts, execdriver.Mount{
+		mnts = append(mnts, container.Mount{
 			Source:      s,
 			Destination: mount.Destination,
 			Writable:    mount.RW,

+ 3 - 2
distribution/xfer/download_test.go

@@ -112,12 +112,13 @@ func (ls *mockLayerStore) CreateRWLayer(string, layer.ChainID, string, layer.Mou
 
 func (ls *mockLayerStore) GetRWLayer(string) (layer.RWLayer, error) {
 	return nil, errors.New("not implemented")
-
 }
 
 func (ls *mockLayerStore) ReleaseRWLayer(layer.RWLayer) ([]layer.Metadata, error) {
 	return nil, errors.New("not implemented")
-
+}
+func (ls *mockLayerStore) GetMountID(string) (string, error) {
+	return "", errors.New("not implemented")
 }
 
 func (ls *mockLayerStore) Cleanup() error {
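The mock grows a GetMountID stub because the layer.Store interface gains a GetMountID(string) (string, error) method elsewhere in this pull request, and the test double has to keep satisfying it. One optional way to catch such interface growth at compile time rather than at test time, not part of this diff, is a blank-identifier assertion:

// Optional compile-time check (not in the diff): fails the build as soon as
// mockLayerStore stops satisfying layer.Store.
var _ layer.Store = (*mockLayerStore)(nil)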

+ 9 - 2
docker/daemon.go

@@ -29,6 +29,7 @@ import (
 	"github.com/docker/docker/daemon/logger"
 	"github.com/docker/docker/docker/listeners"
 	"github.com/docker/docker/dockerversion"
+	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/pkg/jsonlog"
 	flag "github.com/docker/docker/pkg/mflag"
@@ -264,7 +265,13 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error {
 	cli.TrustKeyPath = commonFlags.TrustKey
 
 	registryService := registry.NewService(cli.Config.ServiceOptions)
-	d, err := daemon.NewDaemon(cli.Config, registryService)
+
+	containerdRemote, err := libcontainerd.New(filepath.Join(cli.Config.ExecRoot, "libcontainerd"), cli.getPlatformRemoteOptions()...)
+	if err != nil {
+		logrus.Fatal(err)
+	}
+
+	d, err := daemon.NewDaemon(cli.Config, registryService, containerdRemote)
 	if err != nil {
 		if pfile != nil {
 			if err := pfile.Remove(); err != nil {
@@ -279,7 +286,6 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error {
 	logrus.WithFields(logrus.Fields{
 		"version":     dockerversion.Version,
 		"commit":      dockerversion.GitCommit,
-		"execdriver":  d.ExecutionDriver().Name(),
 		"graphdriver": d.GraphDriverName(),
 	}).Info("Docker daemon")
 
@@ -330,6 +336,7 @@ func (cli *DaemonCli) CmdDaemon(args ...string) error {
 	// Wait for serve API to complete
 	errAPI := <-serveAPIWait
 	shutdownDaemon(d, 15)
+	containerdRemote.Cleanup()
 	if errAPI != nil {
 		if pfile != nil {
 			if err := pfile.Remove(); err != nil {
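Taken together, the docker/daemon.go hunks establish a lifetime ordering: the libcontainerd remote is created first (supervising or connecting to a containerd instance under ExecRoot/libcontainerd), handed to NewDaemon, and cleaned up only after shutdownDaemon has finished with the containers. A condensed sketch of that ordering follows; runDaemonSketch and its parameters are illustrative names, and the API-server and pidfile bookkeeping from the real file is elided.

// Condensed sketch of the startup/teardown ordering introduced above;
// error handling around the API server and the pidfile is omitted.
func runDaemonSketch(cfg *daemon.Config, registryService *registry.Service, opts ...libcontainerd.RemoteOption) error {
	remote, err := libcontainerd.New(filepath.Join(cfg.ExecRoot, "libcontainerd"), opts...)
	if err != nil {
		return err
	}
	d, err := daemon.NewDaemon(cfg, registryService, remote)
	if err != nil {
		return err
	}
	// ... serve the remote API until it exits ...
	shutdownDaemon(d, 15) // stop or hand off containers first
	remote.Cleanup()      // then tear down the containerd supervision
	return nil
}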

Some files were not shown because too many files have changed in this diff