瀏覽代碼

Add init process for zombie fighting

This adds a small C binary for fighting zombies.  It is mounted under
`/dev/init` and is prepended to the args specified by the user.  You
enable it via a daemon flag, `dockerd --init`, as it is disabled by
default for backwards compat.

You can also override the daemon option or specify this on a per
container basis with `docker run --init=true|false`.

You can test this by running a process like this as the pid 1 in a
container and see the extra zombie that appears in the container as it
is running.

```c

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Zombie reproducer: run this as pid 1 of a container and inspect the
 * process list while it sleeps. No process here ever calls wait(), so
 * exited descendants are never reaped by this program.
 */
int main(int argc, char ** argv) {
	pid_t pid = fork();
	if (pid == 0) {
		/* First child: spawn a grandchild that exits immediately. */
		pid = fork();
		if (pid == 0) {
			exit(0);
		}
		/* Linger for a few seconds without reaping the grandchild, then exit. */
		sleep(3);
		exit(0);
	}
	/* Parent: report the child's pid and stay alive without calling wait(). */
	printf("got pid %d and exited\n", pid);
	sleep(20);
}
```

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
Michael Crosby 9 年之前
父節點
當前提交
ee3ac3aa66

+ 10 - 0
Dockerfile

@@ -255,6 +255,16 @@ RUN set -x \
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
 	&& rm -rf "$GOPATH"
 
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
+RUN set -x \
+    && export GOPATH="$(mktemp -d)" \
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
+	&& cd "$GOPATH/grimes" \
+	&& git checkout -q "$GRIMES_COMMIT" \
+	&& make \
+	&& cp init /usr/local/bin/docker-init \
+	&& rm -rf "$GOPATH"
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 10 - 0
Dockerfile.aarch64

@@ -198,6 +198,16 @@ RUN set -x \
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
 	&& rm -rf "$GOPATH"
 
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
+RUN set -x \
+    && export GOPATH="$(mktemp -d)" \
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
+	&& cd "$GOPATH/grimes" \
+	&& git checkout -q "$GRIMES_COMMIT" \
+	&& make \
+	&& cp init /usr/local/bin/docker-init \
+	&& rm -rf "$GOPATH"
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 10 - 0
Dockerfile.armhf

@@ -196,6 +196,16 @@ RUN set -x \
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
 	&& rm -rf "$GOPATH"
 
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
+RUN set -x \
+    && export GOPATH="$(mktemp -d)" \
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
+	&& cd "$GOPATH/grimes" \
+	&& git checkout -q "$GRIMES_COMMIT" \
+	&& make \
+	&& cp init /usr/local/bin/docker-init \
+	&& rm -rf "$GOPATH"
+
 ENTRYPOINT ["hack/dind"]
 
 # Upload docker source

+ 10 - 0
Dockerfile.ppc64le

@@ -216,6 +216,16 @@ RUN set -x \
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
 	&& rm -rf "$GOPATH"
 
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
+RUN set -x \
+    && export GOPATH="$(mktemp -d)" \
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
+	&& cd "$GOPATH/grimes" \
+	&& git checkout -q "$GRIMES_COMMIT" \
+	&& make \
+	&& cp init /usr/local/bin/docker-init \
+	&& rm -rf "$GOPATH"
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 10 - 0
Dockerfile.s390x

@@ -208,6 +208,16 @@ RUN set -x \
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
 	&& rm -rf "$GOPATH"
 
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
+RUN set -x \
+    && export GOPATH="$(mktemp -d)" \
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
+	&& cd "$GOPATH/grimes" \
+	&& git checkout -q "$GRIMES_COMMIT" \
+	&& make \
+	&& cp init /usr/local/bin/docker-init \
+	&& rm -rf "$GOPATH"
+
 # Wrap all commands in the "docker-in-docker" script to allow nested containers
 ENTRYPOINT ["hack/dind"]
 

+ 10 - 0
Dockerfile.simple

@@ -80,6 +80,16 @@ RUN set -x \
 	&& cp bin/ctr /usr/local/bin/docker-containerd-ctr \
 	&& rm -rf "$GOPATH"
 
+ENV GRIMES_COMMIT f207601a8d19a534cc90d9e26e037e9931ccb9db
+RUN set -x \
+    && export GOPATH="$(mktemp -d)" \
+	&& git clone https://github.com/crosbymichael/grimes.git "$GOPATH/grimes" \
+	&& cd "$GOPATH/grimes" \
+	&& git checkout -q "$GRIMES_COMMIT" \
+	&& make \
+	&& cp init /usr/local/bin/docker-init \
+	&& rm -rf "$GOPATH"
+
 ENV AUTO_GOPATH 1
 WORKDIR /usr/src/docker
 COPY . /usr/src/docker

+ 3 - 0
api/types/container/host_config.go

@@ -321,6 +321,9 @@ type HostConfig struct {
 
 	// Mounts specs used by the container
 	Mounts []mount.Mount `json:",omitempty"`
+
+	// Run a custom init inside the container, if null, use the daemon's configured settings
+	Init *bool `json:",omitempty"`
 }
 
 // Box specifies height and width dimensions. Used for sizing of a console.

+ 2 - 0
daemon/config_unix.go

@@ -35,6 +35,7 @@ type Config struct {
 	Runtimes             map[string]types.Runtime `json:"runtimes,omitempty"`
 	DefaultRuntime       string                   `json:"default-runtime,omitempty"`
 	OOMScoreAdjust       int                      `json:"oom-score-adjust,omitempty"`
+	Init                 bool                     `json:"init,omitempty"`
 }
 
 // bridgeConfig stores all the bridge driver specific
@@ -91,6 +92,7 @@ func (config *Config) InstallFlags(flags *pflag.FlagSet) {
 	flags.Var(runconfigopts.NewNamedRuntimeOpt("runtimes", &config.Runtimes, stockRuntimeName), "add-runtime", "Register an additional OCI compatible runtime")
 	flags.StringVar(&config.DefaultRuntime, "default-runtime", stockRuntimeName, "Default OCI runtime for containers")
 	flags.IntVar(&config.OOMScoreAdjust, "oom-score-adjust", -500, "Set the oom_score_adj for the daemon")
+	flags.BoolVar(&config.Init, "init", false, "Run an init in the container to forward signals and reap processes")
 
 	config.attachExperimentalFlags(flags)
 }

+ 21 - 0
daemon/oci_linux.go

@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"sort"
 	"strconv"
@@ -585,6 +586,26 @@ func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container)
 		cwd = "/"
 	}
 	s.Process.Args = append([]string{c.Path}, c.Args...)
+
+	// only add the custom init if it is specified and the container is running in its
+	// own private pid namespace.  It does not make sense to add if it is running in the
+	// host namespace or another container's pid namespace where we already have an init
+	if c.HostConfig.PidMode.IsPrivate() {
+		if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
+			(c.HostConfig.Init == nil && daemon.configStore.Init) {
+			s.Process.Args = append([]string{"/dev/init", c.Path}, c.Args...)
+			path, err := exec.LookPath("docker-init")
+			if err != nil {
+				return err
+			}
+			s.Mounts = append(s.Mounts, specs.Mount{
+				Destination: "/dev/init",
+				Type:        "bind",
+				Source:      path,
+				Options:     []string{"bind", "ro"},
+			})
+		}
+	}
 	s.Process.Cwd = cwd
 	s.Process.Env = c.CreateDaemonEnvironment(linkedEnv)
 	s.Process.Terminal = c.Config.Tty

+ 2 - 0
docs/reference/commandline/dockerd.md

@@ -48,6 +48,7 @@ Options:
       -H, --host=[]                          Daemon socket(s) to connect to
       --help                                 Print usage
       --icc=true                             Enable inter-container communication
+      --init                                 Run an init inside containers to forward signals and reap processes
       --insecure-registry=[]                 Enable insecure registry communication
       --ip=0.0.0.0                           Default IP when binding container ports
       --ip-forward=true                      Enable net.ipv4.ip_forward
@@ -1140,6 +1141,7 @@ This is a full example of the allowed configuration options on Linux:
 	"group": "",
 	"cgroup-parent": "",
 	"default-ulimits": {},
+	"init": false,
 	"ipv6": false,
 	"iptables": false,
 	"ip-forward": false,

+ 2 - 2
hack/make.sh

@@ -255,7 +255,7 @@ bundle() {
 	source "$SCRIPTDIR/make/$bundle" "$@"
 }
 
-copy_containerd() {
+copy_binaries() {
 	dir="$1"
 	# Add nested executables to bundle dir so we have complete set of
 	# them available, but only if the native OS/ARCH is the same as the
@@ -263,7 +263,7 @@ copy_containerd() {
 	if [ "$(go env GOOS)/$(go env GOARCH)" == "$(go env GOHOSTOS)/$(go env GOHOSTARCH)" ]; then
 		if [ -x /usr/local/bin/docker-runc ]; then
 			echo "Copying nested executables into $dir"
-			for file in containerd containerd-shim containerd-ctr runc; do
+			for file in containerd containerd-shim containerd-ctr runc init; do
 				cp `which "docker-$file"` "$dir/"
 				if [ "$2" == "hash" ]; then
 					hash_files "$dir/docker-$file"

+ 1 - 0
hack/make/.binary-setup

@@ -7,3 +7,4 @@ DOCKER_CONTAINERD_BINARY_NAME='docker-containerd'
 DOCKER_CONTAINERD_CTR_BINARY_NAME='docker-containerd-ctr'
 DOCKER_CONTAINERD_SHIM_BINARY_NAME='docker-containerd-shim'
 DOCKER_PROXY_BINARY_NAME='docker-proxy'
+DOCKER_INIT_BINARY_NAME='docker-init'

+ 1 - 1
hack/make/binary-daemon

@@ -12,5 +12,5 @@ set -e
 	export BINARY_SHORT_NAME="$DOCKER_PROXY_BINARY_NAME"
 	export SOURCE_PATH='./vendor/src/github.com/docker/libnetwork/cmd/proxy'
 	source "${MAKEDIR}/.binary"
-	copy_containerd "$DEST" 'hash'
+	copy_binaries "$DEST" 'hash'
 )

+ 1 - 0
hack/make/install-binary-daemon

@@ -12,4 +12,5 @@ rm -rf "$DEST"
 	install_binary "${DEST}/${DOCKER_CONTAINERD_CTR_BINARY_NAME}"
 	install_binary "${DEST}/${DOCKER_CONTAINERD_SHIM_BINARY_NAME}"
 	install_binary "${DEST}/${DOCKER_PROXY_BINARY_NAME}"
+	install_binary "${DEST}/${DOCKER_INIT_BINARY_NAME}"
 )

+ 2 - 2
hack/make/tgz

@@ -53,8 +53,8 @@ for d in "$CROSS/"*/*; do
 		cp -L "$d/$PROXY_BINARY_FULLNAME" "$TAR_PATH/${DOCKER_PROXY_BINARY_NAME}${BINARY_EXTENSION}"
 	fi
 
-	# copy over all the containerd binaries
-	copy_containerd $TAR_PATH
+	# copy over all the extra binaries
+	copy_binaries $TAR_PATH
 
 	if [ "$IS_TAR" == "true" ]; then
 		echo "Creating tgz from $BUILD_PATH and naming it $TGZ"

+ 0 - 34
integration-cli/docker_cli_events_test.go

@@ -78,40 +78,6 @@ func (s *DockerSuite) TestEventsUntag(c *check.C) {
 	}
 }
 
-func (s *DockerSuite) TestEventsContainerFailStartDie(c *check.C) {
-	_, _, err := dockerCmdWithError("run", "--name", "testeventdie", "busybox", "blerg")
-	c.Assert(err, checker.NotNil, check.Commentf("Container run with command blerg should have failed, but it did not"))
-
-	out, _ := dockerCmd(c, "events", "--since=0", "--until", daemonUnixTime(c))
-	events := strings.Split(strings.TrimSpace(out), "\n")
-
-	nEvents := len(events)
-	c.Assert(nEvents, checker.GreaterOrEqualThan, 1) //Missing expected event
-
-	actions := eventActionsByIDAndType(c, events, "testeventdie", "container")
-
-	var startEvent bool
-	var dieEvent bool
-	for _, a := range actions {
-		switch a {
-		case "start":
-			startEvent = true
-		case "die":
-			dieEvent = true
-		}
-	}
-
-	// Windows platform is different from Linux, it will start container whatever
-	// so Windows can get start/die event but Linux can't
-	if daemonPlatform == "windows" {
-		c.Assert(startEvent, checker.True, check.Commentf("Start event not found: %v\n%v", actions, events))
-		c.Assert(dieEvent, checker.True, check.Commentf("Die event not found: %v\n%v", actions, events))
-	} else {
-		c.Assert(startEvent, checker.False, check.Commentf("Start event not expected: %v\n%v", actions, events))
-		c.Assert(dieEvent, checker.False, check.Commentf("Die event not expected: %v\n%v", actions, events))
-	}
-}
-
 func (s *DockerSuite) TestEventsLimit(c *check.C) {
 	var waitGroup sync.WaitGroup
 	errChan := make(chan error, 17)

+ 0 - 24
integration-cli/docker_cli_run_test.go

@@ -2403,30 +2403,6 @@ func (s *DockerSuite) TestRunExposePort(c *check.C) {
 	c.Assert(out, checker.Contains, "invalid range format for --expose")
 }
 
-func (s *DockerSuite) TestRunUnknownCommand(c *check.C) {
-	out, _, _ := dockerCmdWithStdoutStderr(c, "create", "busybox", "/bin/nada")
-
-	cID := strings.TrimSpace(out)
-	_, _, err := dockerCmdWithError("start", cID)
-
-	// Windows and Linux are different here by architectural design. Linux will
-	// fail to start the container, so an error is expected. Windows will
-	// successfully start the container, and once started attempt to execute
-	// the command which will fail.
-	if daemonPlatform == "windows" {
-		// Wait for it to exit.
-		waitExited(cID, 30*time.Second)
-		c.Assert(err, check.IsNil)
-	} else {
-		c.Assert(err, check.NotNil)
-	}
-
-	rc := inspectField(c, cID, "State.ExitCode")
-	if rc == "0" {
-		c.Fatalf("ExitCode(%v) cannot be 0", rc)
-	}
-}
-
 func (s *DockerSuite) TestRunModeIpcHost(c *check.C) {
 	// Not applicable on Windows as uses Unix-specific capabilities
 	testRequires(c, SameHostDaemon, DaemonIsLinux, NotUserNamespace)

+ 3 - 3
integration-cli/docker_cli_run_unix_test.go

@@ -1234,11 +1234,11 @@ func (s *DockerSuite) TestRunPidsLimit(c *check.C) {
 	testRequires(c, pidsLimit)
 
 	file := "/sys/fs/cgroup/pids/pids.max"
-	out, _ := dockerCmd(c, "run", "--name", "skittles", "--pids-limit", "2", "busybox", "cat", file)
-	c.Assert(strings.TrimSpace(out), checker.Equals, "2")
+	out, _ := dockerCmd(c, "run", "--name", "skittles", "--pids-limit", "4", "busybox", "cat", file)
+	c.Assert(strings.TrimSpace(out), checker.Equals, "4")
 
 	out = inspectField(c, "skittles", "HostConfig.PidsLimit")
-	c.Assert(out, checker.Equals, "2", check.Commentf("setting the pids limit failed"))
+	c.Assert(out, checker.Equals, "4", check.Commentf("setting the pids limit failed"))
 }
 
 func (s *DockerSuite) TestRunPrivilegedAllowedDevices(c *check.C) {

+ 4 - 0
man/dockerd.8.md

@@ -34,6 +34,7 @@ dockerd - Enable daemon mode
 [**-H**|**--host**[=*[]*]]
 [**--help**]
 [**--icc**[=*true*]]
+[**--init**[=*false*]]
 [**--insecure-registry**[=*[]*]]
 [**--ip**[=*0.0.0.0*]]
 [**--ip-forward**[=*true*]]
@@ -166,6 +167,9 @@ unix://[/path/to/socket] to use.
 **--icc**=*true*|*false*
   Allow unrestricted inter\-container and Docker daemon host communication. If disabled, containers can still be linked together using the **--link** option (see **docker-run(1)**). Default is true.
 
+**--init**
+  Run an init process inside containers for signal forwarding and process reaping. Default is false.
+
 **--insecure-registry**=[]
   Enable insecure registry communication, i.e., enable un-encrypted and/or untrusted communication.
 

+ 0 - 1
oci/defaults_linux.go

@@ -59,7 +59,6 @@ func DefaultSpec() specs.Spec {
 			Options:     []string{"nosuid", "noexec", "nodev"},
 		},
 	}
-
 	s.Process.Capabilities = []string{
 		"CAP_CHOWN",
 		"CAP_DAC_OVERRIDE",

+ 2 - 1
runconfig/opts/opts.go

@@ -2,10 +2,11 @@ package opts
 
 import (
 	"fmt"
-	fopts "github.com/docker/docker/opts"
 	"net"
 	"os"
 	"strings"
+
+	fopts "github.com/docker/docker/opts"
 )
 
 // ValidateAttach validates that the specified string is a valid attach option.

+ 8 - 0
runconfig/opts/parse.go

@@ -103,6 +103,7 @@ type ContainerOptions struct {
 	healthRetries     int
 	runtime           string
 	autoRemove        bool
+	init              bool
 
 	Image string
 	Args  []string
@@ -243,6 +244,8 @@ func AddFlags(flags *pflag.FlagSet) *ContainerOptions {
 	flags.StringVar(&copts.shmSize, "shm-size", "", "Size of /dev/shm, default value is 64MB")
 	flags.StringVar(&copts.utsMode, "uts", "", "UTS namespace to use")
 	flags.StringVar(&copts.runtime, "runtime", "", "Runtime to use for this container")
+
+	flags.BoolVar(&copts.init, "init", false, "Run an init inside the container that forwards signals and reaps processes")
 	return copts
 }
 
@@ -593,6 +596,11 @@ func Parse(flags *pflag.FlagSet, copts *ContainerOptions) (*container.Config, *c
 		Runtime:        copts.runtime,
 	}
 
+	// only set this value if the user provided the flag, else it should default to nil
+	if flags.Changed("init") {
+		hostConfig.Init = &copts.init
+	}
+
 	// When allocating stdin in attached mode, close stdin at client disconnect
 	if config.OpenStdin && config.AttachStdin {
 		config.StdinOnce = true