ソースを参照

Merge pull request #23213 from crosbymichael/restore-option

Add --live-restore flag
Michael Crosby 9 年 前
コミット
3020081e94

+ 3 - 0
cmd/dockerd/daemon_unix.go

@@ -71,6 +71,9 @@ func (cli *DaemonCli) getPlatformRemoteOptions() []libcontainerd.RemoteOption {
 		args := []string{"--systemd-cgroup=true"}
 		opts = append(opts, libcontainerd.WithRuntimeArgs(args))
 	}
+	if cli.Config.LiveRestore {
+		opts = append(opts, libcontainerd.WithLiveRestore(true))
+	}
 	return opts
 }
 

+ 1 - 0
daemon/config.go

@@ -90,6 +90,7 @@ type CommonConfig struct {
 	TrustKeyPath         string              `json:"-"`
 	CorsHeaders          string              `json:"api-cors-header,omitempty"`
 	EnableCors           bool                `json:"api-enable-cors,omitempty"`
+	LiveRestore          bool                `json:"live-restore,omitempty"`
 
 	// ClusterStore is the storage backend used for the cluster information. It is used by both
 	// multihost networking (to store networks and endpoints information) and by the node discovery

+ 1 - 0
daemon/config_unix.go

@@ -82,6 +82,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin
 	cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers"))
 	cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces"))
 	cmd.StringVar(&config.ContainerdAddr, []string{"-containerd"}, "", usageFn("Path to containerd socket"))
+	cmd.BoolVar(&config.LiveRestore, []string{"-live-restore"}, false, usageFn("Enable live restore of docker when containers are still running"))
 
 	config.attachExperimentalFlags(cmd, usageFn)
 }

+ 15 - 0
daemon/daemon.go

@@ -92,6 +92,7 @@ type Daemon struct {
 	nameIndex                 *registrar.Registrar
 	linkIndex                 *linkIndex
 	containerd                libcontainerd.Client
+	containerdRemote          libcontainerd.Remote
 	defaultIsolation          containertypes.Isolation // Default isolation mode on Windows
 }
 
@@ -552,6 +553,7 @@ func NewDaemon(config *Config, registryService registry.Service, containerdRemot
 
 	d.nameIndex = registrar.NewRegistrar()
 	d.linkIndex = newLinkIndex()
+	d.containerdRemote = containerdRemote
 
 	go d.execCommandGC()
 
@@ -609,6 +611,11 @@ func (daemon *Daemon) shutdownContainer(c *container.Container) error {
 // Shutdown stops the daemon.
 func (daemon *Daemon) Shutdown() error {
 	daemon.shutdown = true
+	// Keep mounts and networking running on daemon shutdown if
+	// we are to keep containers running and restore them.
+	if daemon.configStore.LiveRestore {
+		return nil
+	}
 	if daemon.containers != nil {
 		logrus.Debug("starting clean shutdown of all containers...")
 		daemon.containers.ApplyAll(func(c *container.Container) {
@@ -794,6 +801,7 @@ func (daemon *Daemon) initDiscovery(config *Config) error {
 // - Daemon max concurrent downloads
 // - Daemon max concurrent uploads
 // - Cluster discovery (reconfigure and restart).
+// - Daemon live restore
 func (daemon *Daemon) Reload(config *Config) error {
 	daemon.configStore.reloadLock.Lock()
 	defer daemon.configStore.reloadLock.Unlock()
@@ -808,6 +816,13 @@ func (daemon *Daemon) Reload(config *Config) error {
 	if config.IsValueSet("debug") {
 		daemon.configStore.Debug = config.Debug
 	}
+	if config.IsValueSet("live-restore") {
+		daemon.configStore.LiveRestore = config.LiveRestore
+		if err := daemon.containerdRemote.UpdateOptions(libcontainerd.WithLiveRestore(config.LiveRestore)); err != nil {
+			return err
+		}
+
+	}
 
 	// If no value is set for max-concurrent-downloads we assume it is the default value
 	// We always "reset" as the cost is lightweight and easy to maintain.

+ 13 - 0
docs/admin/configuring.md

@@ -278,3 +278,16 @@ be viewed using `journalctl -u docker`
     May 06 00:22:06 localhost.localdomain docker[2495]: time="2015-05-06T00:22:06Z" level="info" msg="-job acceptconnections() = OK (0)"
 
 _Note: Using and configuring journal is an advanced topic and is beyond the scope of this article._
+
+
+### Daemonless Containers
+
+Starting with Docker 1.12, containers can run without Docker or containerd running.  This allows the
+Docker daemon to exit, be upgraded, or recover from a crash without affecting running containers
+on the system.  To enable this functionality, add the `--live-restore` flag when
+launching `dockerd`.  This ensures that Docker does not kill containers on graceful shutdown or
+on restart, leaving the containers running.
+
+While the Docker daemon is down, logging is still captured, but only up to the kernel's pipe buffer size; once that buffer fills up, the writing process blocks.
+Docker must be restarted to flush these buffers.
+You can modify the kernel's buffer size by changing `/proc/sys/fs/pipe-max-size`.

+ 6 - 6
integration-cli/docker_cli_daemon_experimental_test.go

@@ -63,7 +63,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check
 // them now, should remove the mounts.
 func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
 	testRequires(c, DaemonIsLinux)
-	c.Assert(s.d.StartWithBusybox(), check.IsNil)
+	c.Assert(s.d.StartWithBusybox("--live-restore"), check.IsNil)
 
 	out, err := s.d.Cmd("run", "-d", "busybox", "top")
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
@@ -78,7 +78,7 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
 	c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
 
 	// restart daemon.
-	if err := s.d.Restart(); err != nil {
+	if err := s.d.Restart("--live-restore"); err != nil {
 		c.Fatal(err)
 	}
 
@@ -103,7 +103,7 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
 
 // TestDaemonRestartWithPausedRunningContainer requires live restore of running containers
 func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) {
-	if err := s.d.StartWithBusybox(); err != nil {
+	if err := s.d.StartWithBusybox("--live-restore"); err != nil {
 		t.Fatal(err)
 	}
 
@@ -130,7 +130,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check
 	time.Sleep(3 * time.Second)
 
 	// restart the daemon
-	if err := s.d.Start(); err != nil {
+	if err := s.d.Start("--live-restore"); err != nil {
 		t.Fatal(err)
 	}
 
@@ -148,7 +148,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check
 func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *check.C) {
 	// TODO(mlaventure): Not sure what would the exit code be on windows
 	testRequires(t, DaemonIsLinux)
-	if err := s.d.StartWithBusybox(); err != nil {
+	if err := s.d.StartWithBusybox("--live-restore"); err != nil {
 		t.Fatal(err)
 	}
 
@@ -180,7 +180,7 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithUnpausedRunningContainer(t *che
 	time.Sleep(3 * time.Second)
 
 	// restart the daemon
-	if err := s.d.Start(); err != nil {
+	if err := s.d.Start("--live-restore"); err != nil {
 		t.Fatal(err)
 	}
 

+ 40 - 3
libcontainerd/client_linux.go

@@ -8,6 +8,7 @@ import (
 	"strings"
 	"sync"
 	"syscall"
+	"time"
 
 	"github.com/Sirupsen/logrus"
 	containerd "github.com/docker/containerd/api/grpc/types"
@@ -24,6 +25,7 @@ type client struct {
 	remote        *remote
 	q             queue
 	exitNotifiers map[string]*exitNotifier
+	liveRestore   bool
 }
 
 func (clnt *client) AddProcess(containerID, processFriendlyName string, specp Process) error {
@@ -456,13 +458,48 @@ func (clnt *client) restore(cont *containerd.Container, options ...CreateOption)
 }
 
 func (clnt *client) Restore(containerID string, options ...CreateOption) error { // Restore either re-attaches to containerID (live restore) or stops it and records it as exited.
+	if clnt.liveRestore { // live restore enabled: a container that is not "stopped" is handed to clnt.restore instead of being signalled
+		cont, err := clnt.getContainerdContainer(containerID)
+		if err == nil && cont.Status != "stopped" {
+			if err := clnt.restore(cont, options...); err != nil {
+				logrus.Errorf("error restoring %s: %v", containerID, err) // a restore failure is only logged; Restore still returns nil
+			}
+			return nil
+		}
+		return clnt.setExited(containerID) // lookup failed or the container is stopped: record it as exited
+	}
+
 	cont, err := clnt.getContainerdContainer(containerID)
 	if err == nil && cont.Status != "stopped" {
-		if err := clnt.restore(cont, options...); err != nil {
-			logrus.Errorf("error restoring %s: %v", containerID, err)
+		w := clnt.getOrCreateExitNotifier(containerID) // obtained before any signal is sent; waited on below
+		clnt.lock(cont.Id)
+		container := clnt.newContainer(cont.BundlePath)
+		container.systemPid = systemPid(cont)
+		clnt.appendContainer(container)
+		clnt.unlock(cont.Id)
+
+		container.discardFifos() // read and drop pending stdout/stderr so a writer blocked on the fifos can make progress
+
+		if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil { // ask the container to terminate first
+			logrus.Errorf("error sending sigterm to %v: %v", containerID, err)
+		}
+		select {
+		case <-time.After(10 * time.Second): // no exit notification within 10s of SIGTERM: escalate to SIGKILL
+			if err := clnt.Signal(containerID, int(syscall.SIGKILL)); err != nil {
+				logrus.Errorf("error sending sigkill to %v: %v", containerID, err)
+			}
+			select {
+			case <-time.After(2 * time.Second): // stop waiting 2s after SIGKILL and fall through to the cleanup below
+			case <-w.wait():
+				return nil
+			}
+		case <-w.wait():
+			return nil
 		}
-		return nil
 	}
+
+	clnt.deleteContainer(containerID) // NOTE(review): presumably drops the client's bookkeeping entry for the container - confirm
+
 	return clnt.setExited(containerID)
 }
 

+ 16 - 0
libcontainerd/container_linux.go

@@ -2,6 +2,7 @@ package libcontainerd
 
 import (
 	"encoding/json"
+	"io"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -194,3 +195,18 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
 	}
 	return nil
 }
+
+// discardFifos attempts to fully read the container fifos to unblock processes
+// that may be blocked on the writer side.
+//
+// For each of the stdout and stderr fifos it starts a goroutine that opens the
+// fifo for reading and copies everything into ioutil.Discard. The caller waits
+// until the reader has been opened and only then calls closeReaderFifo, so the
+// close can never be issued ahead of the open it is meant to release.
+func (ctr *container) discardFifos() {
+	for _, i := range []int{syscall.Stdout, syscall.Stderr} {
+		f := ctr.fifo(i)
+		c := make(chan struct{})
+		go func() {
+			// Open the reader BEFORE signalling. Signalling first would let the
+			// caller reach closeReaderFifo while the read-only open has not
+			// even been attempted yet.
+			r := openReaderFromFifo(f)
+			close(c) // this channel is used to not close the writer too early, before readonly open has been called.
+			// Best effort: the data is being thrown away, so a copy error is
+			// deliberately ignored.
+			io.Copy(ioutil.Discard, r)
+		}()
+		<-c
+		closeReaderFifo(f) // avoid blocking permanently on open if there is no writer side
+	}
+}

+ 2 - 0
libcontainerd/remote.go

@@ -9,6 +9,8 @@ type Remote interface {
 	// Cleanup stops containerd if it was started by libcontainerd.
 	// Note this is not used on Windows as there is no remote containerd.
 	Cleanup()
+	// UpdateOptions allows various remote options to be updated at runtime.
+	UpdateOptions(...RemoteOption) error
 }
 
 // RemoteOption allows to configure parameters of remotes.

+ 29 - 0
libcontainerd/remote_linux.go

@@ -52,6 +52,7 @@ type remote struct {
 	pastEvents    map[string]*containerd.Event
 	runtimeArgs   []string
 	daemonWaitCh  chan struct{}
+	liveRestore   bool
 }
 
 // New creates a fresh instance of libcontainerd remote.
@@ -111,6 +112,15 @@ func New(stateDir string, options ...RemoteOption) (_ Remote, err error) {
 	return r, nil
 }
 
+// UpdateOptions applies the given remote options to r at runtime, in the
+// order supplied. It stops at, and returns, the first error reported by an
+// option's Apply; any options after the failing one are left unapplied.
+func (r *remote) UpdateOptions(options ...RemoteOption) error {
+	for i := range options {
+		err := options[i].Apply(r)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
 func (r *remote) handleConnectionChange() {
 	var transientFailureCount = 0
 	state := grpc.Idle
@@ -184,6 +194,7 @@ func (r *remote) Client(b Backend) (Client, error) {
 		},
 		remote:        r,
 		exitNotifiers: make(map[string]*exitNotifier),
+		liveRestore:   r.liveRestore,
 	}
 
 	r.Lock()
@@ -461,3 +472,21 @@ func (d debugLog) Apply(r Remote) error {
 	}
 	return fmt.Errorf("WithDebugLog option not supported for this remote")
 }
+
+// WithLiveRestore defines if containers are stopped on shutdown or restored.
+func WithLiveRestore(v bool) RemoteOption {
+	return liveRestore(v)
+}
+
+// liveRestore is the RemoteOption carrying the live-restore setting.
+type liveRestore bool
+
+// Apply stores the setting on the remote and on every client currently
+// registered with it. It returns an error when r is not a *remote.
+func (l liveRestore) Apply(r Remote) error {
+	rem, isRemote := r.(*remote)
+	if !isRemote {
+		return fmt.Errorf("WithLiveRestore option not supported for this remote")
+	}
+
+	enabled := bool(l)
+	rem.liveRestore = enabled
+	// NOTE(review): rem.clients is walked without taking the remote's lock - confirm callers serialise this.
+	for _, cl := range rem.clients {
+		cl.liveRestore = enabled
+	}
+	return nil
+}

+ 9 - 0
libcontainerd/remote_solaris.go

@@ -19,7 +19,16 @@ func (r *remote) Client(b Backend) (Client, error) {
 func (r *remote) Cleanup() {
 }
 
+func (r *remote) UpdateOptions(opts ...RemoteOption) error { // UpdateOptions is a no-op in this file: opts are never applied.
+	return nil // always reports success
+}
+
 // New creates a fresh instance of libcontainerd remote.
 func New(_ string, _ ...RemoteOption) (Remote, error) {
 	return &remote{}, nil
 }
+
+// WithLiveRestore is a noop on solaris: v is ignored and a nil RemoteOption is returned.
+func WithLiveRestore(v bool) RemoteOption {
+	return nil
+}

+ 9 - 0
libcontainerd/remote_windows.go

@@ -20,8 +20,17 @@ func (r *remote) Client(b Backend) (Client, error) {
 func (r *remote) Cleanup() {
 }
 
+func (r *remote) UpdateOptions(opts ...RemoteOption) error { // UpdateOptions is a no-op in this file: opts are never applied.
+	return nil // always reports success
+}
+
 // New creates a fresh instance of libcontainerd remote. On Windows,
 // this is not used as there is no remote containerd process.
 func New(_ string, _ ...RemoteOption) (Remote, error) {
 	return &remote{}, nil
 }
+
+// WithLiveRestore is a noop on windows: v is ignored and a nil RemoteOption is returned.
+func WithLiveRestore(v bool) RemoteOption {
+	return nil
+}

+ 4 - 0
man/dockerd.8.md

@@ -42,6 +42,7 @@ dockerd - Enable daemon mode
 [**--isolation**[=*default*]]
 [**-l**|**--log-level**[=*info*]]
 [**--label**[=*[]*]]
+[**--live-restore**[=*false*]]
 [**--log-driver**[=*json-file*]]
 [**--log-opt**[=*map[]*]]
 [**--mtu**[=*0*]]
@@ -195,6 +196,9 @@ is `hyperv`. Linux only supports `default`.
 **--label**="[]"
   Set key=value labels to the daemon (displayed in `docker info`)
 
+**--live-restore**=*false*
+  Enable live restore of running containers when the daemon starts so that they are not restarted.
+
 **--log-driver**="*json-file*|*syslog*|*journald*|*gelf*|*fluentd*|*awslogs*|*splunk*|*etwlogs*|*gcplogs*|*none*"
   Default driver for container logs. Default is `json-file`.
   **Warning**: `docker logs` command works only for `json-file` logging driver.