Browse Source

Merge pull request #21372 from anusha-ragunathan/ctrd-rebase

Update mount state of live containers after a daemon crash.
Sebastiaan van Stijn 9 years ago
parent
commit
ffee5588cd

+ 5 - 0
daemon/daemon.go

@@ -293,6 +293,11 @@ func (daemon *Daemon) restore() error {
 		go func(c *container.Container) {
 			defer wg.Done()
 			if c.IsRunning() || c.IsPaused() {
+				// Fix activityCount such that graph mounts can be unmounted later
+				if err := daemon.layerStore.ReinitRWLayer(c.RWLayer); err != nil {
+					logrus.Errorf("Failed to ReinitRWLayer for %s due to %s", c.ID, err)
+					return
+				}
 				if err := daemon.containerd.Restore(c.ID, libcontainerd.WithRestartManager(c.RestartManager(true))); err != nil {
 					logrus.Errorf("Failed to restore with containerd: %q", err)
 					return

+ 4 - 0
distribution/xfer/download_test.go

@@ -121,6 +121,10 @@ func (ls *mockLayerStore) GetMountID(string) (string, error) {
 	return "", errors.New("not implemented")
 }
 
+func (ls *mockLayerStore) ReinitRWLayer(layer.RWLayer) error {
+	return errors.New("not implemented")
+}
+
 func (ls *mockLayerStore) Cleanup() error {
 	return nil
 }

+ 45 - 0
integration-cli/docker_cli_daemon_experimental_test.go

@@ -3,6 +3,8 @@
 package main
 
 import (
+	"io/ioutil"
+	"os"
 	"os/exec"
 	"strings"
 	"time"
@@ -56,6 +58,49 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithKilledRunningContainer(t *check
 
 }
 
+// os.Kill should kill daemon ungracefully, leaving behind live containers.
+// The live containers should be known to the restarted daemon. Stopping
+// them now, should remove the mounts.
+func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonCrash(c *check.C) {
+	testRequires(c, DaemonIsLinux)
+	c.Assert(s.d.StartWithBusybox(), check.IsNil)
+
+	out, err := s.d.Cmd("run", "-d", "busybox", "top")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+	id := strings.TrimSpace(out)
+
+	c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
+	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+
+	// container mounts should exist even after daemon has crashed.
+	comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
+
+	// restart daemon.
+	if err := s.d.Restart(); err != nil {
+		c.Fatal(err)
+	}
+
+	// container should be running.
+	out, err = s.d.Cmd("inspect", "--format='{{.State.Running}}'", id)
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+	out = strings.TrimSpace(out)
+	if out != "true" {
+		c.Fatalf("Container %s expected to stay alive after daemon restart", id)
+	}
+
+	// 'docker stop' should work.
+	out, err = s.d.Cmd("stop", id)
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+
+	// Now, container mounts should be gone.
+	mountOut, err = ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+	comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
+}
+
 // TestDaemonRestartWithPausedRunningContainer requires live restore of running containers
 func (s *DockerDaemonSuite) TestDaemonRestartWithPausedRunningContainer(t *check.C) {
 	if err := s.d.StartWithBusybox(); err != nil {

+ 39 - 0
integration-cli/docker_cli_daemon_not_experimental_test.go

@@ -0,0 +1,39 @@
+// +build daemon,!windows,!experimental
+
+package main
+
+import (
+	"io/ioutil"
+	"os"
+	"strings"
+
+	"github.com/go-check/check"
+)
+
+// os.Kill should kill daemon ungracefully, leaving behind container mounts.
+// A subsequent daemon restart shoud clean up said mounts.
+func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonKill(c *check.C) {
+	c.Assert(s.d.StartWithBusybox(), check.IsNil)
+
+	out, err := s.d.Cmd("run", "-d", "busybox", "top")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+	id := strings.TrimSpace(out)
+	c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
+	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+
+	// container mounts should exist even after daemon has crashed.
+	comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
+
+	// restart daemon.
+	if err := s.d.Restart(); err != nil {
+		c.Fatal(err)
+	}
+
+	// Now, container mounts should be gone.
+	mountOut, err = ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+	comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
+}

+ 35 - 6
integration-cli/docker_cli_daemon_test.go

@@ -1501,25 +1501,54 @@ func (s *DockerDaemonSuite) TestDaemonRestartWithSocketAsVolume(c *check.C) {
 	c.Assert(s.d.Restart(), check.IsNil)
 }
 
-func (s *DockerDaemonSuite) TestCleanupMountsAfterCrash(c *check.C) {
+// os.Kill should kill daemon ungracefully, leaving behind container mounts.
+// A subsequent daemon restart shoud clean up said mounts.
+func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonAndContainerKill(c *check.C) {
 	c.Assert(s.d.StartWithBusybox(), check.IsNil)
 
 	out, err := s.d.Cmd("run", "-d", "busybox", "top")
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
 	id := strings.TrimSpace(out)
-	c.Assert(s.d.Kill(), check.IsNil)
+	c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
+	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+
+	// container mounts should exist even after daemon has crashed.
+	comment := check.Commentf("%s should stay mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, true, comment)
 
 	// kill the container
 	runCmd := exec.Command(ctrBinary, "--address", "/var/run/docker/libcontainerd/docker-containerd.sock", "containers", "kill", id)
 	if out, ec, err := runCommandWithOutput(runCmd); err != nil {
-		c.Fatalf("Failed to run ctr, ExitCode: %d, err: '%v' output: '%s' cid: '%s'\n", ec, err, out, id)
+		c.Fatalf("Failed to run ctr, ExitCode: %d, err: %v output: %s id: %s\n", ec, err, out, id)
+	}
+
+	// restart daemon.
+	if err := s.d.Restart(); err != nil {
+		c.Fatal(err)
 	}
 
-	// Give time to containerd to process the command if we don't
-	// the exit event might be received after we do the inspect
+	// Now, container mounts should be gone.
+	mountOut, err = ioutil.ReadFile("/proc/self/mountinfo")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
+	comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
+	c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
+}
+
+// os.Interrupt should perform a graceful daemon shutdown and hence cleanup mounts.
+func (s *DockerDaemonSuite) TestCleanupMountsAfterGracefulShutdown(c *check.C) {
+	c.Assert(s.d.StartWithBusybox(), check.IsNil)
+
+	out, err := s.d.Cmd("run", "-d", "busybox", "top")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+	id := strings.TrimSpace(out)
+
+	// Send SIGINT and daemon should clean up
+	c.Assert(s.d.cmd.Process.Signal(os.Interrupt), check.IsNil)
+
+	// Wait a bit for the daemon to handle cleanups.
 	time.Sleep(3 * time.Second)
 
-	c.Assert(s.d.Start(), check.IsNil)
 	mountOut, err := ioutil.ReadFile("/proc/self/mountinfo")
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", mountOut))
 

+ 1 - 0
layer/layer.go

@@ -174,6 +174,7 @@ type Store interface {
 	CreateRWLayer(id string, parent ChainID, mountLabel string, initFunc MountInit, storageOpt map[string]string) (RWLayer, error)
 	GetRWLayer(id string) (RWLayer, error)
 	GetMountID(id string) (string, error)
+	ReinitRWLayer(l RWLayer) error
 	ReleaseRWLayer(RWLayer) ([]Metadata, error)
 
 	Cleanup() error

+ 20 - 1
layer/layer_store.go

@@ -487,11 +487,30 @@ func (ls *layerStore) GetMountID(id string) (string, error) {
 	if !ok {
 		return "", ErrMountDoesNotExist
 	}
-	logrus.Debugf("GetRWLayer id: %s -> mountID: %s", id, mount.mountID)
+	logrus.Debugf("GetMountID id: %s -> mountID: %s", id, mount.mountID)
 
 	return mount.mountID, nil
 }
 
+// ReinitRWLayer reinitializes a given mount to the layerstore, specifically
+// initializing the usage count. It should strictly only be used in the
+// daemon's restore path to restore state of live containers.
+func (ls *layerStore) ReinitRWLayer(l RWLayer) error {
+	ls.mountL.Lock()
+	defer ls.mountL.Unlock()
+
+	m, ok := ls.mounts[l.Name()]
+	if !ok {
+		return ErrMountDoesNotExist
+	}
+
+	if err := m.incActivityCount(l); err != nil {
+		return err
+	}
+
+	return nil
+}
+
 func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) {
 	ls.mountL.Lock()
 	defer ls.mountL.Unlock()

+ 21 - 0
layer/mounted_layer.go

@@ -83,6 +83,18 @@ func (ml *mountedLayer) hasReferences() bool {
 	return len(ml.references) > 0
 }
 
+func (ml *mountedLayer) incActivityCount(ref RWLayer) error {
+	rl, ok := ml.references[ref]
+	if !ok {
+		return ErrLayerNotRetained
+	}
+
+	if err := rl.acquire(); err != nil {
+		return err
+	}
+	return nil
+}
+
 func (ml *mountedLayer) deleteReference(ref RWLayer) error {
 	rl, ok := ml.references[ref]
 	if !ok {
@@ -111,6 +123,15 @@ type referencedRWLayer struct {
 	activityCount int
 }
 
+func (rl *referencedRWLayer) acquire() error {
+	rl.activityL.Lock()
+	defer rl.activityL.Unlock()
+
+	rl.activityCount++
+
+	return nil
+}
+
 func (rl *referencedRWLayer) release() error {
 	rl.activityL.Lock()
 	defer rl.activityL.Unlock()