瀏覽代碼

Fix already active error on restoring from ungraceful restart

On killing the processes that were still running
after ungraceful restart, containerd can’t fully
exit the process if the fifos have not been fully
read. In this case process was just marked as exited
and not removed from the internal containers array.

Fixes #22913

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
Tonis Tiigi 9 年之前
父節點
當前提交
1e36e346db

+ 20 - 0
integration-cli/docker_cli_daemon_not_experimental_test.go

@@ -37,3 +37,23 @@ func (s *DockerDaemonSuite) TestCleanupMountsAfterDaemonKill(c *check.C) {
 	comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
 	comment = check.Commentf("%s is still mounted from older daemon start:\nDaemon root repository %s\n%s", id, s.d.folder, mountOut)
 	c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
 	c.Assert(strings.Contains(string(mountOut), id), check.Equals, false, comment)
 }
 }
+
+// #22913
+func (s *DockerDaemonSuite) TestContainerStartAfterDaemonKill(c *check.C) {
+	testRequires(c, DaemonIsLinux)
+	c.Assert(s.d.StartWithBusybox(), check.IsNil)
+
+	// the application is chosen so it generates output and doesn't react to SIGTERM
+	out, err := s.d.Cmd("run", "-d", "busybox", "sh", "-c", "while true;do date;done")
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+	id := strings.TrimSpace(out)
+	c.Assert(s.d.cmd.Process.Signal(os.Kill), check.IsNil)
+
+	// restart daemon.
+	if err := s.d.Restart(); err != nil {
+		c.Fatal(err)
+	}
+
+	out, err = s.d.Cmd("start", id)
+	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
+}

+ 5 - 0
libcontainerd/client_shutdownrestore_linux.go

@@ -20,6 +20,8 @@ func (clnt *client) Restore(containerID string, options ...CreateOption) error {
 		clnt.appendContainer(container)
 		clnt.appendContainer(container)
 		clnt.unlock(cont.Id)
 		clnt.unlock(cont.Id)
 
 
+		container.discardFifos()
+
 		if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil {
 		if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil {
 			logrus.Errorf("error sending sigterm to %v: %v", containerID, err)
 			logrus.Errorf("error sending sigterm to %v: %v", containerID, err)
 		}
 		}
@@ -37,5 +39,8 @@ func (clnt *client) Restore(containerID string, options ...CreateOption) error {
 			return nil
 			return nil
 		}
 		}
 	}
 	}
+
+	clnt.deleteContainer(containerID)
+
 	return clnt.setExited(containerID)
 	return clnt.setExited(containerID)
 }
 }

+ 16 - 0
libcontainerd/container_linux.go

@@ -2,6 +2,7 @@ package libcontainerd
 
 
 import (
 import (
 	"encoding/json"
 	"encoding/json"
+	"io"
 	"io/ioutil"
 	"io/ioutil"
 	"os"
 	"os"
 	"path/filepath"
 	"path/filepath"
@@ -191,3 +192,18 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
 	}
 	}
 	return nil
 	return nil
 }
 }
+
+// discardFifos attempts to fully read the container fifos to unblock processes
+// that may be blocked on the writer side.
+func (ctr *container) discardFifos() {
+	for _, i := range []int{syscall.Stdout, syscall.Stderr} {
+		f := ctr.fifo(i)
+		c := make(chan struct{})
+		go func() {
+			close(c) // this channel is used to not close the writer too early, before readonly open has been called.
+			io.Copy(ioutil.Discard, openReaderFromFifo(f))
+		}()
+		<-c
+		closeReaderFifo(f) // avoid blocking permanently on open if there is no writer side
+	}
+}