浏览代码

Merge pull request #23723 from tonistiigi/incomplete-restore

Don’t try to restore swarm from incomplete state
Vincent Demeester 9 年之前
父节点
当前提交
4cc70c921f
共有 2 个文件被更改，包括 46 次插入和 6 次删除
  1. 21 6
      daemon/cluster/cluster.go
  2. 25 0
      integration-cli/docker_api_swarm_test.go

+ 21 - 6
daemon/cluster/cluster.go

@@ -93,7 +93,7 @@ func New(config Config) (*Cluster, error) {
 		reconnectDelay: initialReconnectDelay,
 	}
 
-	dt, err := ioutil.ReadFile(filepath.Join(root, stateFile))
+	st, err := c.loadState()
 	if err != nil {
 		if os.IsNotExist(err) {
 			return c, nil
@@ -101,11 +101,6 @@ func New(config Config) (*Cluster, error) {
 		return nil, err
 	}
 
-	var st state
-	if err := json.Unmarshal(dt, &st); err != nil {
-		return nil, err
-	}
-
 	n, ctx, err := c.startNewNode(false, st.ListenAddr, "", "", "", false)
 	if err != nil {
 		return nil, err
@@ -124,6 +119,25 @@ func New(config Config) (*Cluster, error) {
 	return c, nil
 }
 
+// loadState reads the persisted cluster state from stateFile under c.root
+// and decodes it from JSON.
+//
+// It returns the read error unchanged when the state file cannot be read,
+// the os.Stat error when the node certificate cannot be inspected, and the
+// decode error when the file is not valid JSON. The caller in New treats an
+// os.IsNotExist error as "nothing to restore" and returns the cluster
+// without starting a node.
+func (c *Cluster) loadState() (*state, error) {
+	dt, err := ioutil.ReadFile(filepath.Join(c.root, stateFile))
+	if err != nil {
+		return nil, err
+	}
+	// missing certificate means no actual state to restore from
+	if _, err := os.Stat(filepath.Join(c.root, "certificates/swarm-node.crt")); err != nil {
+		if os.IsNotExist(err) {
+			// NOTE(review): the error returned by clearState is discarded
+			// here — confirm that best-effort cleanup is intended.
+			c.clearState()
+		}
+		// The Stat error is returned as-is, so a missing certificate
+		// surfaces to the caller as a not-exist error.
+		return nil, err
+	}
+	var st state
+	if err := json.Unmarshal(dt, &st); err != nil {
+		return nil, err
+	}
+	return &st, nil
+}
+
 func (c *Cluster) saveState() error {
 	dt, err := json.Marshal(state{ListenAddr: c.listenAddr})
 	if err != nil {
@@ -410,6 +424,7 @@ func (c *Cluster) Leave(force bool) error {
 }
 
 func (c *Cluster) clearState() error {
+	// todo: backup this data instead of removing?
 	if err := os.RemoveAll(c.root); err != nil {
 		return err
 	}

+ 25 - 0
integration-cli/docker_api_swarm_test.go

@@ -578,6 +578,31 @@ func (s *DockerSwarmSuite) TestApiSwarmLeaveOnPendingJoin(c *check.C) {
 	c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
 }
 
+// #23705
+//
+// TestApiSwarmRestoreOnPendingJoin checks that a daemon stopped while its
+// swarm join is still pending reports LocalNodeStateInactive after it is
+// started again.
+func (s *DockerSwarmSuite) TestApiSwarmRestoreOnPendingJoin(c *check.C) {
+	d := s.AddDaemon(c, false, false)
+	// The join runs in its own goroutine and its result is never observed.
+	go d.Join("nosuchhost:1234", "", "", false) // will block on pending state
+
+	// Poll the daemon until it reports the pending state; the test fails
+	// on the 12th poll (i == 11) if the state has not been reached, with a
+	// 100ms sleep between polls.
+	for i := 0; ; i++ {
+		info, err := d.info()
+		c.Assert(err, checker.IsNil)
+		if info.LocalNodeState == swarm.LocalNodeStatePending {
+			break
+		}
+		if i > 10 {
+			c.Fatalf("node did not go to pending state: %v", info.LocalNodeState)
+		}
+		time.Sleep(100 * time.Millisecond)
+	}
+
+	// Restart the daemon while the join is still pending.
+	c.Assert(d.Stop(), checker.IsNil)
+	c.Assert(d.Start(), checker.IsNil)
+
+	// After the restart the node must report the inactive state.
+	info, err := d.info()
+	c.Assert(err, checker.IsNil)
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
+}
+
 func (s *DockerSwarmSuite) TestApiSwarmManagerRestore(c *check.C) {
 	d1 := s.AddDaemon(c, true, true)