Don’t try to restore swarm from incomplete state

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
This commit is contained in:
Tonis Tiigi 2016-06-18 19:43:47 -07:00
parent 83f2feb72d
commit ded1d9af38
2 changed files with 46 additions and 6 deletions

View file

@ -93,7 +93,7 @@ func New(config Config) (*Cluster, error) {
reconnectDelay: initialReconnectDelay,
}
dt, err := ioutil.ReadFile(filepath.Join(root, stateFile))
st, err := c.loadState()
if err != nil {
if os.IsNotExist(err) {
return c, nil
@ -101,11 +101,6 @@ func New(config Config) (*Cluster, error) {
return nil, err
}
var st state
if err := json.Unmarshal(dt, &st); err != nil {
return nil, err
}
n, ctx, err := c.startNewNode(false, st.ListenAddr, "", "", "", false)
if err != nil {
return nil, err
@ -124,6 +119,25 @@ func New(config Config) (*Cluster, error) {
return c, nil
}
// loadState reads the persisted swarm state from the cluster root directory
// and decodes it.
//
// The state file alone is not enough to restore a node: if the node
// certificate is missing, the on-disk data is treated as incomplete. In that
// case the state is cleared and the "not exist" error from the certificate
// lookup is returned. Any other read, stat or decode error is returned
// unchanged, and the returned *state is nil whenever the error is non-nil.
func (c *Cluster) loadState() (*state, error) {
	raw, readErr := ioutil.ReadFile(filepath.Join(c.root, stateFile))
	if readErr != nil {
		return nil, readErr
	}

	// A state file without a node certificate cannot be restored from.
	certPath := filepath.Join(c.root, "certificates/swarm-node.crt")
	_, statErr := os.Stat(certPath)
	if statErr != nil {
		if os.IsNotExist(statErr) {
			// Best-effort cleanup: the result of clearState is intentionally
			// not checked, the stat error is what gets reported.
			c.clearState()
		}
		return nil, statErr
	}

	restored := new(state)
	if err := json.Unmarshal(raw, restored); err != nil {
		return nil, err
	}
	return restored, nil
}
func (c *Cluster) saveState() error {
dt, err := json.Marshal(state{ListenAddr: c.listenAddr})
if err != nil {
@ -410,6 +424,7 @@ func (c *Cluster) Leave(force bool) error {
}
func (c *Cluster) clearState() error {
// todo: backup this data instead of removing?
if err := os.RemoveAll(c.root); err != nil {
return err
}

View file

@ -578,6 +578,31 @@ func (s *DockerSwarmSuite) TestApiSwarmLeaveOnPendingJoin(c *check.C) {
c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
}
// TestApiSwarmRestoreOnPendingJoin starts a join against an unreachable
// address, waits for the daemon to report the pending state, restarts the
// daemon and then asserts that the node comes back as inactive.
//
// See #23705.
func (s *DockerSwarmSuite) TestApiSwarmRestoreOnPendingJoin(c *check.C) {
	d := s.AddDaemon(c, false, false)

	// The join never completes, so run it in the background.
	go d.Join("nosuchhost:1234", "", "", false) // will block on pending state

	// Poll the daemon until it reports the pending state, giving up after
	// the retry budget is exhausted.
	for attempt := 0; ; attempt++ {
		current, err := d.info()
		c.Assert(err, checker.IsNil)

		if current.LocalNodeState == swarm.LocalNodeStatePending {
			break
		}
		if attempt > 10 {
			c.Fatalf("node did not go to pending state: %v", current.LocalNodeState)
		}

		time.Sleep(100 * time.Millisecond)
	}

	// Restart the daemon while the join is still pending.
	c.Assert(d.Stop(), checker.IsNil)
	c.Assert(d.Start(), checker.IsNil)

	// After the restart the node must report the inactive state.
	afterRestart, err := d.info()
	c.Assert(err, checker.IsNil)
	c.Assert(afterRestart.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
}
func (s *DockerSwarmSuite) TestApiSwarmManagerRestore(c *check.C) {
d1 := s.AddDaemon(c, true, true)