Update Swarmkit to pick up fixes to heartbeat period and stalled tasks

Signed-off-by: Adam Williams <awilliams@mirantis.com>
2021-02-22 10:32:08 -08:00 · 2021-02-22 10:32:08 -08:00 · cbd2f726bf
commit cbd2f726bf
parent bc6f4cc703
3 changed files with 33 additions and 7 deletions
--- a/vendor.conf
+++ b/vendor.conf
@ -142,7 +142,7 @@ github.com/gogo/googleapis                          01e0f9cca9b92166042241267ee2
 github.com/cilium/ebpf                              1c8d4c9ef7759622653a1d319284a44652333b28

 # cluster
-github.com/docker/swarmkit                          d6592ddefd8a5319aadff74c558b816b1a0b2590
+github.com/docker/swarmkit                          17d8d4e4d8bdec33d386e6362d3537fa9493ba00
 github.com/gogo/protobuf                            5628607bb4c51c3157aacc3a50f0ab707582b805 # v1.3.1
 github.com/golang/protobuf                          84668698ea25b64748563aa20726db66a6b8d299 # v1.3.5
 github.com/cloudflare/cfssl                         5d63dbd981b5c408effbb58c442d54761ff94fbd # 1.3.2
--- a/vendor/github.com/docker/swarmkit/manager/manager.go
+++ b/vendor/github.com/docker/swarmkit/manager/manager.go
@ -1049,7 +1049,16 @@ func (m *Manager) becomeLeader(ctx context.Context) {

 	go func(d *dispatcher.Dispatcher) {
 		// Initialize the dispatcher.
-		d.Init(m.raftNode, dispatcher.DefaultConfig(), drivers.New(m.config.PluginGetter), m.config.SecurityConfig)
+		var cluster *api.Cluster
+		s.View(func(tx store.ReadTx) {
+			cluster = store.GetCluster(tx, clusterID)
+		})
+		var defaultConfig = dispatcher.DefaultConfig()
+		heartbeatPeriod, err := gogotypes.DurationFromProto(cluster.Spec.Dispatcher.HeartbeatPeriod)
+		if err == nil {
+			defaultConfig.HeartbeatPeriod = heartbeatPeriod
+		}
+		d.Init(m.raftNode, defaultConfig, drivers.New(m.config.PluginGetter), m.config.SecurityConfig)
 		if err := d.Run(ctx); err != nil {
 			log.G(ctx).WithError(err).Error("Dispatcher exited with an error")
 		}
--- a/vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go
+++ b/vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go
@ -721,15 +721,32 @@ func (s *Scheduler) noSuitableNode(ctx context.Context, taskGroup map[string]*ap

 		newT := *t
 		newT.Status.Timestamp = ptypes.MustTimestampProto(time.Now())
-		if explanation != "" {
-			newT.Status.Err = "no suitable node (" + explanation + ")"
+		sv := service.SpecVersion
+		tv := newT.SpecVersion
+		if sv != nil && tv != nil && sv.Index > tv.Index {
+			log.G(ctx).WithField("task.id", t.ID).Debug(
+				"task belongs to old revision of service",
+			)
+			if t.Status.State == api.TaskStatePending && t.DesiredState >= api.TaskStateShutdown {
+				log.G(ctx).WithField("task.id", t.ID).Debug(
+					"task is desired shutdown, scheduler will go ahead and do so",
+				)
+				newT.Status.State = api.TaskStateShutdown
+				newT.Status.Err = ""
+			}
 		} else {
-			newT.Status.Err = "no suitable node"
+			if explanation != "" {
+				newT.Status.Err = "no suitable node (" + explanation + ")"
+			} else {
+				newT.Status.Err = "no suitable node"
+			}
+
+			// re-enqueue a task that should still be attempted
+			s.enqueue(&newT)
 		}
+
 		s.allTasks[t.ID] = &newT
 		schedulingDecisions[t.ID] = schedulingDecision{old: t, new: &newT}
-
-		s.enqueue(&newT)
 	}
 }