
Bump swarmkit to ebe39a32e3ed4c3a3783a02c11cccf388818694c

Bumps swarmkit vendoring. Includes docker/swarmkit#2938, which fixes
tasks.db growing out of control on worker nodes.

Signed-off-by: Drew Erny <derny@mirantis.com>
Drew Erny 5 years ago
parent
commit
1dbf34f3aa
3 changed files with 27 additions and 5 deletions
  1. vendor.conf (+1, -1)
  2. vendor/github.com/docker/swarmkit/agent/storage.go (+3, -1)
  3. vendor/github.com/docker/swarmkit/agent/worker.go (+23, -3)

+ 1 - 1
vendor.conf

@@ -130,7 +130,7 @@ github.com/containerd/ttrpc                         0be804eadb152bc3b3c20c5edc31
 github.com/gogo/googleapis                          01e0f9cca9b92166042241267ee2a5cdf5cff46c # v1.3.2
 
 # cluster
-github.com/docker/swarmkit                          49e35619b18200845c9365c1e953440c28868002
+github.com/docker/swarmkit                          ebe39a32e3ed4c3a3783a02c11cccf388818694c
 github.com/gogo/protobuf                            5628607bb4c51c3157aacc3a50f0ab707582b805 # v1.3.1
 github.com/golang/protobuf                          d23c5127dc24889085f8ccea5c9d560a57a879d8 # v1.3.3
 github.com/cloudflare/cfssl                         5d63dbd981b5c408effbb58c442d54761ff94fbd # 1.3.2
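
Each non-comment line in vendor.conf pins an import path to an exact commit, optionally followed by a # comment naming the corresponding tag. As a purely illustrative sketch (not part of this change, and not how the moby tooling itself consumes the file), a small Go helper could read that format and report the revision pinned for a package:

package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// pinnedRevision scans a vendor.conf-style file and returns the commit pinned
// for the given import path. Lines look like:
//   github.com/docker/swarmkit  ebe39a32e3ed4c3a3783a02c11cccf388818694c # optional comment
func pinnedRevision(path, pkg string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	s := bufio.NewScanner(f)
	for s.Scan() {
		line := strings.TrimSpace(s.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue // skip blanks and comment-only lines
		}
		fields := strings.Fields(line)
		if len(fields) >= 2 && fields[0] == pkg {
			return fields[1], nil
		}
	}
	if err := s.Err(); err != nil {
		return "", err
	}
	return "", fmt.Errorf("%s not found in %s", pkg, path)
}

func main() {
	rev, err := pinnedRevision("vendor.conf", "github.com/docker/swarmkit")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(rev) // after this bump: ebe39a32e3ed4c3a3783a02c11cccf388818694c
}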

+ 3 - 1
vendor/github.com/docker/swarmkit/agent/storage.go

@@ -131,7 +131,9 @@ func PutTask(tx *bolt.Tx, task *api.Task) error {
 
 // PutTaskStatus updates the status for the task with id.
 func PutTaskStatus(tx *bolt.Tx, id string, status *api.TaskStatus) error {
-	return withCreateTaskBucketIfNotExists(tx, id, func(bkt *bolt.Bucket) error {
+	// this used to be withCreateTaskBucketIfNotExists, but that could lead
+	// to weird race conditions, and was not necessary.
+	return withTaskBucket(tx, id, func(bkt *bolt.Bucket) error {
 		p, err := proto.Marshal(status)
 		if err != nil {
 			return err
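
The two helpers named above differ only in whether they will create the task's bucket when it is missing. The sketch below is an illustrative reconstruction, not the vendored code: it assumes the go.etcd.io/bbolt API and an invented bucket layout (a root "tasks" bucket with one sub-bucket per task ID), and shows why routing PutTaskStatus through withTaskBucket can no longer resurrect a task whose bucket has already been deleted.

package agent

import (
	"errors"

	bolt "go.etcd.io/bbolt"
)

// errTaskUnknown stands in for the sentinel the vendored code returns when a
// task has no bucket; the exact definition in swarmkit may differ.
var errTaskUnknown = errors.New("task unknown")

// withTaskBucket runs fn against the task's bucket only if that bucket
// already exists; a deleted task yields errTaskUnknown instead of being
// silently re-created.
func withTaskBucket(tx *bolt.Tx, id string, fn func(bkt *bolt.Bucket) error) error {
	root := tx.Bucket([]byte("tasks")) // bucket names here are assumptions
	if root == nil {
		return errTaskUnknown
	}
	bkt := root.Bucket([]byte(id))
	if bkt == nil {
		return errTaskUnknown
	}
	return fn(bkt)
}

// withCreateTaskBucketIfNotExists is the creating variant PutTaskStatus used
// before this change: a late status write could re-create a bucket for a task
// that had just been deleted, leaving an orphan record in tasks.db.
func withCreateTaskBucketIfNotExists(tx *bolt.Tx, id string, fn func(bkt *bolt.Bucket) error) error {
	root, err := tx.CreateBucketIfNotExists([]byte("tasks"))
	if err != nil {
		return err
	}
	bkt, err := root.CreateBucketIfNotExists([]byte(id))
	if err != nil {
		return err
	}
	return fn(bkt)
}

The non-creating variant is what makes the worker-side cleanup in worker.go effective: once a task's bucket is gone, nothing below this layer quietly brings it back.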

+ 23 - 3
vendor/github.com/docker/swarmkit/agent/worker.go

@@ -278,10 +278,15 @@ func reconcileTaskState(ctx context.Context, w *worker, assignments []*api.Assig
 
 	removeTaskAssignment := func(taskID string) error {
 		ctx := log.WithLogger(ctx, log.G(ctx).WithField("task.id", taskID))
-		if err := SetTaskAssignment(tx, taskID, false); err != nil {
-			log.G(ctx).WithError(err).Error("error setting task assignment in database")
+		// if a task is no longer assigned, then we do not have to keep track
+		// of it. a task will only be unassigned when it is deleted on the
+		// manager. instead of SetTaskAssignment to false, we'll just remove the
+		// task now.
+		if err := DeleteTask(tx, taskID); err != nil {
+			log.G(ctx).WithError(err).Error("error removing de-assigned task")
+			return err
 		}
-		return err
+		return nil
 	}
 
 	// If this was a complete set of assignments, we're going to remove all the remaining
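
With the swap above, a de-assigned task's on-disk record is removed immediately instead of lingering with its assignment flag cleared. A hypothetical DeleteTask, following the same assumed bucket layout as the earlier sketch (not the vendored helper itself), would simply drop the task's sub-bucket:

package agent

import bolt "go.etcd.io/bbolt"

// DeleteTask drops everything stored for the task id, using the assumed
// root "tasks" bucket from the sketch above; the real vendored helper may
// differ in detail.
func DeleteTask(tx *bolt.Tx, id string) error {
	root := tx.Bucket([]byte("tasks"))
	if root == nil {
		return nil // nothing stored for any task, nothing to delete
	}
	if err := root.DeleteBucket([]byte(id)); err != nil && err != bolt.ErrBucketNotFound {
		return err
	}
	return nil
}
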
@@ -500,6 +505,21 @@ func (w *worker) newTaskManager(ctx context.Context, tx *bolt.Tx, task *api.Task
 // updateTaskStatus reports statuses to listeners, read lock must be held.
 func (w *worker) updateTaskStatus(ctx context.Context, tx *bolt.Tx, taskID string, status *api.TaskStatus) error {
 	if err := PutTaskStatus(tx, taskID, status); err != nil {
+		// we shouldn't fail to put a task status. however, there exists the
+		// possibility of a race in which we try to put a task status after the
+		// task has been deleted. because this whole contraption is a careful
+		// dance of too-tightly-coupled concurrent parts, fixing that race is
+		// fraught with hazards. instead, we'll recognize that it can occur,
+		// log the error, and then ignore it.
+		if err == errTaskUnknown {
+			// log at info level. debug logging in docker is already really
+			// verbose, so many people disable it. the race that causes this
+			// behavior should be very rare, but if it occurs, we should know
+			// about it, because if there is some case where it is _not_ rare,
+			// then knowing about it will go a long way toward debugging.
+			log.G(ctx).Info("attempted to update status for a task that has been removed")
+			return nil
+		}
 		log.G(ctx).WithError(err).Error("failed writing status to disk")
 		return err
 	}
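
Putting the pieces together: once reconcileTaskState deletes a task, a late status report from its task manager now surfaces errTaskUnknown, which this hunk deliberately downgrades to an informational log instead of an error. The standalone sketch below replays that sequence against a throwaway bbolt database; the bucket names and keys are assumptions, not the vendored schema.

package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"

	bolt "go.etcd.io/bbolt"
)

func main() {
	dir, err := os.MkdirTemp("", "tasksdb")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(dir)

	db, err := bolt.Open(filepath.Join(dir, "tasks.db"), 0o600, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if err := db.Update(func(tx *bolt.Tx) error {
		root, err := tx.CreateBucketIfNotExists([]byte("tasks")) // assumed layout
		if err != nil {
			return err
		}
		// 1. Task assigned: its bucket exists and holds a status.
		bkt, err := root.CreateBucketIfNotExists([]byte("task-1"))
		if err != nil {
			return err
		}
		if err := bkt.Put([]byte("status"), []byte("RUNNING")); err != nil {
			return err
		}
		// 2. Task de-assigned: reconcileTaskState now deletes it outright.
		if err := root.DeleteBucket([]byte("task-1")); err != nil {
			return err
		}
		// 3. A late status report arrives: with the non-creating helper the
		// bucket is simply missing, so we note it and move on instead of
		// resurrecting the task in tasks.db.
		if root.Bucket([]byte("task-1")) == nil {
			fmt.Println("attempted to update status for a task that has been removed")
		}
		return nil
	}); err != nil {
		log.Fatal(err)
	}
}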