Kaynağa Gözat

Merge pull request #26421 from aaronlehmann/update-thresholds-rollbacks

Service update failure thresholds and rollback
Sebastiaan van Stijn 8 yıl önce
ebeveyn
işleme
3b0660da30

+ 1 - 1
api/server/router/swarm/backend.go

@@ -15,7 +15,7 @@ type Backend interface {
 	GetServices(basictypes.ServiceListOptions) ([]types.Service, error)
 	GetServices(basictypes.ServiceListOptions) ([]types.Service, error)
 	GetService(string) (types.Service, error)
 	GetService(string) (types.Service, error)
 	CreateService(types.ServiceSpec, string) (string, error)
 	CreateService(types.ServiceSpec, string) (string, error)
-	UpdateService(string, uint64, types.ServiceSpec, string) error
+	UpdateService(string, uint64, types.ServiceSpec, string, string) error
 	RemoveService(string) error
 	RemoveService(string) error
 	GetNodes(basictypes.NodeListOptions) ([]types.Node, error)
 	GetNodes(basictypes.NodeListOptions) ([]types.Node, error)
 	GetNode(string) (types.Node, error)
 	GetNode(string) (types.Node, error)

+ 3 - 1
api/server/router/swarm/cluster_routes.go

@@ -156,7 +156,9 @@ func (sr *swarmRouter) updateService(ctx context.Context, w http.ResponseWriter,
 	// Get returns "" if the header does not exist
 	// Get returns "" if the header does not exist
 	encodedAuth := r.Header.Get("X-Registry-Auth")
 	encodedAuth := r.Header.Get("X-Registry-Auth")
 
 
-	if err := sr.backend.UpdateService(vars["id"], version, service, encodedAuth); err != nil {
+	registryAuthFrom := r.URL.Query().Get("registryAuthFrom")
+
+	if err := sr.backend.UpdateService(vars["id"], version, service, encodedAuth, registryAuthFrom); err != nil {
 		logrus.Errorf("Error updating service %s: %v", vars["id"], err)
 		logrus.Errorf("Error updating service %s: %v", vars["id"], err)
 		return err
 		return err
 	}
 	}

+ 11 - 0
api/types/client.go

@@ -275,6 +275,12 @@ type ServiceCreateResponse struct {
 	ID string
 	ID string
 }
 }
 
 
+// Values for RegistryAuthFrom in ServiceUpdateOptions
+const (
+	RegistryAuthFromSpec         = "spec"
+	RegistryAuthFromPreviousSpec = "previous-spec"
+)
+
 // ServiceUpdateOptions contains the options to be used for updating services.
 // ServiceUpdateOptions contains the options to be used for updating services.
 type ServiceUpdateOptions struct {
 type ServiceUpdateOptions struct {
 	// EncodedRegistryAuth is the encoded registry authorization credentials to
 	// EncodedRegistryAuth is the encoded registry authorization credentials to
@@ -286,6 +292,11 @@ type ServiceUpdateOptions struct {
 	// TODO(stevvooe): Consider moving the version parameter of ServiceUpdate
 	// TODO(stevvooe): Consider moving the version parameter of ServiceUpdate
 	// into this field. While it does open API users up to racy writes, most
 	// into this field. While it does open API users up to racy writes, most
 	// users may not need that level of consistency in practice.
 	// users may not need that level of consistency in practice.
+
+	// RegistryAuthFrom specifies where to find the registry authorization
+	// credentials if they are not given in EncodedRegistryAuth. Valid
+	// values are "spec" and "previous-spec".
+	RegistryAuthFrom string
 }
 }
 
 
 // ServiceListOptions holds parameters to list services with.
 // ServiceListOptions holds parameters to list services with.

+ 31 - 3
api/types/swarm/service.go

@@ -7,6 +7,7 @@ type Service struct {
 	ID string
 	ID string
 	Meta
 	Meta
 	Spec         ServiceSpec  `json:",omitempty"`
 	Spec         ServiceSpec  `json:",omitempty"`
+	PreviousSpec *ServiceSpec `json:",omitempty"`
 	Endpoint     Endpoint     `json:",omitempty"`
 	Endpoint     Endpoint     `json:",omitempty"`
 	UpdateStatus UpdateStatus `json:",omitempty"`
 	UpdateStatus UpdateStatus `json:",omitempty"`
 }
 }
@@ -71,7 +72,34 @@ const (
 
 
 // UpdateConfig represents the update configuration.
 // UpdateConfig represents the update configuration.
 type UpdateConfig struct {
 type UpdateConfig struct {
-	Parallelism   uint64        `json:",omitempty"`
-	Delay         time.Duration `json:",omitempty"`
-	FailureAction string        `json:",omitempty"`
+	// Maximum number of tasks to be updated in one iteration.
+	// 0 means unlimited parallelism.
+	Parallelism uint64 `json:",omitempty"`
+
+	// Amount of time between updates.
+	Delay time.Duration `json:",omitempty"`
+
+	// FailureAction is the action to take when an update fails.
+	FailureAction string `json:",omitempty"`
+
+	// Monitor indicates how long to monitor a task for failure after it is
+	// created. If the task fails by ending up in one of the states
+	// REJECTED, COMPLETED, or FAILED, within Monitor from its creation,
+	// this counts as a failure. If it fails after Monitor, it does not
+	// count as a failure. If Monitor is unspecified, a default value will
+	// be used.
+	Monitor time.Duration `json:",omitempty"`
+
+	// MaxFailureRatio is the fraction of tasks that may fail during
+	// an update before the failure action is invoked. Any task created by
+	// the current update which ends up in one of the states REJECTED,
+	// COMPLETED or FAILED within Monitor from its creation counts as a
+	// failure. The number of failures is divided by the number of tasks
+	// being updated, and if this fraction is greater than
+	// MaxFailureRatio, the failure action is invoked.
+	//
+	// If the failure action is CONTINUE, there is no effect.
+	// If the failure action is PAUSE, no more tasks will be updated until
+	// another update is started.
+	MaxFailureRatio float32
 }
 }

+ 17 - 1
cli/command/formatter/service.go

@@ -41,10 +41,14 @@ Placement:
 {{- if .HasUpdateConfig }}
 {{- if .HasUpdateConfig }}
 UpdateConfig:
 UpdateConfig:
  Parallelism:	{{ .UpdateParallelism }}
  Parallelism:	{{ .UpdateParallelism }}
-{{- if .HasUpdateDelay -}}
+{{- if .HasUpdateDelay}}
  Delay:		{{ .UpdateDelay }}
  Delay:		{{ .UpdateDelay }}
 {{- end }}
 {{- end }}
  On failure:	{{ .UpdateOnFailure }}
  On failure:	{{ .UpdateOnFailure }}
+{{- if .HasUpdateMonitor}}
+ Monitoring Period: {{ .UpdateMonitor }}
+{{- end }}
+ Max failure ratio: {{ .UpdateMaxFailureRatio }}
 {{- end }}
 {{- end }}
 ContainerSpec:
 ContainerSpec:
  Image:		{{ .ContainerImage }}
  Image:		{{ .ContainerImage }}
@@ -218,6 +222,18 @@ func (ctx *serviceInspectContext) UpdateOnFailure() string {
 	return ctx.Service.Spec.UpdateConfig.FailureAction
 	return ctx.Service.Spec.UpdateConfig.FailureAction
 }
 }
 
 
+func (ctx *serviceInspectContext) HasUpdateMonitor() bool {
+	return ctx.Service.Spec.UpdateConfig.Monitor.Nanoseconds() > 0
+}
+
+func (ctx *serviceInspectContext) UpdateMonitor() time.Duration {
+	return ctx.Service.Spec.UpdateConfig.Monitor
+}
+
+func (ctx *serviceInspectContext) UpdateMaxFailureRatio() float32 {
+	return ctx.Service.Spec.UpdateConfig.MaxFailureRatio
+}
+
 func (ctx *serviceInspectContext) ContainerImage() string {
 func (ctx *serviceInspectContext) ContainerImage() string {
 	return ctx.Service.Spec.TaskTemplate.ContainerSpec.Image
 	return ctx.Service.Spec.TaskTemplate.ContainerSpec.Image
 }
 }

+ 56 - 48
cli/command/service/opts.go

@@ -267,9 +267,11 @@ func (m *MountOpt) Value() []mounttypes.Mount {
 }
 }
 
 
 type updateOptions struct {
 type updateOptions struct {
-	parallelism uint64
-	delay       time.Duration
-	onFailure   string
+	parallelism     uint64
+	delay           time.Duration
+	monitor         time.Duration
+	onFailure       string
+	maxFailureRatio float32
 }
 }
 
 
 type resourceOptions struct {
 type resourceOptions struct {
@@ -458,9 +460,11 @@ func (opts *serviceOptions) ToService() (swarm.ServiceSpec, error) {
 		Networks: convertNetworks(opts.networks),
 		Networks: convertNetworks(opts.networks),
 		Mode:     swarm.ServiceMode{},
 		Mode:     swarm.ServiceMode{},
 		UpdateConfig: &swarm.UpdateConfig{
 		UpdateConfig: &swarm.UpdateConfig{
-			Parallelism:   opts.update.parallelism,
-			Delay:         opts.update.delay,
-			FailureAction: opts.update.onFailure,
+			Parallelism:     opts.update.parallelism,
+			Delay:           opts.update.delay,
+			Monitor:         opts.update.monitor,
+			FailureAction:   opts.update.onFailure,
+			MaxFailureRatio: opts.update.maxFailureRatio,
 		},
 		},
 		EndpointSpec: opts.endpoint.ToEndpointSpec(),
 		EndpointSpec: opts.endpoint.ToEndpointSpec(),
 	}
 	}
@@ -507,7 +511,9 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
 
 
 	flags.Uint64Var(&opts.update.parallelism, flagUpdateParallelism, 1, "Maximum number of tasks updated simultaneously (0 to update all at once)")
 	flags.Uint64Var(&opts.update.parallelism, flagUpdateParallelism, 1, "Maximum number of tasks updated simultaneously (0 to update all at once)")
 	flags.DurationVar(&opts.update.delay, flagUpdateDelay, time.Duration(0), "Delay between updates")
 	flags.DurationVar(&opts.update.delay, flagUpdateDelay, time.Duration(0), "Delay between updates")
+	flags.DurationVar(&opts.update.monitor, flagUpdateMonitor, time.Duration(0), "Duration after each task update to monitor for failure")
 	flags.StringVar(&opts.update.onFailure, flagUpdateFailureAction, "pause", "Action on update failure (pause|continue)")
 	flags.StringVar(&opts.update.onFailure, flagUpdateFailureAction, "pause", "Action on update failure (pause|continue)")
+	flags.Float32Var(&opts.update.maxFailureRatio, flagUpdateMaxFailureRatio, 0, "Failure rate to tolerate during an update")
 
 
 	flags.StringVar(&opts.endpoint.mode, flagEndpointMode, "", "Endpoint mode (vip or dnsrr)")
 	flags.StringVar(&opts.endpoint.mode, flagEndpointMode, "", "Endpoint mode (vip or dnsrr)")
 
 
@@ -518,46 +524,48 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
 }
 }
 
 
 const (
 const (
-	flagConstraint           = "constraint"
-	flagConstraintRemove     = "constraint-rm"
-	flagConstraintAdd        = "constraint-add"
-	flagContainerLabel       = "container-label"
-	flagContainerLabelRemove = "container-label-rm"
-	flagContainerLabelAdd    = "container-label-add"
-	flagEndpointMode         = "endpoint-mode"
-	flagEnv                  = "env"
-	flagEnvRemove            = "env-rm"
-	flagEnvAdd               = "env-add"
-	flagGroupAdd             = "group-add"
-	flagGroupRemove          = "group-rm"
-	flagLabel                = "label"
-	flagLabelRemove          = "label-rm"
-	flagLabelAdd             = "label-add"
-	flagLimitCPU             = "limit-cpu"
-	flagLimitMemory          = "limit-memory"
-	flagMode                 = "mode"
-	flagMount                = "mount"
-	flagMountRemove          = "mount-rm"
-	flagMountAdd             = "mount-add"
-	flagName                 = "name"
-	flagNetwork              = "network"
-	flagPublish              = "publish"
-	flagPublishRemove        = "publish-rm"
-	flagPublishAdd           = "publish-add"
-	flagReplicas             = "replicas"
-	flagReserveCPU           = "reserve-cpu"
-	flagReserveMemory        = "reserve-memory"
-	flagRestartCondition     = "restart-condition"
-	flagRestartDelay         = "restart-delay"
-	flagRestartMaxAttempts   = "restart-max-attempts"
-	flagRestartWindow        = "restart-window"
-	flagStopGracePeriod      = "stop-grace-period"
-	flagUpdateDelay          = "update-delay"
-	flagUpdateFailureAction  = "update-failure-action"
-	flagUpdateParallelism    = "update-parallelism"
-	flagUser                 = "user"
-	flagWorkdir              = "workdir"
-	flagRegistryAuth         = "with-registry-auth"
-	flagLogDriver            = "log-driver"
-	flagLogOpt               = "log-opt"
+	flagConstraint            = "constraint"
+	flagConstraintRemove      = "constraint-rm"
+	flagConstraintAdd         = "constraint-add"
+	flagContainerLabel        = "container-label"
+	flagContainerLabelRemove  = "container-label-rm"
+	flagContainerLabelAdd     = "container-label-add"
+	flagEndpointMode          = "endpoint-mode"
+	flagEnv                   = "env"
+	flagEnvRemove             = "env-rm"
+	flagEnvAdd                = "env-add"
+	flagGroupAdd              = "group-add"
+	flagGroupRemove           = "group-rm"
+	flagLabel                 = "label"
+	flagLabelRemove           = "label-rm"
+	flagLabelAdd              = "label-add"
+	flagLimitCPU              = "limit-cpu"
+	flagLimitMemory           = "limit-memory"
+	flagMode                  = "mode"
+	flagMount                 = "mount"
+	flagMountRemove           = "mount-rm"
+	flagMountAdd              = "mount-add"
+	flagName                  = "name"
+	flagNetwork               = "network"
+	flagPublish               = "publish"
+	flagPublishRemove         = "publish-rm"
+	flagPublishAdd            = "publish-add"
+	flagReplicas              = "replicas"
+	flagReserveCPU            = "reserve-cpu"
+	flagReserveMemory         = "reserve-memory"
+	flagRestartCondition      = "restart-condition"
+	flagRestartDelay          = "restart-delay"
+	flagRestartMaxAttempts    = "restart-max-attempts"
+	flagRestartWindow         = "restart-window"
+	flagStopGracePeriod       = "stop-grace-period"
+	flagUpdateDelay           = "update-delay"
+	flagUpdateFailureAction   = "update-failure-action"
+	flagUpdateMaxFailureRatio = "update-max-failure-ratio"
+	flagUpdateMonitor         = "update-monitor"
+	flagUpdateParallelism     = "update-parallelism"
+	flagUser                  = "user"
+	flagWorkdir               = "workdir"
+	flagRegistryAuth          = "with-registry-auth"
+	flagLogDriver             = "log-driver"
+	flagLogOpt                = "log-opt"
 )
 )

+ 30 - 4
cli/command/service/update.go

@@ -36,6 +36,7 @@ func newUpdateCommand(dockerCli *command.DockerCli) *cobra.Command {
 	flags := cmd.Flags()
 	flags := cmd.Flags()
 	flags.String("image", "", "Service image tag")
 	flags.String("image", "", "Service image tag")
 	flags.String("args", "", "Service command args")
 	flags.String("args", "", "Service command args")
+	flags.Bool("rollback", false, "Rollback to previous specification")
 	addServiceFlags(cmd, opts)
 	addServiceFlags(cmd, opts)
 
 
 	flags.Var(newListOptsVar(), flagEnvRemove, "Remove an environment variable")
 	flags.Var(newListOptsVar(), flagEnvRemove, "Remove an environment variable")
@@ -68,7 +69,20 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID str
 		return err
 		return err
 	}
 	}
 
 
-	err = updateService(flags, &service.Spec)
+	rollback, err := flags.GetBool("rollback")
+	if err != nil {
+		return err
+	}
+
+	spec := &service.Spec
+	if rollback {
+		spec = service.PreviousSpec
+		if spec == nil {
+			return fmt.Errorf("service does not have a previous specification to roll back to")
+		}
+	}
+
+	err = updateService(flags, spec)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
@@ -81,15 +95,19 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID str
 	if sendAuth {
 	if sendAuth {
 		// Retrieve encoded auth token from the image reference
 		// Retrieve encoded auth token from the image reference
 		// This would be the old image if it didn't change in this update
 		// This would be the old image if it didn't change in this update
-		image := service.Spec.TaskTemplate.ContainerSpec.Image
+		image := spec.TaskTemplate.ContainerSpec.Image
 		encodedAuth, err := command.RetrieveAuthTokenFromImage(ctx, dockerCli, image)
 		encodedAuth, err := command.RetrieveAuthTokenFromImage(ctx, dockerCli, image)
 		if err != nil {
 		if err != nil {
 			return err
 			return err
 		}
 		}
 		updateOpts.EncodedRegistryAuth = encodedAuth
 		updateOpts.EncodedRegistryAuth = encodedAuth
+	} else if rollback {
+		updateOpts.RegistryAuthFrom = types.RegistryAuthFromPreviousSpec
+	} else {
+		updateOpts.RegistryAuthFrom = types.RegistryAuthFromSpec
 	}
 	}
 
 
-	err = apiClient.ServiceUpdate(ctx, service.ID, service.Version, service.Spec, updateOpts)
+	err = apiClient.ServiceUpdate(ctx, service.ID, service.Version, *spec, updateOpts)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
@@ -111,6 +129,12 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
 		}
 		}
 	}
 	}
 
 
+	updateFloat32 := func(flag string, field *float32) {
+		if flags.Changed(flag) {
+			*field, _ = flags.GetFloat32(flag)
+		}
+	}
+
 	updateDuration := func(flag string, field *time.Duration) {
 	updateDuration := func(flag string, field *time.Duration) {
 		if flags.Changed(flag) {
 		if flags.Changed(flag) {
 			*field, _ = flags.GetDuration(flag)
 			*field, _ = flags.GetDuration(flag)
@@ -195,13 +219,15 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
 		return err
 		return err
 	}
 	}
 
 
-	if anyChanged(flags, flagUpdateParallelism, flagUpdateDelay, flagUpdateFailureAction) {
+	if anyChanged(flags, flagUpdateParallelism, flagUpdateDelay, flagUpdateMonitor, flagUpdateFailureAction, flagUpdateMaxFailureRatio) {
 		if spec.UpdateConfig == nil {
 		if spec.UpdateConfig == nil {
 			spec.UpdateConfig = &swarm.UpdateConfig{}
 			spec.UpdateConfig = &swarm.UpdateConfig{}
 		}
 		}
 		updateUint64(flagUpdateParallelism, &spec.UpdateConfig.Parallelism)
 		updateUint64(flagUpdateParallelism, &spec.UpdateConfig.Parallelism)
 		updateDuration(flagUpdateDelay, &spec.UpdateConfig.Delay)
 		updateDuration(flagUpdateDelay, &spec.UpdateConfig.Delay)
+		updateDuration(flagUpdateMonitor, &spec.UpdateConfig.Monitor)
 		updateString(flagUpdateFailureAction, &spec.UpdateConfig.FailureAction)
 		updateString(flagUpdateFailureAction, &spec.UpdateConfig.FailureAction)
+		updateFloat32(flagUpdateMaxFailureRatio, &spec.UpdateConfig.MaxFailureRatio)
 	}
 	}
 
 
 	if flags.Changed(flagEndpointMode) {
 	if flags.Changed(flagEndpointMode) {

+ 4 - 0
client/service_update.go

@@ -22,6 +22,10 @@ func (cli *Client) ServiceUpdate(ctx context.Context, serviceID string, version
 		}
 		}
 	}
 	}
 
 
+	if options.RegistryAuthFrom != "" {
+		query.Set("registryAuthFrom", options.RegistryAuthFrom)
+	}
+
 	query.Set("version", strconv.FormatUint(version.Index, 10))
 	query.Set("version", strconv.FormatUint(version.Index, 10))
 
 
 	resp, err := cli.post(ctx, "/services/"+serviceID+"/update", query, service, headers)
 	resp, err := cli.post(ctx, "/services/"+serviceID+"/update", query, service, headers)

+ 3 - 0
contrib/completion/bash/docker

@@ -1809,9 +1809,12 @@ _docker_service_update() {
 		--restart-delay
 		--restart-delay
 		--restart-max-attempts
 		--restart-max-attempts
 		--restart-window
 		--restart-window
+		--rollback
 		--stop-grace-period
 		--stop-grace-period
 		--update-delay
 		--update-delay
 		--update-failure-action
 		--update-failure-action
+		--update-max-failure-ratio
+		--update-monitor
 		--update-parallelism
 		--update-parallelism
 		--user -u
 		--user -u
 		--workdir -w
 		--workdir -w

+ 3 - 0
contrib/completion/zsh/_docker

@@ -1108,6 +1108,8 @@ __docker_service_subcommand() {
         "($help)--stop-grace-period=[Time to wait before force killing a container]:grace period: "
         "($help)--stop-grace-period=[Time to wait before force killing a container]:grace period: "
         "($help)--update-delay=[Delay between updates]:delay: "
         "($help)--update-delay=[Delay between updates]:delay: "
         "($help)--update-failure-action=[Action on update failure]:mode:(pause continue)"
         "($help)--update-failure-action=[Action on update failure]:mode:(pause continue)"
+        "($help)--update-max-failure-ratio=[Failure rate to tolerate during an update]:fraction: "
+        "($help)--update-monitor=[Duration after each task update to monitor for failure]:window: "
         "($help)--update-parallelism=[Maximum number of tasks updated simultaneously]:number: "
         "($help)--update-parallelism=[Maximum number of tasks updated simultaneously]:number: "
         "($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
         "($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
         "($help)--with-registry-auth[Send registry authentication details to swarm agents]"
         "($help)--with-registry-auth[Send registry authentication details to swarm agents]"
@@ -1185,6 +1187,7 @@ __docker_service_subcommand() {
                 "($help)*--container-label-rm=[Remove a container label by its key]:label: " \
                 "($help)*--container-label-rm=[Remove a container label by its key]:label: " \
                 "($help)*--group-rm=[Remove previously added user groups from the container]:group:_groups" \
                 "($help)*--group-rm=[Remove previously added user groups from the container]:group:_groups" \
                 "($help)--image=[Service image tag]:image:__docker_repositories" \
                 "($help)--image=[Service image tag]:image:__docker_repositories" \
+                "($help)--rollback[Rollback to previous specification]" \
                 "($help -)1:service:__docker_complete_services" && ret=0
                 "($help -)1:service:__docker_complete_services" && ret=0
             ;;
             ;;
         (help)
         (help)

+ 13 - 2
daemon/cluster/cluster.go

@@ -913,7 +913,7 @@ func (c *Cluster) GetService(input string) (types.Service, error) {
 }
 }
 
 
 // UpdateService updates existing service to match new properties.
 // UpdateService updates existing service to match new properties.
-func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string) error {
+func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string, registryAuthFrom string) error {
 	c.RLock()
 	c.RLock()
 	defer c.RUnlock()
 	defer c.RUnlock()
 
 
@@ -948,7 +948,18 @@ func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec typ
 	} else {
 	} else {
 		// this is needed because if the encodedAuth isn't being updated then we
 		// this is needed because if the encodedAuth isn't being updated then we
 		// shouldn't lose it, and continue to use the one that was already present
 		// shouldn't lose it, and continue to use the one that was already present
-		ctnr := currentService.Spec.Task.GetContainer()
+		var ctnr *swarmapi.ContainerSpec
+		switch registryAuthFrom {
+		case apitypes.RegistryAuthFromSpec, "":
+			ctnr = currentService.Spec.Task.GetContainer()
+		case apitypes.RegistryAuthFromPreviousSpec:
+			if currentService.PreviousSpec == nil {
+				return fmt.Errorf("service does not have a previous spec")
+			}
+			ctnr = currentService.PreviousSpec.Task.GetContainer()
+		default:
+			return fmt.Errorf("unsupported registryAuthFromValue")
+		}
 		if ctnr == nil {
 		if ctnr == nil {
 			return fmt.Errorf("service does not use container tasks")
 			return fmt.Errorf("service does not use container tasks")
 		}
 		}

+ 75 - 57
daemon/cluster/convert/service.go

@@ -12,8 +12,43 @@ import (
 
 
 // ServiceFromGRPC converts a grpc Service to a Service.
 // ServiceFromGRPC converts a grpc Service to a Service.
 func ServiceFromGRPC(s swarmapi.Service) types.Service {
 func ServiceFromGRPC(s swarmapi.Service) types.Service {
-	spec := s.Spec
-	containerConfig := spec.Task.Runtime.(*swarmapi.TaskSpec_Container).Container
+	service := types.Service{
+		ID:           s.ID,
+		Spec:         *serviceSpecFromGRPC(&s.Spec),
+		PreviousSpec: serviceSpecFromGRPC(s.PreviousSpec),
+
+		Endpoint: endpointFromGRPC(s.Endpoint),
+	}
+
+	// Meta
+	service.Version.Index = s.Meta.Version.Index
+	service.CreatedAt, _ = ptypes.Timestamp(s.Meta.CreatedAt)
+	service.UpdatedAt, _ = ptypes.Timestamp(s.Meta.UpdatedAt)
+
+	// UpdateStatus
+	service.UpdateStatus = types.UpdateStatus{}
+	if s.UpdateStatus != nil {
+		switch s.UpdateStatus.State {
+		case swarmapi.UpdateStatus_UPDATING:
+			service.UpdateStatus.State = types.UpdateStateUpdating
+		case swarmapi.UpdateStatus_PAUSED:
+			service.UpdateStatus.State = types.UpdateStatePaused
+		case swarmapi.UpdateStatus_COMPLETED:
+			service.UpdateStatus.State = types.UpdateStateCompleted
+		}
+
+		service.UpdateStatus.StartedAt, _ = ptypes.Timestamp(s.UpdateStatus.StartedAt)
+		service.UpdateStatus.CompletedAt, _ = ptypes.Timestamp(s.UpdateStatus.CompletedAt)
+		service.UpdateStatus.Message = s.UpdateStatus.Message
+	}
+
+	return service
+}
+
+func serviceSpecFromGRPC(spec *swarmapi.ServiceSpec) *types.ServiceSpec {
+	if spec == nil {
+		return nil
+	}
 
 
 	serviceNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Networks))
 	serviceNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Networks))
 	for _, n := range spec.Networks {
 	for _, n := range spec.Networks {
@@ -25,78 +60,57 @@ func ServiceFromGRPC(s swarmapi.Service) types.Service {
 		taskNetworks = append(taskNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
 		taskNetworks = append(taskNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
 	}
 	}
 
 
-	service := types.Service{
-		ID: s.ID,
-
-		Spec: types.ServiceSpec{
-			TaskTemplate: types.TaskSpec{
-				ContainerSpec: containerSpecFromGRPC(containerConfig),
-				Resources:     resourcesFromGRPC(s.Spec.Task.Resources),
-				RestartPolicy: restartPolicyFromGRPC(s.Spec.Task.Restart),
-				Placement:     placementFromGRPC(s.Spec.Task.Placement),
-				LogDriver:     driverFromGRPC(s.Spec.Task.LogDriver),
-				Networks:      taskNetworks,
-			},
-
-			Networks:     serviceNetworks,
-			EndpointSpec: endpointSpecFromGRPC(s.Spec.Endpoint),
+	containerConfig := spec.Task.Runtime.(*swarmapi.TaskSpec_Container).Container
+	convertedSpec := &types.ServiceSpec{
+		Annotations: types.Annotations{
+			Name:   spec.Annotations.Name,
+			Labels: spec.Annotations.Labels,
 		},
 		},
-		Endpoint: endpointFromGRPC(s.Endpoint),
-	}
 
 
-	// Meta
-	service.Version.Index = s.Meta.Version.Index
-	service.CreatedAt, _ = ptypes.Timestamp(s.Meta.CreatedAt)
-	service.UpdatedAt, _ = ptypes.Timestamp(s.Meta.UpdatedAt)
+		TaskTemplate: types.TaskSpec{
+			ContainerSpec: containerSpecFromGRPC(containerConfig),
+			Resources:     resourcesFromGRPC(spec.Task.Resources),
+			RestartPolicy: restartPolicyFromGRPC(spec.Task.Restart),
+			Placement:     placementFromGRPC(spec.Task.Placement),
+			LogDriver:     driverFromGRPC(spec.Task.LogDriver),
+			Networks:      taskNetworks,
+		},
 
 
-	// Annotations
-	service.Spec.Name = s.Spec.Annotations.Name
-	service.Spec.Labels = s.Spec.Annotations.Labels
+		Networks:     serviceNetworks,
+		EndpointSpec: endpointSpecFromGRPC(spec.Endpoint),
+	}
 
 
 	// UpdateConfig
 	// UpdateConfig
-	if s.Spec.Update != nil {
-		service.Spec.UpdateConfig = &types.UpdateConfig{
-			Parallelism: s.Spec.Update.Parallelism,
+	if spec.Update != nil {
+		convertedSpec.UpdateConfig = &types.UpdateConfig{
+			Parallelism:     spec.Update.Parallelism,
+			MaxFailureRatio: spec.Update.MaxFailureRatio,
 		}
 		}
 
 
-		service.Spec.UpdateConfig.Delay, _ = ptypes.Duration(&s.Spec.Update.Delay)
+		convertedSpec.UpdateConfig.Delay, _ = ptypes.Duration(&spec.Update.Delay)
+		if spec.Update.Monitor != nil {
+			convertedSpec.UpdateConfig.Monitor, _ = ptypes.Duration(spec.Update.Monitor)
+		}
 
 
-		switch s.Spec.Update.FailureAction {
+		switch spec.Update.FailureAction {
 		case swarmapi.UpdateConfig_PAUSE:
 		case swarmapi.UpdateConfig_PAUSE:
-			service.Spec.UpdateConfig.FailureAction = types.UpdateFailureActionPause
+			convertedSpec.UpdateConfig.FailureAction = types.UpdateFailureActionPause
 		case swarmapi.UpdateConfig_CONTINUE:
 		case swarmapi.UpdateConfig_CONTINUE:
-			service.Spec.UpdateConfig.FailureAction = types.UpdateFailureActionContinue
+			convertedSpec.UpdateConfig.FailureAction = types.UpdateFailureActionContinue
 		}
 		}
 	}
 	}
 
 
 	// Mode
 	// Mode
-	switch t := s.Spec.GetMode().(type) {
+	switch t := spec.GetMode().(type) {
 	case *swarmapi.ServiceSpec_Global:
 	case *swarmapi.ServiceSpec_Global:
-		service.Spec.Mode.Global = &types.GlobalService{}
+		convertedSpec.Mode.Global = &types.GlobalService{}
 	case *swarmapi.ServiceSpec_Replicated:
 	case *swarmapi.ServiceSpec_Replicated:
-		service.Spec.Mode.Replicated = &types.ReplicatedService{
+		convertedSpec.Mode.Replicated = &types.ReplicatedService{
 			Replicas: &t.Replicated.Replicas,
 			Replicas: &t.Replicated.Replicas,
 		}
 		}
 	}
 	}
 
 
-	// UpdateStatus
-	service.UpdateStatus = types.UpdateStatus{}
-	if s.UpdateStatus != nil {
-		switch s.UpdateStatus.State {
-		case swarmapi.UpdateStatus_UPDATING:
-			service.UpdateStatus.State = types.UpdateStateUpdating
-		case swarmapi.UpdateStatus_PAUSED:
-			service.UpdateStatus.State = types.UpdateStatePaused
-		case swarmapi.UpdateStatus_COMPLETED:
-			service.UpdateStatus.State = types.UpdateStateCompleted
-		}
-
-		service.UpdateStatus.StartedAt, _ = ptypes.Timestamp(s.UpdateStatus.StartedAt)
-		service.UpdateStatus.CompletedAt, _ = ptypes.Timestamp(s.UpdateStatus.CompletedAt)
-		service.UpdateStatus.Message = s.UpdateStatus.Message
-	}
-
-	return service
+	return convertedSpec
 }
 }
 
 
 // ServiceSpecToGRPC converts a ServiceSpec to a grpc ServiceSpec.
 // ServiceSpecToGRPC converts a ServiceSpec to a grpc ServiceSpec.
@@ -158,9 +172,13 @@ func ServiceSpecToGRPC(s types.ServiceSpec) (swarmapi.ServiceSpec, error) {
 			return swarmapi.ServiceSpec{}, fmt.Errorf("unrecongized update failure action %s", s.UpdateConfig.FailureAction)
 			return swarmapi.ServiceSpec{}, fmt.Errorf("unrecongized update failure action %s", s.UpdateConfig.FailureAction)
 		}
 		}
 		spec.Update = &swarmapi.UpdateConfig{
 		spec.Update = &swarmapi.UpdateConfig{
-			Parallelism:   s.UpdateConfig.Parallelism,
-			Delay:         *ptypes.DurationProto(s.UpdateConfig.Delay),
-			FailureAction: failureAction,
+			Parallelism:     s.UpdateConfig.Parallelism,
+			Delay:           *ptypes.DurationProto(s.UpdateConfig.Delay),
+			FailureAction:   failureAction,
+			MaxFailureRatio: s.UpdateConfig.MaxFailureRatio,
+		}
+		if s.UpdateConfig.Monitor != 0 {
+			spec.Update.Monitor = ptypes.DurationProto(s.UpdateConfig.Monitor)
 		}
 		}
 	}
 	}
 
 

+ 1 - 0
docs/reference/api/docker_remote_api.md

@@ -129,6 +129,7 @@ This section lists each version from latest to oldest.  Each listing includes a
 * `GET /containers/json` now supports a `is-task` filter to filter
 * `GET /containers/json` now supports a `is-task` filter to filter
   containers that are tasks (part of a service in swarm mode).
   containers that are tasks (part of a service in swarm mode).
 * `POST /containers/create` now takes `StopTimeout` field.
 * `POST /containers/create` now takes `StopTimeout` field.
+* `POST /services/create` and `POST /services/(id or name)/update` now accept `Monitor` and `MaxFailureRatio` parameters, which control the response to failures during service updates.
 
 
 ### v1.24 API changes
 ### v1.24 API changes
 
 

+ 23 - 7
docs/reference/api/docker_remote_api_v1.25.md

@@ -4877,7 +4877,9 @@ List services
           },
           },
           "UpdateConfig": {
           "UpdateConfig": {
             "Parallelism": 1,
             "Parallelism": 1,
-            "FailureAction": "pause"
+            "FailureAction": "pause",
+            "Monitor": 15000000000,
+            "MaxFailureRatio": 0.15
           },
           },
           "EndpointSpec": {
           "EndpointSpec": {
             "Mode": "vip",
             "Mode": "vip",
@@ -5077,8 +5079,8 @@ image](#create-an-image) section for more details.
     - **RestartPolicy** – Specification for the restart policy which applies to containers created
     - **RestartPolicy** – Specification for the restart policy which applies to containers created
       as part of this service.
       as part of this service.
         - **Condition** – Condition for restart (`none`, `on-failure`, or `any`).
         - **Condition** – Condition for restart (`none`, `on-failure`, or `any`).
-        - **Delay** – Delay between restart attempts.
-        - **Attempts** – Maximum attempts to restart a given container before giving up (default value
+        - **Delay** – Delay between restart attempts, in nanoseconds.
+        - **MaxAttempts** – Maximum attempts to restart a given container before giving up (default value
           is 0, which is ignored).
           is 0, which is ignored).
         - **Window** – Windows is the time window used to evaluate the restart policy (default value is
         - **Window** – Window is the time window used to evaluate the restart policy (default value is
         - **Window** – Window is the time window used to evaluate the restart policy (default value is
           0, which is unbounded).
@@ -5087,9 +5089,12 @@ image](#create-an-image) section for more details.
 - **UpdateConfig** – Specification for the update strategy of the service.
 - **UpdateConfig** – Specification for the update strategy of the service.
     - **Parallelism** – Maximum number of tasks to be updated in one iteration (0 means unlimited
     - **Parallelism** – Maximum number of tasks to be updated in one iteration (0 means unlimited
       parallelism).
       parallelism).
-    - **Delay** – Amount of time between updates.
+    - **Delay** – Amount of time between updates, in nanoseconds.
     - **FailureAction** - Action to take if an updated task fails to run, or stops running during the
     - **FailureAction** - Action to take if an updated task fails to run, or stops running during the
       update. Values are `continue` and `pause`.
       update. Values are `continue` and `pause`.
+    - **Monitor** - Amount of time to monitor each updated task for failures, in nanoseconds.
+    - **MaxFailureRatio** - The fraction of tasks that may fail during an update before the
+      failure action is invoked, specified as a floating point number between 0 and 1. The default is 0.
 - **Networks** – Array of network names or IDs to attach the service to.
 - **Networks** – Array of network names or IDs to attach the service to.
 - **EndpointSpec** – Properties that can be configured to access and load balance a service.
 - **EndpointSpec** – Properties that can be configured to access and load balance a service.
     - **Mode** – The mode of resolution to use for internal load balancing
     - **Mode** – The mode of resolution to use for internal load balancing
@@ -5259,7 +5264,9 @@ image](#create-an-image) section for more details.
         }
         }
       },
       },
       "UpdateConfig": {
       "UpdateConfig": {
-        "Parallelism": 1
+        "Parallelism": 1,
+        "Monitor": 15000000000,
+        "MaxFailureRatio": 0.15
       },
       },
       "EndpointSpec": {
       "EndpointSpec": {
         "Mode": "vip"
         "Mode": "vip"
@@ -5314,7 +5321,7 @@ image](#create-an-image) section for more details.
     - **RestartPolicy** – Specification for the restart policy which applies to containers created
     - **RestartPolicy** – Specification for the restart policy which applies to containers created
       as part of this service.
       as part of this service.
         - **Condition** – Condition for restart (`none`, `on-failure`, or `any`).
         - **Condition** – Condition for restart (`none`, `on-failure`, or `any`).
-        - **Delay** – Delay between restart attempts.
+        - **Delay** – Delay between restart attempts, in nanoseconds.
         - **MaxAttempts** – Maximum attempts to restart a given container before giving up (default value
         - **MaxAttempts** – Maximum attempts to restart a given container before giving up (default value
           is 0, which is ignored).
           is 0, which is ignored).
         - **Window** – The time window used to evaluate the restart policy (default value is
         - **Window** – The time window used to evaluate the restart policy (default value is
@@ -5324,7 +5331,12 @@ image](#create-an-image) section for more details.
 - **UpdateConfig** – Specification for the update strategy of the service.
 - **UpdateConfig** – Specification for the update strategy of the service.
     - **Parallelism** – Maximum number of tasks to be updated in one iteration (0 means unlimited
     - **Parallelism** – Maximum number of tasks to be updated in one iteration (0 means unlimited
       parallelism).
       parallelism).
-    - **Delay** – Amount of time between updates.
+    - **Delay** – Amount of time between updates, in nanoseconds.
+    - **FailureAction** - Action to take if an updated task fails to run, or stops running during the
+      update. Values are `continue` and `pause`.
+    - **Monitor** - Amount of time to monitor each updated task for failures, in nanoseconds.
+    - **MaxFailureRatio** - The fraction of tasks that may fail during an update before the
+      failure action is invoked, specified as a floating point number between 0 and 1. The default is 0.
 - **Networks** – Array of network names or IDs to attach the service to.
 - **Networks** – Array of network names or IDs to attach the service to.
 - **EndpointSpec** – Properties that can be configured to access and load balance a service.
 - **EndpointSpec** – Properties that can be configured to access and load balance a service.
     - **Mode** – The mode of resolution to use for internal load balancing
     - **Mode** – The mode of resolution to use for internal load balancing
@@ -5338,6 +5350,10 @@ image](#create-an-image) section for more details.
 
 
 - **version** – The version number of the service object being updated. This is
 - **version** – The version number of the service object being updated. This is
   required to avoid conflicting writes.
   required to avoid conflicting writes.
+- **registryAuthFrom** – If the `X-Registry-Auth` header is not specified, this
+  parameter indicates where to find registry authorization credentials. The
+  valid values are `spec` and `previous-spec`. If unspecified, the default is
+  `spec`.
 
 
 **Request Headers**:
 **Request Headers**:
 
 

+ 32 - 30
docs/reference/commandline/service_create.md

@@ -12,36 +12,38 @@ Usage:  docker service create [OPTIONS] IMAGE [COMMAND] [ARG...]
 Create a new service
 Create a new service
 
 
 Options:
 Options:
-      --constraint value               Placement constraints (default [])
-      --container-label value          Service container labels (default [])
-      --endpoint-mode string           Endpoint mode (vip or dnsrr)
-  -e, --env value                      Set environment variables (default [])
-      --group-add value                Add additional user groups to the container (default [])
-      --help                           Print usage
-  -l, --label value                    Service labels (default [])
-      --limit-cpu value                Limit CPUs (default 0.000)
-      --limit-memory value             Limit Memory (default 0 B)
-      --log-driver string              Logging driver for service
-      --log-opt value                  Logging driver options (default [])
-      --mode string                    Service mode (replicated or global) (default "replicated")
-      --mount value                    Attach a mount to the service
-      --name string                    Service name
-      --network value                  Network attachments (default [])
-  -p, --publish value                  Publish a port as a node port (default [])
-      --replicas value                 Number of tasks (default none)
-      --reserve-cpu value              Reserve CPUs (default 0.000)
-      --reserve-memory value           Reserve Memory (default 0 B)
-      --restart-condition string       Restart when condition is met (none, on-failure, or any)
-      --restart-delay value            Delay between restart attempts (default none)
-      --restart-max-attempts value     Maximum number of restarts before giving up (default none)
-      --restart-window value           Window used to evaluate the restart policy (default none)
-      --stop-grace-period value        Time to wait before force killing a container (default none)
-      --update-delay duration          Delay between updates
-      --update-failure-action string   Action on update failure (pause|continue) (default "pause")
-      --update-parallelism uint        Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-  -u, --user string                    Username or UID (format: <name|uid>[:<group|gid>])
-      --with-registry-auth             Send registry authentication details to Swarm agents
-  -w, --workdir string                 Working directory inside the container
+      --constraint value                 Placement constraints (default [])
+      --container-label value            Service container labels (default [])
+      --endpoint-mode string             Endpoint mode (vip or dnsrr)
+  -e, --env value                        Set environment variables (default [])
+      --group-add value                  Add additional user groups to the container (default [])
+      --help                             Print usage
+  -l, --label value                      Service labels (default [])
+      --limit-cpu value                  Limit CPUs (default 0.000)
+      --limit-memory value               Limit Memory (default 0 B)
+      --log-driver string                Logging driver for service
+      --log-opt value                    Logging driver options (default [])
+      --mode string                      Service mode (replicated or global) (default "replicated")
+      --mount value                      Attach a mount to the service
+      --name string                      Service name
+      --network value                    Network attachments (default [])
+  -p, --publish value                    Publish a port as a node port (default [])
+      --replicas value                   Number of tasks (default none)
+      --reserve-cpu value                Reserve CPUs (default 0.000)
+      --reserve-memory value             Reserve Memory (default 0 B)
+      --restart-condition string         Restart when condition is met (none, on-failure, or any)
+      --restart-delay value              Delay between restart attempts (default none)
+      --restart-max-attempts value       Maximum number of restarts before giving up (default none)
+      --restart-window value             Window used to evaluate the restart policy (default none)
+      --stop-grace-period value          Time to wait before force killing a container (default none)
+      --update-delay duration            Delay between updates
+      --update-failure-action string     Action on update failure (pause|continue) (default "pause")
+      --update-max-failure-ratio value   Failure rate to tolerate during an update
+      --update-monitor duration          Duration after each task update to monitor for failure (default 0s)
+      --update-parallelism uint          Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
+  -u, --user string                      Username or UID (format: <name|uid>[:<group|gid>])
+      --with-registry-auth               Send registry authentication details to Swarm agents
+  -w, --workdir string                   Working directory inside the container
 ```
 ```
 
 
 Creates a service as described by the specified parameters. You must run this
 Creates a service as described by the specified parameters. You must run this

+ 40 - 37
docs/reference/commandline/service_update.md

@@ -12,43 +12,46 @@ Usage:  docker service update [OPTIONS] SERVICE
 Update a service
 Update a service
 
 
 Options:
 Options:
-      --args string                    Service command args
-      --constraint-add value           Add or update placement constraints (default [])
-      --constraint-rm value            Remove a constraint (default [])
-      --container-label-add value      Add or update container labels (default [])
-      --container-label-rm value       Remove a container label by its key (default [])
-      --endpoint-mode string           Endpoint mode (vip or dnsrr)
-      --env-add value                  Add or update environment variables (default [])
-      --env-rm value                   Remove an environment variable (default [])
-      --group-add value                Add additional user groups to the container (default [])
-      --group-rm value                 Remove previously added user groups from the container (default [])
-      --help                           Print usage
-      --image string                   Service image tag
-      --label-add value                Add or update service labels (default [])
-      --label-rm value                 Remove a label by its key (default [])
-      --limit-cpu value                Limit CPUs (default 0.000)
-      --limit-memory value             Limit Memory (default 0 B)
-      --log-driver string              Logging driver for service
-      --log-opt value                  Logging driver options (default [])
-      --mount-add value                Add or update a mount on a service
-      --mount-rm value                 Remove a mount by its target path (default [])
-      --name string                    Service name
-      --publish-add value              Add or update a published port (default [])
-      --publish-rm value               Remove a published port by its target port (default [])
-      --replicas value                 Number of tasks (default none)
-      --reserve-cpu value              Reserve CPUs (default 0.000)
-      --reserve-memory value           Reserve Memory (default 0 B)
-      --restart-condition string       Restart when condition is met (none, on-failure, or any)
-      --restart-delay value            Delay between restart attempts (default none)
-      --restart-max-attempts value     Maximum number of restarts before giving up (default none)
-      --restart-window value           Window used to evaluate the restart policy (default none)
-      --stop-grace-period value        Time to wait before force killing a container (default none)
-      --update-delay duration          Delay between updates
-      --update-failure-action string   Action on update failure (pause|continue) (default "pause")
-      --update-parallelism uint        Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-  -u, --user string                    Username or UID (format: <name|uid>[:<group|gid>])
-      --with-registry-auth             Send registry authentication details to Swarm agents
-  -w, --workdir string                 Working directory inside the container
+      --args string                      Service command args
+      --constraint-add value             Add or update placement constraints (default [])
+      --constraint-rm value              Remove a constraint (default [])
+      --container-label-add value        Add or update container labels (default [])
+      --container-label-rm value         Remove a container label by its key (default [])
+      --endpoint-mode string             Endpoint mode (vip or dnsrr)
+      --env-add value                    Add or update environment variables (default [])
+      --env-rm value                     Remove an environment variable (default [])
+      --group-add value                  Add additional user groups to the container (default [])
+      --group-rm value                   Remove previously added user groups from the container (default [])
+      --help                             Print usage
+      --image string                     Service image tag
+      --label-add value                  Add or update service labels (default [])
+      --label-rm value                   Remove a label by its key (default [])
+      --limit-cpu value                  Limit CPUs (default 0.000)
+      --limit-memory value               Limit Memory (default 0 B)
+      --log-driver string                Logging driver for service
+      --log-opt value                    Logging driver options (default [])
+      --mount-add value                  Add or update a mount on a service
+      --mount-rm value                   Remove a mount by its target path (default [])
+      --name string                      Service name
+      --publish-add value                Add or update a published port (default [])
+      --publish-rm value                 Remove a published port by its target port (default [])
+      --replicas value                   Number of tasks (default none)
+      --reserve-cpu value                Reserve CPUs (default 0.000)
+      --reserve-memory value             Reserve Memory (default 0 B)
+      --restart-condition string         Restart when condition is met (none, on-failure, or any)
+      --restart-delay value              Delay between restart attempts (default none)
+      --restart-max-attempts value       Maximum number of restarts before giving up (default none)
+      --restart-window value             Window used to evaluate the restart policy (default none)
+      --rollback                         Rollback to previous specification
+      --stop-grace-period value          Time to wait before force killing a container (default none)
+      --update-delay duration            Delay between updates
+      --update-failure-action string     Action on update failure (pause|continue) (default "pause")
+      --update-max-failure-ratio value   Failure rate to tolerate during an update
+      --update-monitor duration          Duration after each task update to monitor for failure (default 0s)
+      --update-parallelism uint          Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
+  -u, --user string                      Username or UID (format: <name|uid>[:<group|gid>])
+      --with-registry-auth               Send registry authentication details to Swarm agents
+  -w, --workdir string                   Working directory inside the container
 ```
 ```
 
 
 Updates a service as described by the specified parameters. This command has to be run targeting a manager node.
 Updates a service as described by the specified parameters. This command has to be run targeting a manager node.

+ 11 - 4
integration-cli/daemon_swarm.go

@@ -139,8 +139,8 @@ func (d *SwarmDaemon) getServiceTasks(c *check.C, service string) []swarm.Task {
 	return tasks
 	return tasks
 }
 }
 
 
-func (d *SwarmDaemon) checkServiceRunningTasks(c *check.C, service string) func(*check.C) (interface{}, check.CommentInterface) {
-	return func(*check.C) (interface{}, check.CommentInterface) {
+func (d *SwarmDaemon) checkServiceRunningTasks(service string) func(*check.C) (interface{}, check.CommentInterface) {
+	return func(c *check.C) (interface{}, check.CommentInterface) {
 		tasks := d.getServiceTasks(c, service)
 		tasks := d.getServiceTasks(c, service)
 		var runningCount int
 		var runningCount int
 		for _, task := range tasks {
 		for _, task := range tasks {
@@ -152,8 +152,15 @@ func (d *SwarmDaemon) checkServiceRunningTasks(c *check.C, service string) func(
 	}
 	}
 }
 }
 
 
-func (d *SwarmDaemon) checkServiceTasks(c *check.C, service string) func(*check.C) (interface{}, check.CommentInterface) {
-	return func(*check.C) (interface{}, check.CommentInterface) {
+func (d *SwarmDaemon) checkServiceUpdateState(service string) func(*check.C) (interface{}, check.CommentInterface) {
+	return func(c *check.C) (interface{}, check.CommentInterface) {
+		service := d.getService(c, service)
+		return service.UpdateStatus.State, nil
+	}
+}
+
+func (d *SwarmDaemon) checkServiceTasks(service string) func(*check.C) (interface{}, check.CommentInterface) {
+	return func(c *check.C) (interface{}, check.CommentInterface) {
 		tasks := d.getServiceTasks(c, service)
 		tasks := d.getServiceTasks(c, service)
 		return len(tasks), nil
 		return len(tasks), nil
 	}
 	}

+ 83 - 8
integration-cli/docker_api_swarm_test.go

@@ -310,6 +310,63 @@ func (s *DockerSwarmSuite) TestAPISwarmServicesUpdate(c *check.C) {
 	// 3rd batch
 	// 3rd batch
 	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
 	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
 		map[string]int{image2: instances})
 		map[string]int{image2: instances})
+
+	// Roll back to the previous version. This uses the CLI because
+	// rollback is a client-side operation.
+	out, err := daemons[0].Cmd("service", "update", "--rollback", id)
+	c.Assert(err, checker.IsNil, check.Commentf(out))
+
+	// first batch
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
+		map[string]int{image2: instances - parallelism, image1: parallelism})
+
+	// 2nd batch
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
+		map[string]int{image2: instances - 2*parallelism, image1: 2 * parallelism})
+
+	// 3rd batch
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
+		map[string]int{image1: instances})
+}
+
+func (s *DockerSwarmSuite) TestApiSwarmServicesFailedUpdate(c *check.C) {
+	const nodeCount = 3
+	var daemons [nodeCount]*SwarmDaemon
+	for i := 0; i < nodeCount; i++ {
+		daemons[i] = s.AddDaemon(c, true, i == 0)
+	}
+	// wait for nodes ready
+	waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
+
+	// service image at start
+	image1 := "busybox:latest"
+	// target image in update
+	image2 := "busybox:badtag"
+
+	// create service
+	instances := 5
+	id := daemons[0].createService(c, serviceForUpdate, setInstances(instances))
+
+	// wait for tasks ready
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
+		map[string]int{image1: instances})
+
+	// issue service update
+	service := daemons[0].getService(c, id)
+	daemons[0].updateService(c, service, setImage(image2), setFailureAction(swarm.UpdateFailureActionPause), setMaxFailureRatio(0.25), setParallelism(1))
+
+	// should update 2 tasks and then pause
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceUpdateState(id), checker.Equals, swarm.UpdateStatePaused)
+	v, _ := daemons[0].checkServiceRunningTasks(id)(c)
+	c.Assert(v, checker.Equals, instances-2)
+
+	// Roll back to the previous version. This uses the CLI because
+	// rollback is a client-side operation.
+	out, err := daemons[0].Cmd("service", "update", "--rollback", id)
+	c.Assert(err, checker.IsNil, check.Commentf(out))
+
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
+		map[string]int{image1: instances})
 }
 }
 
 
 func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
 func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
@@ -326,7 +383,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
 	instances := 3
 	instances := 3
 	id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks ready
 	// wait for tasks ready
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
 	// validate tasks are running on worker nodes
 	// validate tasks are running on worker nodes
 	tasks := daemons[0].getServiceTasks(c, id)
 	tasks := daemons[0].getServiceTasks(c, id)
 	for _, task := range tasks {
 	for _, task := range tasks {
@@ -340,7 +397,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
 	constraints = []string{"node.role!=worker"}
 	constraints = []string{"node.role!=worker"}
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks ready
 	// wait for tasks ready
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
 	tasks = daemons[0].getServiceTasks(c, id)
 	tasks = daemons[0].getServiceTasks(c, id)
 	// validate tasks are running on manager nodes
 	// validate tasks are running on manager nodes
 	for _, task := range tasks {
 	for _, task := range tasks {
@@ -354,7 +411,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
 	constraints = []string{"node.role==nosuchrole"}
 	constraints = []string{"node.role==nosuchrole"}
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks created
 	// wait for tasks created
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
 	// let scheduler try
 	// let scheduler try
 	time.Sleep(250 * time.Millisecond)
 	time.Sleep(250 * time.Millisecond)
 	// validate tasks are not assigned to any node
 	// validate tasks are not assigned to any node
@@ -394,7 +451,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
 	constraints := []string{"node.labels.security==high"}
 	constraints := []string{"node.labels.security==high"}
 	id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks ready
 	// wait for tasks ready
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
 	tasks := daemons[0].getServiceTasks(c, id)
 	tasks := daemons[0].getServiceTasks(c, id)
 	// validate all tasks are running on nodes[0]
 	// validate all tasks are running on nodes[0]
 	for _, task := range tasks {
 	for _, task := range tasks {
@@ -407,7 +464,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
 	constraints = []string{"node.labels.security!=high"}
 	constraints = []string{"node.labels.security!=high"}
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks ready
 	// wait for tasks ready
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
 	tasks = daemons[0].getServiceTasks(c, id)
 	tasks = daemons[0].getServiceTasks(c, id)
 	// validate all tasks are NOT running on nodes[0]
 	// validate all tasks are NOT running on nodes[0]
 	for _, task := range tasks {
 	for _, task := range tasks {
@@ -419,7 +476,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
 	constraints = []string{"node.labels.security==medium"}
 	constraints = []string{"node.labels.security==medium"}
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks created
 	// wait for tasks created
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
 	// let scheduler try
 	// let scheduler try
 	time.Sleep(250 * time.Millisecond)
 	time.Sleep(250 * time.Millisecond)
 	tasks = daemons[0].getServiceTasks(c, id)
 	tasks = daemons[0].getServiceTasks(c, id)
@@ -437,7 +494,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
 	}
 	}
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
 	// wait for tasks created
 	// wait for tasks created
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
 	// let scheduler try
 	// let scheduler try
 	time.Sleep(250 * time.Millisecond)
 	time.Sleep(250 * time.Millisecond)
 	tasks = daemons[0].getServiceTasks(c, id)
 	tasks = daemons[0].getServiceTasks(c, id)
@@ -452,7 +509,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
 		}
 		}
 	})
 	})
 	// wait for tasks ready
 	// wait for tasks ready
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
 	tasks = daemons[0].getServiceTasks(c, id)
 	tasks = daemons[0].getServiceTasks(c, id)
 	for _, task := range tasks {
 	for _, task := range tasks {
 		c.Assert(task.NodeID, checker.Equals, nodes[1].ID)
 		c.Assert(task.NodeID, checker.Equals, nodes[1].ID)
@@ -1022,6 +1079,24 @@ func setImage(image string) serviceConstructor {
 	}
 	}
 }
 }
 
 
+func setFailureAction(failureAction string) serviceConstructor {
+	return func(s *swarm.Service) {
+		s.Spec.UpdateConfig.FailureAction = failureAction
+	}
+}
+
+func setMaxFailureRatio(maxFailureRatio float32) serviceConstructor {
+	return func(s *swarm.Service) {
+		s.Spec.UpdateConfig.MaxFailureRatio = maxFailureRatio
+	}
+}
+
+func setParallelism(parallelism uint64) serviceConstructor {
+	return func(s *swarm.Service) {
+		s.Spec.UpdateConfig.Parallelism = parallelism
+	}
+}
+
 func setConstraints(constraints []string) serviceConstructor {
 func setConstraints(constraints []string) serviceConstructor {
 	return func(s *swarm.Service) {
 	return func(s *swarm.Service) {
 		if s.Spec.TaskTemplate.Placement == nil {
 		if s.Spec.TaskTemplate.Placement == nil {

+ 1 - 1
integration-cli/docker_cli_swarm_test.go

@@ -349,7 +349,7 @@ func (s *DockerSwarmSuite) TestPsListContainersFilterIsTask(c *check.C) {
 	c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
 	c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
 
 
 	// make sure task has been deployed.
 	// make sure task has been deployed.
-	waitAndAssert(c, defaultReconciliationTimeout, d.checkServiceRunningTasks(c, name), checker.Equals, 1)
+	waitAndAssert(c, defaultReconciliationTimeout, d.checkServiceRunningTasks(name), checker.Equals, 1)
 
 
 	// Filter non-tasks
 	// Filter non-tasks
 	out, err = d.Cmd("ps", "-a", "-q", "--filter=is-task=false")
 	out, err = d.Cmd("ps", "-a", "-q", "--filter=is-task=false")