Merge pull request #31108 from aaronlehmann/automatic-rollback

Automatic service rollback on failure
This commit is contained in:
Yong Tang 2017-03-03 18:47:14 -08:00 committed by GitHub
commit 338565805c
17 changed files with 523 additions and 254 deletions

View file

@ -19,7 +19,7 @@ type Backend interface {
GetServices(basictypes.ServiceListOptions) ([]types.Service, error)
GetService(string) (types.Service, error)
CreateService(types.ServiceSpec, string) (*basictypes.ServiceCreateResponse, error)
UpdateService(string, uint64, types.ServiceSpec, string, string) (*basictypes.ServiceUpdateResponse, error)
UpdateService(string, uint64, types.ServiceSpec, basictypes.ServiceUpdateOptions) (*basictypes.ServiceUpdateResponse, error)
RemoveService(string) error
ServiceLogs(context.Context, string, *backend.ContainerLogsConfig, chan struct{}) error
GetNodes(basictypes.NodeListOptions) ([]types.Node, error)

View file

@ -192,12 +192,14 @@ func (sr *swarmRouter) updateService(ctx context.Context, w http.ResponseWriter,
return errors.NewBadRequestError(err)
}
var flags basictypes.ServiceUpdateOptions
// Get returns "" if the header does not exist
encodedAuth := r.Header.Get("X-Registry-Auth")
flags.EncodedRegistryAuth = r.Header.Get("X-Registry-Auth")
flags.RegistryAuthFrom = r.URL.Query().Get("registryAuthFrom")
flags.Rollback = r.URL.Query().Get("rollback")
registryAuthFrom := r.URL.Query().Get("registryAuthFrom")
resp, err := sr.backend.UpdateService(vars["id"], version, service, encodedAuth, registryAuthFrom)
resp, err := sr.backend.UpdateService(vars["id"], version, service, flags)
if err != nil {
logrus.Errorf("Error updating service %s: %v", vars["id"], err)
return err

View file

@ -2256,6 +2256,7 @@ definitions:
enum:
- "continue"
- "pause"
- "rollback"
Monitor:
description: "Amount of time to monitor each updated task for failures, in nanoseconds."
type: "integer"
@ -2264,6 +2265,32 @@ definitions:
description: "The fraction of tasks that may fail during an update before the failure action is invoked, specified as a floating point number between 0 and 1."
type: "number"
default: 0
RollbackConfig:
description: "Specification for the rollback strategy of the service."
type: "object"
properties:
Parallelism:
description: "Maximum number of tasks to be rolled back in one iteration (0 means unlimited parallelism)."
type: "integer"
format: "int64"
Delay:
description: "Amount of time between rollback iterations, in nanoseconds."
type: "integer"
format: "int64"
FailureAction:
description: "Action to take if a rolled back task fails to run, or stops running during the rollback."
type: "string"
enum:
- "continue"
- "pause"
Monitor:
description: "Amount of time to monitor each rolled back task for failures, in nanoseconds."
type: "integer"
format: "int64"
MaxFailureRatio:
description: "The fraction of tasks that may fail during a rollback before the failure action is invoked, specified as a floating point number between 0 and 1."
type: "number"
default: 0
Networks:
description: "Array of network names or IDs to attach the service to."
type: "array"
@ -2386,6 +2413,13 @@ definitions:
Replicas: 1
UpdateConfig:
Parallelism: 1
Delay: 1000000000
FailureAction: "pause"
Monitor: 15000000000
MaxFailureRatio: 0.15
RollbackConfig:
Parallelism: 1
Delay: 1000000000
FailureAction: "pause"
Monitor: 15000000000
MaxFailureRatio: 0.15
@ -7435,9 +7469,17 @@ paths:
Replicated:
Replicas: 4
UpdateConfig:
Delay: 30000000000
Parallelism: 2
Delay: 1000000000
FailureAction: "pause"
Monitor: 15000000000
MaxFailureRatio: 0.15
RollbackConfig:
Parallelism: 1
Delay: 1000000000
FailureAction: "pause"
Monitor: 15000000000
MaxFailureRatio: 0.15
EndpointSpec:
Ports:
-
@ -7563,7 +7605,15 @@ paths:
Replicated:
Replicas: 1
UpdateConfig:
Parallelism: 2
Delay: 1000000000
FailureAction: "pause"
Monitor: 15000000000
MaxFailureRatio: 0.15
RollbackConfig:
Parallelism: 1
Delay: 1000000000
FailureAction: "pause"
Monitor: 15000000000
MaxFailureRatio: 0.15
EndpointSpec:
@ -7581,6 +7631,12 @@ paths:
parameter indicates where to find registry authorization credentials. The
valid values are `spec` and `previous-spec`."
default: "spec"
- name: "rollback"
in: "query"
type: "string"
description: "Set this parameter to `previous` to cause a
server-side rollback to the previous service spec. The supplied spec will be
ignored in this case."
- name: "X-Registry-Auth"
in: "header"
description: "A base64-encoded auth configuration for pulling from private registries. [See the authentication section for details.](#section/Authentication)"

View file

@ -320,6 +320,12 @@ type ServiceUpdateOptions struct {
// credentials if they are not given in EncodedRegistryAuth. Valid
// values are "spec" and "previous-spec".
RegistryAuthFrom string
// Rollback indicates whether a server-side rollback should be
// performed. When this is set, the provided spec will be ignored.
// The valid values are "previous" and "none". An empty value is the
// same as "none".
Rollback string
}
// ServiceListOptions holds parameters to list services with.

View file

@ -18,9 +18,10 @@ type ServiceSpec struct {
// TaskTemplate defines how the service should construct new tasks when
// orchestrating this service.
TaskTemplate TaskSpec `json:",omitempty"`
Mode ServiceMode `json:",omitempty"`
UpdateConfig *UpdateConfig `json:",omitempty"`
TaskTemplate TaskSpec `json:",omitempty"`
Mode ServiceMode `json:",omitempty"`
UpdateConfig *UpdateConfig `json:",omitempty"`
RollbackConfig *UpdateConfig `json:",omitempty"`
// Networks field in ServiceSpec is deprecated. The
// same field in TaskSpec should be used instead.
@ -45,6 +46,12 @@ const (
UpdateStatePaused UpdateState = "paused"
// UpdateStateCompleted is the completed state.
UpdateStateCompleted UpdateState = "completed"
// UpdateStateRollbackStarted is the state with a rollback in progress.
UpdateStateRollbackStarted UpdateState = "rollback_started"
// UpdateStateRollbackPaused is the state with a rollback paused.
UpdateStateRollbackPaused UpdateState = "rollback_paused"
// UpdateStateRollbackCompleted is the state with a rollback completed.
UpdateStateRollbackCompleted UpdateState = "rollback_completed"
)
// UpdateStatus reports the status of a service update.
@ -68,6 +75,8 @@ const (
UpdateFailureActionPause = "pause"
// UpdateFailureActionContinue CONTINUE
UpdateFailureActionContinue = "continue"
// UpdateFailureActionRollback ROLLBACK
UpdateFailureActionRollback = "rollback"
)
// UpdateConfig represents the update configuration.

View file

@ -57,6 +57,18 @@ UpdateConfig:
{{- end }}
Max failure ratio: {{ .UpdateMaxFailureRatio }}
{{- end }}
{{- if .HasRollbackConfig }}
RollbackConfig:
Parallelism: {{ .RollbackParallelism }}
{{- if .HasRollbackDelay}}
Delay: {{ .RollbackDelay }}
{{- end }}
On failure: {{ .RollbackOnFailure }}
{{- if .HasRollbackMonitor}}
Monitoring Period: {{ .RollbackMonitor }}
{{- end }}
Max failure ratio: {{ .RollbackMaxFailureRatio }}
{{- end }}
ContainerSpec:
Image: {{ .ContainerImage }}
{{- if .ContainerArgs }}
@ -259,6 +271,38 @@ func (ctx *serviceInspectContext) UpdateMaxFailureRatio() float32 {
return ctx.Service.Spec.UpdateConfig.MaxFailureRatio
}
// HasRollbackConfig reports whether the service spec carries a non-nil
// RollbackConfig.
func (ctx *serviceInspectContext) HasRollbackConfig() bool {
	rollbackCfg := ctx.Service.Spec.RollbackConfig
	return rollbackCfg != nil
}
// RollbackParallelism returns the Parallelism value of the service's
// RollbackConfig. RollbackConfig is dereferenced without a nil check, so
// HasRollbackConfig should be consulted first.
func (ctx *serviceInspectContext) RollbackParallelism() uint64 {
	rollbackCfg := ctx.Service.Spec.RollbackConfig
	return rollbackCfg.Parallelism
}
// HasRollbackDelay reports whether the rollback delay is strictly positive.
// RollbackConfig is dereferenced without a nil check, so HasRollbackConfig
// should be consulted first.
func (ctx *serviceInspectContext) HasRollbackDelay() bool {
	delay := ctx.Service.Spec.RollbackConfig.Delay
	return delay.Nanoseconds() > 0
}
// RollbackDelay returns the Delay value of the service's RollbackConfig.
// RollbackConfig is dereferenced without a nil check, so HasRollbackConfig
// should be consulted first.
func (ctx *serviceInspectContext) RollbackDelay() time.Duration {
	rollbackCfg := ctx.Service.Spec.RollbackConfig
	return rollbackCfg.Delay
}
// RollbackOnFailure returns the FailureAction string of the service's
// RollbackConfig. RollbackConfig is dereferenced without a nil check, so
// HasRollbackConfig should be consulted first.
func (ctx *serviceInspectContext) RollbackOnFailure() string {
	rollbackCfg := ctx.Service.Spec.RollbackConfig
	return rollbackCfg.FailureAction
}
// HasRollbackMonitor reports whether the rollback monitoring period is
// strictly positive. RollbackConfig is dereferenced without a nil check, so
// HasRollbackConfig should be consulted first.
func (ctx *serviceInspectContext) HasRollbackMonitor() bool {
	monitor := ctx.Service.Spec.RollbackConfig.Monitor
	return monitor.Nanoseconds() > 0
}
// RollbackMonitor returns the Monitor value of the service's RollbackConfig.
// RollbackConfig is dereferenced without a nil check, so HasRollbackConfig
// should be consulted first.
func (ctx *serviceInspectContext) RollbackMonitor() time.Duration {
	rollbackCfg := ctx.Service.Spec.RollbackConfig
	return rollbackCfg.Monitor
}
// RollbackMaxFailureRatio returns the MaxFailureRatio value of the service's
// RollbackConfig. RollbackConfig is dereferenced without a nil check, so
// HasRollbackConfig should be consulted first.
func (ctx *serviceInspectContext) RollbackMaxFailureRatio() float32 {
	rollbackCfg := ctx.Service.Spec.RollbackConfig
	return rollbackCfg.MaxFailureRatio
}
func (ctx *serviceInspectContext) ContainerImage() string {
return ctx.Service.Spec.TaskTemplate.ContainerSpec.Image
}

View file

@ -49,7 +49,6 @@ func formatServiceInspect(t *testing.T, format formatter.Format, now time.Time)
Replicas: &two,
},
},
UpdateConfig: nil,
Networks: []swarm.NetworkAttachmentConfig{
{
Target: "5vpyomhb6ievnk0i0o60gcnei",

View file

@ -165,6 +165,16 @@ type updateOptions struct {
maxFailureRatio floatValue
}
// config copies the option values held by opts into a freshly allocated
// swarm.UpdateConfig and returns a pointer to it. The result is never nil.
func (opts updateOptions) config() *swarm.UpdateConfig {
	cfg := new(swarm.UpdateConfig)
	cfg.Parallelism = opts.parallelism
	cfg.Delay = opts.delay
	cfg.Monitor = opts.monitor
	cfg.FailureAction = opts.onFailure
	cfg.MaxFailureRatio = opts.maxFailureRatio.Value()
	return cfg
}
type resourceOptions struct {
limitCPU opts.NanoCPUs
limitMemBytes opts.MemBytes
@ -328,6 +338,7 @@ type serviceOptions struct {
constraints opts.ListOpts
placementPrefs placementPrefOpts
update updateOptions
rollback updateOptions
networks opts.ListOpts
endpoint endpointOptions
@ -445,16 +456,11 @@ func (opts *serviceOptions) ToService() (swarm.ServiceSpec, error) {
},
LogDriver: opts.logDriver.toLogDriver(),
},
Networks: convertNetworks(opts.networks.GetAll()),
Mode: serviceMode,
UpdateConfig: &swarm.UpdateConfig{
Parallelism: opts.update.parallelism,
Delay: opts.update.delay,
Monitor: opts.update.monitor,
FailureAction: opts.update.onFailure,
MaxFailureRatio: opts.update.maxFailureRatio.Value(),
},
EndpointSpec: opts.endpoint.ToEndpointSpec(),
Networks: convertNetworks(opts.networks.GetAll()),
Mode: serviceMode,
UpdateConfig: opts.update.config(),
RollbackConfig: opts.rollback.config(),
EndpointSpec: opts.endpoint.ToEndpointSpec(),
}
return service, nil
@ -487,10 +493,21 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
flags.DurationVar(&opts.update.delay, flagUpdateDelay, time.Duration(0), "Delay between updates (ns|us|ms|s|m|h) (default 0s)")
flags.DurationVar(&opts.update.monitor, flagUpdateMonitor, time.Duration(0), "Duration after each task update to monitor for failure (ns|us|ms|s|m|h) (default 0s)")
flags.SetAnnotation(flagUpdateMonitor, "version", []string{"1.25"})
flags.StringVar(&opts.update.onFailure, flagUpdateFailureAction, "pause", `Action on update failure ("pause"|"continue")`)
flags.StringVar(&opts.update.onFailure, flagUpdateFailureAction, "pause", `Action on update failure ("pause"|"continue"|"rollback")`)
flags.Var(&opts.update.maxFailureRatio, flagUpdateMaxFailureRatio, "Failure rate to tolerate during an update")
flags.SetAnnotation(flagUpdateMaxFailureRatio, "version", []string{"1.25"})
flags.Uint64Var(&opts.rollback.parallelism, flagRollbackParallelism, 1, "Maximum number of tasks rolled back simultaneously (0 to roll back all at once)")
flags.SetAnnotation(flagRollbackParallelism, "version", []string{"1.27"})
flags.DurationVar(&opts.rollback.delay, flagRollbackDelay, time.Duration(0), "Delay between task rollbacks (ns|us|ms|s|m|h) (default 0s)")
flags.SetAnnotation(flagRollbackDelay, "version", []string{"1.27"})
flags.DurationVar(&opts.rollback.monitor, flagRollbackMonitor, time.Duration(0), "Duration after each task rollback to monitor for failure (ns|us|ms|s|m|h) (default 0s)")
flags.SetAnnotation(flagRollbackMonitor, "version", []string{"1.27"})
flags.StringVar(&opts.rollback.onFailure, flagRollbackFailureAction, "pause", `Action on rollback failure ("pause"|"continue")`)
flags.SetAnnotation(flagRollbackFailureAction, "version", []string{"1.27"})
flags.Var(&opts.rollback.maxFailureRatio, flagRollbackMaxFailureRatio, "Failure rate to tolerate during a rollback")
flags.SetAnnotation(flagRollbackMaxFailureRatio, "version", []string{"1.27"})
flags.StringVar(&opts.endpoint.mode, flagEndpointMode, "vip", "Endpoint mode (vip or dnsrr)")
flags.BoolVar(&opts.registryAuth, flagRegistryAuth, false, "Send registry authentication details to swarm agents")
@ -520,77 +537,82 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
}
const (
flagPlacementPref = "placement-pref"
flagPlacementPrefAdd = "placement-pref-add"
flagPlacementPrefRemove = "placement-pref-rm"
flagConstraint = "constraint"
flagConstraintRemove = "constraint-rm"
flagConstraintAdd = "constraint-add"
flagContainerLabel = "container-label"
flagContainerLabelRemove = "container-label-rm"
flagContainerLabelAdd = "container-label-add"
flagDNS = "dns"
flagDNSRemove = "dns-rm"
flagDNSAdd = "dns-add"
flagDNSOption = "dns-option"
flagDNSOptionRemove = "dns-option-rm"
flagDNSOptionAdd = "dns-option-add"
flagDNSSearch = "dns-search"
flagDNSSearchRemove = "dns-search-rm"
flagDNSSearchAdd = "dns-search-add"
flagEndpointMode = "endpoint-mode"
flagHost = "host"
flagHostAdd = "host-add"
flagHostRemove = "host-rm"
flagHostname = "hostname"
flagEnv = "env"
flagEnvFile = "env-file"
flagEnvRemove = "env-rm"
flagEnvAdd = "env-add"
flagGroup = "group"
flagGroupAdd = "group-add"
flagGroupRemove = "group-rm"
flagLabel = "label"
flagLabelRemove = "label-rm"
flagLabelAdd = "label-add"
flagLimitCPU = "limit-cpu"
flagLimitMemory = "limit-memory"
flagMode = "mode"
flagMount = "mount"
flagMountRemove = "mount-rm"
flagMountAdd = "mount-add"
flagName = "name"
flagNetwork = "network"
flagPublish = "publish"
flagPublishRemove = "publish-rm"
flagPublishAdd = "publish-add"
flagReadOnly = "read-only"
flagReplicas = "replicas"
flagReserveCPU = "reserve-cpu"
flagReserveMemory = "reserve-memory"
flagRestartCondition = "restart-condition"
flagRestartDelay = "restart-delay"
flagRestartMaxAttempts = "restart-max-attempts"
flagRestartWindow = "restart-window"
flagStopGracePeriod = "stop-grace-period"
flagStopSignal = "stop-signal"
flagTTY = "tty"
flagUpdateDelay = "update-delay"
flagUpdateFailureAction = "update-failure-action"
flagUpdateMaxFailureRatio = "update-max-failure-ratio"
flagUpdateMonitor = "update-monitor"
flagUpdateParallelism = "update-parallelism"
flagUser = "user"
flagWorkdir = "workdir"
flagRegistryAuth = "with-registry-auth"
flagLogDriver = "log-driver"
flagLogOpt = "log-opt"
flagHealthCmd = "health-cmd"
flagHealthInterval = "health-interval"
flagHealthRetries = "health-retries"
flagHealthTimeout = "health-timeout"
flagNoHealthcheck = "no-healthcheck"
flagSecret = "secret"
flagSecretAdd = "secret-add"
flagSecretRemove = "secret-rm"
flagPlacementPref = "placement-pref"
flagPlacementPrefAdd = "placement-pref-add"
flagPlacementPrefRemove = "placement-pref-rm"
flagConstraint = "constraint"
flagConstraintRemove = "constraint-rm"
flagConstraintAdd = "constraint-add"
flagContainerLabel = "container-label"
flagContainerLabelRemove = "container-label-rm"
flagContainerLabelAdd = "container-label-add"
flagDNS = "dns"
flagDNSRemove = "dns-rm"
flagDNSAdd = "dns-add"
flagDNSOption = "dns-option"
flagDNSOptionRemove = "dns-option-rm"
flagDNSOptionAdd = "dns-option-add"
flagDNSSearch = "dns-search"
flagDNSSearchRemove = "dns-search-rm"
flagDNSSearchAdd = "dns-search-add"
flagEndpointMode = "endpoint-mode"
flagHost = "host"
flagHostAdd = "host-add"
flagHostRemove = "host-rm"
flagHostname = "hostname"
flagEnv = "env"
flagEnvFile = "env-file"
flagEnvRemove = "env-rm"
flagEnvAdd = "env-add"
flagGroup = "group"
flagGroupAdd = "group-add"
flagGroupRemove = "group-rm"
flagLabel = "label"
flagLabelRemove = "label-rm"
flagLabelAdd = "label-add"
flagLimitCPU = "limit-cpu"
flagLimitMemory = "limit-memory"
flagMode = "mode"
flagMount = "mount"
flagMountRemove = "mount-rm"
flagMountAdd = "mount-add"
flagName = "name"
flagNetwork = "network"
flagPublish = "publish"
flagPublishRemove = "publish-rm"
flagPublishAdd = "publish-add"
flagReadOnly = "read-only"
flagReplicas = "replicas"
flagReserveCPU = "reserve-cpu"
flagReserveMemory = "reserve-memory"
flagRestartCondition = "restart-condition"
flagRestartDelay = "restart-delay"
flagRestartMaxAttempts = "restart-max-attempts"
flagRestartWindow = "restart-window"
flagRollbackDelay = "rollback-delay"
flagRollbackFailureAction = "rollback-failure-action"
flagRollbackMaxFailureRatio = "rollback-max-failure-ratio"
flagRollbackMonitor = "rollback-monitor"
flagRollbackParallelism = "rollback-parallelism"
flagStopGracePeriod = "stop-grace-period"
flagStopSignal = "stop-signal"
flagTTY = "tty"
flagUpdateDelay = "update-delay"
flagUpdateFailureAction = "update-failure-action"
flagUpdateMaxFailureRatio = "update-max-failure-ratio"
flagUpdateMonitor = "update-monitor"
flagUpdateParallelism = "update-parallelism"
flagUser = "user"
flagWorkdir = "workdir"
flagRegistryAuth = "with-registry-auth"
flagLogDriver = "log-driver"
flagLogOpt = "log-opt"
flagHealthCmd = "health-cmd"
flagHealthInterval = "health-interval"
flagHealthRetries = "health-retries"
flagHealthTimeout = "health-timeout"
flagNoHealthcheck = "no-healthcheck"
flagSecret = "secret"
flagSecretAdd = "secret-add"
flagSecretRemove = "secret-rm"
)

View file

@ -1,6 +1,7 @@
package service
import (
"errors"
"fmt"
"sort"
"strings"
@ -10,6 +11,7 @@ import (
"github.com/docker/docker/api/types/container"
mounttypes "github.com/docker/docker/api/types/mount"
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/api/types/versions"
"github.com/docker/docker/cli"
"github.com/docker/docker/cli/command"
"github.com/docker/docker/client"
@ -95,7 +97,6 @@ func newListOptsVar() *opts.ListOpts {
func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID string) error {
apiClient := dockerCli.Client()
ctx := context.Background()
updateOpts := types.ServiceUpdateOptions{}
service, _, err := apiClient.ServiceInspectWithRaw(ctx, serviceID)
if err != nil {
@ -107,12 +108,44 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID str
return err
}
// There are two ways to do user-requested rollback. The old way is
// client-side, but with a sufficiently recent daemon we prefer
// server-side, because it will honor the rollback parameters.
var (
clientSideRollback bool
serverSideRollback bool
)
spec := &service.Spec
if rollback {
spec = service.PreviousSpec
if spec == nil {
return fmt.Errorf("service does not have a previous specification to roll back to")
// Rollback can't be combined with other flags.
otherFlagsPassed := false
flags.VisitAll(func(f *pflag.Flag) {
if f.Name == "rollback" {
return
}
if flags.Changed(f.Name) {
otherFlagsPassed = true
}
})
if otherFlagsPassed {
return errors.New("other flags may not be combined with --rollback")
}
if versions.LessThan(dockerCli.Client().ClientVersion(), "1.27") {
clientSideRollback = true
spec = service.PreviousSpec
if spec == nil {
return fmt.Errorf("service does not have a previous specification to roll back to")
}
} else {
serverSideRollback = true
}
}
updateOpts := types.ServiceUpdateOptions{}
if serverSideRollback {
updateOpts.Rollback = "previous"
}
err = updateService(flags, spec)
@ -147,7 +180,7 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID str
return err
}
updateOpts.EncodedRegistryAuth = encodedAuth
} else if rollback {
} else if clientSideRollback {
updateOpts.RegistryAuthFrom = types.RegistryAuthFromPreviousSpec
} else {
updateOpts.RegistryAuthFrom = types.RegistryAuthFromSpec
@ -289,6 +322,17 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
updateFloatValue(flagUpdateMaxFailureRatio, &spec.UpdateConfig.MaxFailureRatio)
}
if anyChanged(flags, flagRollbackParallelism, flagRollbackDelay, flagRollbackMonitor, flagRollbackFailureAction, flagRollbackMaxFailureRatio) {
if spec.RollbackConfig == nil {
spec.RollbackConfig = &swarm.UpdateConfig{}
}
updateUint64(flagRollbackParallelism, &spec.RollbackConfig.Parallelism)
updateDuration(flagRollbackDelay, &spec.RollbackConfig.Delay)
updateDuration(flagRollbackMonitor, &spec.RollbackConfig.Monitor)
updateString(flagRollbackFailureAction, &spec.RollbackConfig.FailureAction)
updateFloatValue(flagRollbackMaxFailureRatio, &spec.RollbackConfig.MaxFailureRatio)
}
if flags.Changed(flagEndpointMode) {
value, _ := flags.GetString(flagEndpointMode)
if spec.EndpointSpec == nil {

View file

@ -27,6 +27,10 @@ func (cli *Client) ServiceUpdate(ctx context.Context, serviceID string, version
query.Set("registryAuthFrom", options.RegistryAuthFrom)
}
if options.Rollback != "" {
query.Set("rollback", options.Rollback)
}
query.Set("version", strconv.FormatUint(version.Index, 10))
var response types.ServiceUpdateResponse

View file

@ -35,6 +35,12 @@ func ServiceFromGRPC(s swarmapi.Service) types.Service {
service.UpdateStatus.State = types.UpdateStatePaused
case swarmapi.UpdateStatus_COMPLETED:
service.UpdateStatus.State = types.UpdateStateCompleted
case swarmapi.UpdateStatus_ROLLBACK_STARTED:
service.UpdateStatus.State = types.UpdateStateRollbackStarted
case swarmapi.UpdateStatus_ROLLBACK_PAUSED:
service.UpdateStatus.State = types.UpdateStateRollbackPaused
case swarmapi.UpdateStatus_ROLLBACK_COMPLETED:
service.UpdateStatus.State = types.UpdateStateRollbackCompleted
}
startedAt, _ := gogotypes.TimestampFromProto(s.UpdateStatus.StartedAt)
@ -86,24 +92,8 @@ func serviceSpecFromGRPC(spec *swarmapi.ServiceSpec) *types.ServiceSpec {
}
// UpdateConfig
if spec.Update != nil {
convertedSpec.UpdateConfig = &types.UpdateConfig{
Parallelism: spec.Update.Parallelism,
MaxFailureRatio: spec.Update.MaxFailureRatio,
}
convertedSpec.UpdateConfig.Delay = spec.Update.Delay
if spec.Update.Monitor != nil {
convertedSpec.UpdateConfig.Monitor, _ = gogotypes.DurationFromProto(spec.Update.Monitor)
}
switch spec.Update.FailureAction {
case swarmapi.UpdateConfig_PAUSE:
convertedSpec.UpdateConfig.FailureAction = types.UpdateFailureActionPause
case swarmapi.UpdateConfig_CONTINUE:
convertedSpec.UpdateConfig.FailureAction = types.UpdateFailureActionContinue
}
}
convertedSpec.UpdateConfig = updateConfigFromGRPC(spec.Update)
convertedSpec.RollbackConfig = updateConfigFromGRPC(spec.Rollback)
// Mode
switch t := spec.GetMode().(type) {
@ -180,25 +170,13 @@ func ServiceSpecToGRPC(s types.ServiceSpec) (swarmapi.ServiceSpec, error) {
}
}
if s.UpdateConfig != nil {
var failureAction swarmapi.UpdateConfig_FailureAction
switch s.UpdateConfig.FailureAction {
case types.UpdateFailureActionPause, "":
failureAction = swarmapi.UpdateConfig_PAUSE
case types.UpdateFailureActionContinue:
failureAction = swarmapi.UpdateConfig_CONTINUE
default:
return swarmapi.ServiceSpec{}, fmt.Errorf("unrecognized update failure action %s", s.UpdateConfig.FailureAction)
}
spec.Update = &swarmapi.UpdateConfig{
Parallelism: s.UpdateConfig.Parallelism,
Delay: s.UpdateConfig.Delay,
FailureAction: failureAction,
MaxFailureRatio: s.UpdateConfig.MaxFailureRatio,
}
if s.UpdateConfig.Monitor != 0 {
spec.Update.Monitor = gogotypes.DurationProto(s.UpdateConfig.Monitor)
}
spec.Update, err = updateConfigToGRPC(s.UpdateConfig)
if err != nil {
return swarmapi.ServiceSpec{}, err
}
spec.Rollback, err = updateConfigToGRPC(s.RollbackConfig)
if err != nil {
return swarmapi.ServiceSpec{}, err
}
if s.EndpointSpec != nil {
@ -405,3 +383,58 @@ func driverToGRPC(p *types.Driver) *swarmapi.Driver {
Options: p.Options,
}
}
// updateConfigFromGRPC translates a swarmapi.UpdateConfig into the
// corresponding types.UpdateConfig. A nil input yields a nil result.
//
// FailureAction is left at its zero value when the gRPC enum is not one of
// PAUSE, CONTINUE or ROLLBACK. Monitor is only populated when the gRPC
// message carries one.
func updateConfigFromGRPC(updateConfig *swarmapi.UpdateConfig) *types.UpdateConfig {
	if updateConfig == nil {
		return nil
	}

	result := &types.UpdateConfig{
		Parallelism:     updateConfig.Parallelism,
		Delay:           updateConfig.Delay,
		MaxFailureRatio: updateConfig.MaxFailureRatio,
	}

	if updateConfig.Monitor != nil {
		// The conversion error is ignored; whatever duration the helper
		// returns is stored as-is.
		monitor, _ := gogotypes.DurationFromProto(updateConfig.Monitor)
		result.Monitor = monitor
	}

	switch updateConfig.FailureAction {
	case swarmapi.UpdateConfig_PAUSE:
		result.FailureAction = types.UpdateFailureActionPause
	case swarmapi.UpdateConfig_CONTINUE:
		result.FailureAction = types.UpdateFailureActionContinue
	case swarmapi.UpdateConfig_ROLLBACK:
		result.FailureAction = types.UpdateFailureActionRollback
	}

	return result
}
// updateConfigToGRPC translates an API types.UpdateConfig into the
// corresponding swarmapi.UpdateConfig.
//
// A nil input yields (nil, nil). An empty FailureAction is treated the same
// as "pause". Any FailureAction other than pause, continue or rollback is
// rejected with an error. Monitor is only set on the result when it is
// non-zero.
func updateConfigToGRPC(updateConfig *types.UpdateConfig) (*swarmapi.UpdateConfig, error) {
	if updateConfig == nil {
		return nil, nil
	}

	converted := &swarmapi.UpdateConfig{
		Parallelism:     updateConfig.Parallelism,
		Delay:           updateConfig.Delay,
		MaxFailureRatio: updateConfig.MaxFailureRatio,
	}

	switch updateConfig.FailureAction {
	case types.UpdateFailureActionPause, "":
		converted.FailureAction = swarmapi.UpdateConfig_PAUSE
	case types.UpdateFailureActionContinue:
		converted.FailureAction = swarmapi.UpdateConfig_CONTINUE
	case types.UpdateFailureActionRollback:
		converted.FailureAction = swarmapi.UpdateConfig_ROLLBACK
	default:
		// Spelling fixed: this message previously read "unrecongized".
		return nil, fmt.Errorf("unrecognized update failure action %s", updateConfig.FailureAction)
	}

	if updateConfig.Monitor != 0 {
		converted.Monitor = gogotypes.DurationProto(updateConfig.Monitor)
	}

	return converted, nil
}

View file

@ -132,7 +132,7 @@ func (c *Cluster) CreateService(s types.ServiceSpec, encodedAuth string) (*apity
}
// UpdateService updates existing service to match new properties.
func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string, registryAuthFrom string) (*apitypes.ServiceUpdateResponse, error) {
func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, flags apitypes.ServiceUpdateOptions) (*apitypes.ServiceUpdateResponse, error) {
var resp *apitypes.ServiceUpdateResponse
err := c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
@ -157,13 +157,14 @@ func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec typ
return errors.New("service does not use container tasks")
}
encodedAuth := flags.EncodedRegistryAuth
if encodedAuth != "" {
newCtnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
} else {
// this is needed because if the encodedAuth isn't being updated then we
// shouldn't lose it, and continue to use the one that was already present
var ctnr *swarmapi.ContainerSpec
switch registryAuthFrom {
switch flags.RegistryAuthFrom {
case apitypes.RegistryAuthFromSpec, "":
ctnr = currentService.Spec.Task.GetContainer()
case apitypes.RegistryAuthFromPreviousSpec:
@ -208,6 +209,16 @@ func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec typ
}
}
var rollback swarmapi.UpdateServiceRequest_Rollback
switch flags.Rollback {
case "", "none":
rollback = swarmapi.UpdateServiceRequest_NONE
case "previous":
rollback = swarmapi.UpdateServiceRequest_PREVIOUS
default:
return fmt.Errorf("unrecognized rollback option %s", flags.Rollback)
}
_, err = state.controlClient.UpdateService(
ctx,
&swarmapi.UpdateServiceRequest{
@ -216,6 +227,7 @@ func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec typ
ServiceVersion: &swarmapi.Version{
Index: version,
},
Rollback: rollback,
},
)
return err

View file

@ -22,6 +22,8 @@ keywords: "API, Docker, rcli, REST, documentation"
* `GET /containers/json` now supports `publish` and `expose` filters to filter containers that expose or publish certain ports.
* `POST /services/create` and `POST /services/(id or name)/update` now accept the `ReadOnly` parameter, which mounts the container's root filesystem as read only.
* `POST /build` now accepts `extrahosts` parameter to specify a host to ip mapping to use during the build.
* `POST /services/create` and `POST /services/(id or name)/update` now accept a `rollback` value for `FailureAction`.
* `POST /services/create` and `POST /services/(id or name)/update` now accept an optional `RollbackConfig` object which specifies rollback options.
## v1.26 API changes

View file

@ -21,54 +21,61 @@ Usage: docker service create [OPTIONS] IMAGE [COMMAND] [ARG...]
Create a new service
Options:
--constraint list Placement constraints (default [])
--container-label list Container labels (default [])
--dns list Set custom DNS servers (default [])
--dns-option list Set DNS options (default [])
--dns-search list Set custom DNS search domains (default [])
--endpoint-mode string Endpoint mode ("vip"|"dnsrr") (default "vip")
-e, --env list Set environment variables (default [])
--env-file list Read in a file of environment variables (default [])
--group list Set one or more supplementary user groups for the container (default [])
--health-cmd string Command to run to check health
--health-interval duration Time between running the check (ns|us|ms|s|m|h)
--health-retries int Consecutive failures needed to report unhealthy
--health-timeout duration Maximum time to allow one check to run (ns|us|ms|s|m|h)
--help Print usage
--host list Set one or more custom host-to-IP mappings (host:ip) (default [])
--hostname string Container hostname
-l, --label list Service labels (default [])
--limit-cpu decimal Limit CPUs (default 0.000)
--limit-memory bytes Limit Memory
--log-driver string Logging driver for service
--log-opt list Logging driver options (default [])
--mode string Service mode (replicated or global) (default "replicated")
--mount mount Attach a filesystem mount to the service
--name string Service name
--network list Network attachments (default [])
--no-healthcheck Disable any container-specified HEALTHCHECK
--placement-pref pref Add a placement preference
-p, --publish port Publish a port as a node port
--read-only Mount the container's root filesystem as read only
--replicas uint Number of tasks
--reserve-cpu decimal Reserve CPUs (default 0.000)
--reserve-memory bytes Reserve Memory
--restart-condition string Restart when condition is met ("none"|"on-failure"|"any")
--restart-delay duration Delay between restart attempts (ns|us|ms|s|m|h)
--restart-max-attempts uint Maximum number of restarts before giving up
--restart-window duration Window used to evaluate the restart policy (ns|us|ms|s|m|h)
--secret secret Specify secrets to expose to the service
--stop-grace-period duration Time to wait before force killing a container (ns|us|ms|s|m|h)
--stop-signal string Signal to stop the container
-t, --tty Allocate a pseudo-TTY
--update-delay duration Delay between updates (ns|us|ms|s|m|h) (default 0s)
--update-failure-action string Action on update failure ("pause"|"continue") (default "pause")
--update-max-failure-ratio float Failure rate to tolerate during an update
--update-monitor duration Duration after each task update to monitor for failure (ns|us|ms|s|m|h) (default 0s)
--update-parallelism uint Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-u, --user string Username or UID (format: <name|uid>[:<group|gid>])
--with-registry-auth Send registry authentication details to swarm agents
-w, --workdir string Working directory inside the container
--constraint list Placement constraints (default [])
--container-label list Container labels (default [])
--dns list Set custom DNS servers (default [])
--dns-option list Set DNS options (default [])
--dns-search list Set custom DNS search domains (default [])
--endpoint-mode string Endpoint mode ("vip"|"dnsrr") (default "vip")
-e, --env list Set environment variables (default [])
--env-file list Read in a file of environment variables (default [])
--group list Set one or more supplementary user groups for the container (default [])
--health-cmd string Command to run to check health
--health-interval duration Time between running the check (ns|us|ms|s|m|h)
--health-retries int Consecutive failures needed to report unhealthy
--health-timeout duration Maximum time to allow one check to run (ns|us|ms|s|m|h)
--help Print usage
--host list Set one or more custom host-to-IP mappings (host:ip) (default [])
--hostname string Container hostname
-l, --label list Service labels (default [])
--limit-cpu decimal Limit CPUs (default 0.000)
--limit-memory bytes Limit Memory
--log-driver string Logging driver for service
--log-opt list Logging driver options (default [])
--mode string Service mode (replicated or global) (default "replicated")
--mount mount Attach a filesystem mount to the service
--name string Service name
--network list Network attachments (default [])
--no-healthcheck Disable any container-specified HEALTHCHECK
--placement-pref pref Add a placement preference
-p, --publish port Publish a port as a node port
--read-only Mount the container's root filesystem as read only
--replicas uint Number of tasks
--reserve-cpu decimal Reserve CPUs (default 0.000)
--reserve-memory bytes Reserve Memory
--restart-condition string Restart when condition is met ("none"|"on-failure"|"any")
--restart-delay duration Delay between restart attempts (ns|us|ms|s|m|h)
--restart-max-attempts uint Maximum number of restarts before giving up
--restart-window duration Window used to evaluate the restart policy (ns|us|ms|s|m|h)
--rollback-delay duration Delay between task rollbacks (ns|us|ms|s|m|h) (default 0s)
--rollback-failure-action string Action on rollback failure ("pause"|"continue") (default "pause")
--rollback-max-failure-ratio float Failure rate to tolerate during a rollback
--rollback-monitor duration Duration after each task rollback to monitor for failure
(ns|us|ms|s|m|h) (default 0s)
--rollback-parallelism uint Maximum number of tasks rolled back simultaneously (0 to roll
back all at once) (default 1)
--secret secret Specify secrets to expose to the service
--stop-grace-period duration Time to wait before force killing a container (ns|us|ms|s|m|h)
--stop-signal string Signal to stop the container
-t, --tty Allocate a pseudo-TTY
--update-delay duration Delay between updates (ns|us|ms|s|m|h) (default 0s)
--update-failure-action string Action on update failure ("pause"|"continue"|"rollback") (default "pause")
--update-max-failure-ratio float Failure rate to tolerate during an update
--update-monitor duration Duration after each task update to monitor for failure (ns|us|ms|s|m|h) (default 0s)
--update-parallelism uint Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-u, --user string Username or UID (format: <name|uid>[:<group|gid>])
--with-registry-auth Send registry authentication details to swarm agents
-w, --workdir string Working directory inside the container
```
## Description

View file

@ -21,67 +21,74 @@ Usage: docker service update [OPTIONS] SERVICE
Update a service
Options:
--args string Service command args
--constraint-add list Add or update a placement constraint (default [])
--constraint-rm list Remove a constraint (default [])
--container-label-add list Add or update a container label (default [])
--container-label-rm list Remove a container label by its key (default [])
--dns-add list Add or update a custom DNS server (default [])
--dns-option-add list Add or update a DNS option (default [])
--dns-option-rm list Remove a DNS option (default [])
--dns-rm list Remove a custom DNS server (default [])
--dns-search-add list Add or update a custom DNS search domain (default [])
--dns-search-rm list Remove a DNS search domain (default [])
--endpoint-mode string Endpoint mode ("vip"|"dnsrr") (default "vip")
--env-add list Add or update an environment variable (default [])
--env-rm list Remove an environment variable (default [])
--force Force update even if no changes require it
--group-add list Add an additional supplementary user group to the container (default [])
--group-rm list Remove a previously added supplementary user group from the container (default [])
--health-cmd string Command to run to check health
--health-interval duration Time between running the check (ns|us|ms|s|m|h)
--health-retries int Consecutive failures needed to report unhealthy
--health-timeout duration Maximum time to allow one check to run (ns|us|ms|s|m|h)
--help Print usage
--host-add list Add or update a custom host-to-IP mapping (host:ip) (default [])
--host-rm list Remove a custom host-to-IP mapping (host:ip) (default [])
--hostname string Container hostname
--image string Service image tag
--label-add list Add or update a service label (default [])
--label-rm list Remove a label by its key (default [])
--limit-cpu decimal Limit CPUs (default 0.000)
--limit-memory bytes Limit Memory
--log-driver string Logging driver for service
--log-opt list Logging driver options (default [])
--mount-add mount Add or update a mount on a service
--mount-rm list Remove a mount by its target path (default [])
--no-healthcheck Disable any container-specified HEALTHCHECK
--placement-pref-add pref Add a placement preference
--placement-pref-rm pref Remove a placement preference
--publish-add port Add or update a published port
--publish-rm port Remove a published port by its target port
--read-only Mount the container's root filesystem as read only
--replicas uint Number of tasks
--reserve-cpu decimal Reserve CPUs (default 0.000)
--reserve-memory bytes Reserve Memory
--restart-condition string Restart when condition is met ("none"|"on-failure"|"any")
--restart-delay duration Delay between restart attempts (ns|us|ms|s|m|h)
--restart-max-attempts uint Maximum number of restarts before giving up
--restart-window duration Window used to evaluate the restart policy (ns|us|ms|s|m|h)
--rollback Rollback to previous specification
--secret-add secret Add or update a secret on a service
--secret-rm list Remove a secret (default [])
--stop-grace-period duration Time to wait before force killing a container (ns|us|ms|s|m|h)
--stop-signal string Signal to stop the container
-t, --tty Allocate a pseudo-TTY
--update-delay duration Delay between updates (ns|us|ms|s|m|h) (default 0s)
--update-failure-action string Action on update failure ("pause"|"continue") (default "pause")
--update-max-failure-ratio float Failure rate to tolerate during an update
--update-monitor duration Duration after each task update to monitor for failure (ns|us|ms|s|m|h) (default 0s)
--update-parallelism uint Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-u, --user string Username or UID (format: <name|uid>[:<group|gid>])
--with-registry-auth Send registry authentication details to swarm agents
-w, --workdir string Working directory inside the container
--args string Service command args
--constraint-add list Add or update a placement constraint (default [])
--constraint-rm list Remove a constraint (default [])
--container-label-add list Add or update a container label (default [])
--container-label-rm list Remove a container label by its key (default [])
--dns-add list Add or update a custom DNS server (default [])
--dns-option-add list Add or update a DNS option (default [])
--dns-option-rm list Remove a DNS option (default [])
--dns-rm list Remove a custom DNS server (default [])
--dns-search-add list Add or update a custom DNS search domain (default [])
--dns-search-rm list Remove a DNS search domain (default [])
--endpoint-mode string Endpoint mode ("vip"|"dnsrr") (default "vip")
--env-add list Add or update an environment variable (default [])
--env-rm list Remove an environment variable (default [])
--force Force update even if no changes require it
--group-add list Add an additional supplementary user group to the container (default [])
--group-rm list Remove a previously added supplementary user group from the container (default [])
--health-cmd string Command to run to check health
--health-interval duration Time between running the check (ns|us|ms|s|m|h)
--health-retries int Consecutive failures needed to report unhealthy
--health-timeout duration Maximum time to allow one check to run (ns|us|ms|s|m|h)
--help Print usage
--host-add list Add or update a custom host-to-IP mapping (host:ip) (default [])
--host-rm list Remove a custom host-to-IP mapping (host:ip) (default [])
--hostname string Container hostname
--image string Service image tag
--label-add list Add or update a service label (default [])
--label-rm list Remove a label by its key (default [])
--limit-cpu decimal Limit CPUs (default 0.000)
--limit-memory bytes Limit Memory
--log-driver string Logging driver for service
--log-opt list Logging driver options (default [])
--mount-add mount Add or update a mount on a service
--mount-rm list Remove a mount by its target path (default [])
--no-healthcheck Disable any container-specified HEALTHCHECK
--placement-pref-add pref Add a placement preference
--placement-pref-rm pref Remove a placement preference
--publish-add port Add or update a published port
--publish-rm port Remove a published port by its target port
--read-only Mount the container's root filesystem as read only
--replicas uint Number of tasks
--reserve-cpu decimal Reserve CPUs (default 0.000)
--reserve-memory bytes Reserve Memory
--restart-condition string Restart when condition is met ("none"|"on-failure"|"any")
--restart-delay duration Delay between restart attempts (ns|us|ms|s|m|h)
--restart-max-attempts uint Maximum number of restarts before giving up
--restart-window duration Window used to evaluate the restart policy (ns|us|ms|s|m|h)
--rollback Rollback to previous specification
--rollback-delay duration Delay between task rollbacks (ns|us|ms|s|m|h) (default 0s)
--rollback-failure-action string Action on rollback failure ("pause"|"continue") (default "pause")
--rollback-max-failure-ratio float Failure rate to tolerate during a rollback
--rollback-monitor duration Duration after each task rollback to monitor for failure
(ns|us|ms|s|m|h) (default 0s)
--rollback-parallelism uint Maximum number of tasks rolled back simultaneously (0 to roll
back all at once) (default 1)
--secret-add secret Add or update a secret on a service
--secret-rm list Remove a secret (default [])
--stop-grace-period duration Time to wait before force killing a container (ns|us|ms|s|m|h)
--stop-signal string Signal to stop the container
-t, --tty Allocate a pseudo-TTY
--update-delay duration Delay between updates (ns|us|ms|s|m|h) (default 0s)
--update-failure-action string Action on update failure ("pause"|"continue"|"rollback") (default "pause")
--update-max-failure-ratio float Failure rate to tolerate during an update
--update-monitor duration Duration after each task update to monitor for failure (ns|us|ms|s|m|h) (default 0s)
--update-parallelism uint Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
-u, --user string Username or UID (format: <name|uid>[:<group|gid>])
--with-registry-auth Send registry authentication details to swarm agents
-w, --workdir string Working directory inside the container
```
## Description
@ -202,6 +209,26 @@ web
```
Services can also be set up to roll back to the previous version automatically
when an update fails. To set up a service for automatic rollback, use
`--update-failure-action=rollback`. A rollback will be triggered if the fraction
of the tasks which failed to update successfully exceeds the value given with
`--update-max-failure-ratio`.
The rate, parallelism, and other parameters of a rollback operation are
determined by the values passed with the following flags:
- `--rollback-delay`
- `--rollback-failure-action`
- `--rollback-max-failure-ratio`
- `--rollback-monitor`
- `--rollback-parallelism`
For example, a service set up with `--update-parallelism 1 --rollback-parallelism 3`
will update one task at a time during a normal update, but will roll back three
tasks at a time during a rollback. These rollback parameters are respected both
during automatic rollbacks and for rollbacks initiated manually using `--rollback`.
### Add or remove secrets
Use the `--secret-add` or `--secret-rm` options to add or remove a service's

View file

@ -138,6 +138,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServicesUpdate(c *check.C) {
// create service
instances := 5
parallelism := 2
rollbackParallelism := 3
id := daemons[0].CreateService(c, serviceForUpdate, setInstances(instances))
// wait for tasks ready
@ -161,19 +162,15 @@ func (s *DockerSwarmSuite) TestAPISwarmServicesUpdate(c *check.C) {
map[string]int{image2: instances})
// Roll back to the previous version. This uses the CLI because
// rollback is a client-side operation.
// rollback used to be a client-side operation.
out, err := daemons[0].Cmd("service", "update", "--rollback", id)
c.Assert(err, checker.IsNil, check.Commentf(out))
// first batch
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image2: instances - parallelism, image1: parallelism})
map[string]int{image2: instances - rollbackParallelism, image1: rollbackParallelism})
// 2nd batch
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image2: instances - 2*parallelism, image1: 2 * parallelism})
// 3rd batch
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].CheckRunningTaskImages, checker.DeepEquals,
map[string]int{image1: instances})
}
@ -210,7 +207,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServicesFailedUpdate(c *check.C) {
c.Assert(v, checker.Equals, instances-2)
// Roll back to the previous version. This uses the CLI because
// rollback is a client-side operation.
// rollback used to be a client-side operation.
out, err := daemons[0].Cmd("service", "update", "--rollback", id)
c.Assert(err, checker.IsNil, check.Commentf(out))

View file

@ -556,6 +556,11 @@ func serviceForUpdate(s *swarm.Service) {
Delay: 4 * time.Second,
FailureAction: swarm.UpdateFailureActionContinue,
},
RollbackConfig: &swarm.UpdateConfig{
Parallelism: 3,
Delay: 4 * time.Second,
FailureAction: swarm.UpdateFailureActionContinue,
},
}
s.Spec.Name = "updatetest"
}