Topology-aware scheduling

This adds support for placement preferences in Swarm services.

- Convert PlacementPreferences between GRPC API and HTTP API
- Add --placement-pref, --placement-pref-add and --placement-pref-rm to CLI
- Add support for placement preferences in service inspect --pretty
- Add integration test

Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com>
This commit is contained in:
Aaron Lehmann 2017-01-19 15:27:37 -08:00
parent 52ce5c2548
commit 17288c611a
12 changed files with 319 additions and 15 deletions

View file

@ -2047,6 +2047,18 @@ definitions:
type: "array"
items:
type: "string"
Preferences:
description: "Preferences provide a way to make the scheduler aware of factors such as topology. They are provided in order from highest to lowest precedence."
type: "array"
items:
type: "object"
properties:
Spread:
type: "object"
properties:
SpreadDescriptor:
description: "label descriptor, such as engine.labels.az"
type: "string"
ForceUpdate:
description: "A counter that triggers an update even if no relevant parameters have been changed."
type: "integer"

View file

@ -81,7 +81,21 @@ type ResourceRequirements struct {
// Placement represents orchestration parameters.
type Placement struct {
Constraints []string `json:",omitempty"`
Constraints []string `json:",omitempty"`
Preferences []PlacementPreference `json:",omitempty"`
}
// PlacementPreference provides a way to make the scheduler aware of factors
// such as topology.
type PlacementPreference struct {
Spread *SpreadOver
}
// SpreadOver is a scheduling preference that instructs the scheduler to spread
// tasks evenly over groups of nodes identified by labels.
type SpreadOver struct {
// label descriptor, such as engine.labels.az
SpreadDescriptor string
}
// RestartPolicy represents the restart policy.

View file

@ -39,9 +39,12 @@ UpdateStatus:
Message: {{ .UpdateStatusMessage }}
{{- end }}
Placement:
{{- if .TaskPlacementConstraints -}}
{{- if .TaskPlacementConstraints }}
Constraints: {{ .TaskPlacementConstraints }}
{{- end }}
{{- if .TaskPlacementPreferences }}
Preferences: {{ .TaskPlacementPreferences }}
{{- end }}
{{- if .HasUpdateConfig }}
UpdateConfig:
Parallelism: {{ .UpdateParallelism }}
@ -211,6 +214,19 @@ func (ctx *serviceInspectContext) TaskPlacementConstraints() []string {
return nil
}
func (ctx *serviceInspectContext) TaskPlacementPreferences() []string {
if ctx.Service.Spec.TaskTemplate.Placement == nil {
return nil
}
var strings []string
for _, pref := range ctx.Service.Spec.TaskTemplate.Placement.Preferences {
if pref.Spread != nil {
strings = append(strings, "spread="+pref.Spread.SpreadDescriptor)
}
}
return strings
}
func (ctx *serviceInspectContext) HasUpdateConfig() bool {
return ctx.Service.Spec.UpdateConfig != nil
}

View file

@ -37,6 +37,8 @@ func newCreateCommand(dockerCli *command.DockerCli) *cobra.Command {
flags.Var(&opts.envFile, flagEnvFile, "Read in a file of environment variables")
flags.Var(&opts.mounts, flagMount, "Attach a filesystem mount to the service")
flags.Var(&opts.constraints, flagConstraint, "Placement constraints")
flags.Var(&opts.placementPrefs, flagPlacementPref, "Add a placement preference")
flags.SetAnnotation(flagPlacementPref, "version", []string{"1.27"})
flags.Var(&opts.networks, flagNetwork, "Network attachments")
flags.Var(&opts.secrets, flagSecret, "Specify secrets to expose to the service")
flags.SetAnnotation(flagSecret, "version", []string{"1.25"})

View file

@ -1,6 +1,7 @@
package service
import (
"errors"
"fmt"
"strconv"
"strings"
@ -117,6 +118,45 @@ func (f *floatValue) Value() float32 {
return float32(*f)
}
// placementPrefOpts holds a list of placement preferences.
type placementPrefOpts struct {
prefs []swarm.PlacementPreference
strings []string
}
func (opts *placementPrefOpts) String() string {
if len(opts.strings) == 0 {
return ""
}
return fmt.Sprintf("%v", opts.strings)
}
// Set validates the input value and adds it to the internal slices.
// Note: in the future strategies other than "spread", may be supported,
// as well as additional comma-separated options.
func (opts *placementPrefOpts) Set(value string) error {
fields := strings.Split(value, "=")
if len(fields) != 2 {
return errors.New(`placement preference must be of the format "<strategy>=<arg>"`)
}
if fields[0] != "spread" {
return fmt.Errorf("unsupported placement preference %s (only spread is supported)", fields[0])
}
opts.prefs = append(opts.prefs, swarm.PlacementPreference{
Spread: &swarm.SpreadOver{
SpreadDescriptor: fields[1],
},
})
opts.strings = append(opts.strings, value)
return nil
}
// Type returns a string name for this Option type
func (opts *placementPrefOpts) Type() string {
return "pref"
}
type updateOptions struct {
parallelism uint64
delay time.Duration
@ -283,11 +323,12 @@ type serviceOptions struct {
replicas Uint64Opt
mode string
restartPolicy restartPolicyOptions
constraints opts.ListOpts
update updateOptions
networks opts.ListOpts
endpoint endpointOptions
restartPolicy restartPolicyOptions
constraints opts.ListOpts
placementPrefs placementPrefOpts
update updateOptions
networks opts.ListOpts
endpoint endpointOptions
registryAuth bool
@ -398,6 +439,7 @@ func (opts *serviceOptions) ToService() (swarm.ServiceSpec, error) {
RestartPolicy: opts.restartPolicy.ToRestartPolicy(),
Placement: &swarm.Placement{
Constraints: opts.constraints.GetAll(),
Preferences: opts.placementPrefs.prefs,
},
LogDriver: opts.logDriver.toLogDriver(),
},
@ -473,6 +515,9 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
}
const (
flagPlacementPref = "placement-pref"
flagPlacementPrefAdd = "placement-pref-add"
flagPlacementPrefRemove = "placement-pref-rm"
flagConstraint = "constraint"
flagConstraintRemove = "constraint-rm"
flagConstraintAdd = "constraint-add"

View file

@ -69,6 +69,10 @@ func newUpdateCommand(dockerCli *command.DockerCli) *cobra.Command {
flags.SetAnnotation(flagSecretAdd, "version", []string{"1.25"})
flags.Var(&serviceOpts.mounts, flagMountAdd, "Add or update a mount on a service")
flags.Var(&serviceOpts.constraints, flagConstraintAdd, "Add or update a placement constraint")
flags.Var(&serviceOpts.placementPrefs, flagPlacementPrefAdd, "Add a placement preference")
flags.SetAnnotation(flagPlacementPrefAdd, "version", []string{"1.27"})
flags.Var(&placementPrefOpts{}, flagPlacementPrefRemove, "Remove a placement preference")
flags.SetAnnotation(flagPlacementPrefRemove, "version", []string{"1.27"})
flags.Var(&serviceOpts.endpoint.publishPorts, flagPublishAdd, "Add or update a published port")
flags.Var(&serviceOpts.groups, flagGroupAdd, "Add an additional supplementary user group to the container")
flags.SetAnnotation(flagGroupAdd, "version", []string{"1.25"})
@ -260,7 +264,14 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
if task.Placement == nil {
task.Placement = &swarm.Placement{}
}
updatePlacement(flags, task.Placement)
updatePlacementConstraints(flags, task.Placement)
}
if anyChanged(flags, flagPlacementPrefAdd, flagPlacementPrefRemove) {
if task.Placement == nil {
task.Placement = &swarm.Placement{}
}
updatePlacementPreferences(flags, task.Placement)
}
if err := updateReplicas(flags, &spec.Mode); err != nil {
@ -372,7 +383,7 @@ func anyChanged(flags *pflag.FlagSet, fields ...string) bool {
return false
}
func updatePlacement(flags *pflag.FlagSet, placement *swarm.Placement) {
func updatePlacementConstraints(flags *pflag.FlagSet, placement *swarm.Placement) {
if flags.Changed(flagConstraintAdd) {
values := flags.Lookup(flagConstraintAdd).Value.(*opts.ListOpts).GetAll()
placement.Constraints = append(placement.Constraints, values...)
@ -391,6 +402,35 @@ func updatePlacement(flags *pflag.FlagSet, placement *swarm.Placement) {
placement.Constraints = newConstraints
}
func updatePlacementPreferences(flags *pflag.FlagSet, placement *swarm.Placement) {
var newPrefs []swarm.PlacementPreference
if flags.Changed(flagPlacementPrefRemove) {
for _, existing := range placement.Preferences {
removed := false
for _, removal := range flags.Lookup(flagPlacementPrefRemove).Value.(*placementPrefOpts).prefs {
if removal.Spread != nil && existing.Spread != nil && removal.Spread.SpreadDescriptor == existing.Spread.SpreadDescriptor {
removed = true
break
}
}
if !removed {
newPrefs = append(newPrefs, existing)
}
}
} else {
newPrefs = placement.Preferences
}
if flags.Changed(flagPlacementPrefAdd) {
for _, addition := range flags.Lookup(flagPlacementPrefAdd).Value.(*placementPrefOpts).prefs {
newPrefs = append(newPrefs, addition)
}
}
placement.Preferences = newPrefs
}
func updateContainerLabels(flags *pflag.FlagSet, field *map[string]string) {
if flags.Changed(flagContainerLabelAdd) {
if *field == nil {

View file

@ -51,7 +51,7 @@ func TestUpdateLabelsRemoveALabelThatDoesNotExist(t *testing.T) {
assert.Equal(t, len(labels), 1)
}
func TestUpdatePlacement(t *testing.T) {
func TestUpdatePlacementConstraints(t *testing.T) {
flags := newUpdateCommand(nil).Flags()
flags.Set("constraint-add", "node=toadd")
flags.Set("constraint-rm", "node!=toremove")
@ -60,12 +60,38 @@ func TestUpdatePlacement(t *testing.T) {
Constraints: []string{"node!=toremove", "container=tokeep"},
}
updatePlacement(flags, placement)
updatePlacementConstraints(flags, placement)
assert.Equal(t, len(placement.Constraints), 2)
assert.Equal(t, placement.Constraints[0], "container=tokeep")
assert.Equal(t, placement.Constraints[1], "node=toadd")
}
func TestUpdatePlacementPrefs(t *testing.T) {
flags := newUpdateCommand(nil).Flags()
flags.Set("placement-pref-add", "spread=node.labels.dc")
flags.Set("placement-pref-rm", "spread=node.labels.rack")
placement := &swarm.Placement{
Preferences: []swarm.PlacementPreference{
{
Spread: &swarm.SpreadOver{
SpreadDescriptor: "node.labels.rack",
},
},
{
Spread: &swarm.SpreadOver{
SpreadDescriptor: "node.labels.row",
},
},
},
}
updatePlacementPreferences(flags, placement)
assert.Equal(t, len(placement.Preferences), 2)
assert.Equal(t, placement.Preferences[0].Spread.SpreadDescriptor, "node.labels.row")
assert.Equal(t, placement.Preferences[1].Spread.SpreadDescriptor, "node.labels.dc")
}
func TestUpdateEnvironment(t *testing.T) {
flags := newUpdateCommand(nil).Flags()
flags.Set("env-add", "toadd=newenv")

View file

@ -162,8 +162,21 @@ func ServiceSpecToGRPC(s types.ServiceSpec) (swarmapi.ServiceSpec, error) {
spec.Task.Restart = restartPolicy
if s.TaskTemplate.Placement != nil {
var preferences []*swarmapi.PlacementPreference
for _, pref := range s.TaskTemplate.Placement.Preferences {
if pref.Spread != nil {
preferences = append(preferences, &swarmapi.PlacementPreference{
Preference: &swarmapi.PlacementPreference_Spread{
Spread: &swarmapi.SpreadOver{
SpreadDescriptor: pref.Spread.SpreadDescriptor,
},
},
})
}
}
spec.Task.Placement = &swarmapi.Placement{
Constraints: s.TaskTemplate.Placement.Constraints,
Preferences: preferences,
}
}
@ -351,10 +364,21 @@ func restartPolicyToGRPC(p *types.RestartPolicy) (*swarmapi.RestartPolicy, error
}
func placementFromGRPC(p *swarmapi.Placement) *types.Placement {
var r *types.Placement
if p != nil {
r = &types.Placement{}
r.Constraints = p.Constraints
if p == nil {
return nil
}
r := &types.Placement{
Constraints: p.Constraints,
}
for _, pref := range p.Preferences {
if spread := pref.GetSpread(); spread != nil {
r.Preferences = append(r.Preferences, types.PlacementPreference{
Spread: &types.SpreadOver{
SpreadDescriptor: spread.SpreadDescriptor,
},
})
}
}
return r

View file

@ -47,6 +47,7 @@ Options:
--name string Service name
--network list Network attachments (default [])
--no-healthcheck Disable any container-specified HEALTHCHECK
--placement-pref pref Add a placement preference
-p, --publish port Publish a port as a node port
--read-only Mount the container's root filesystem as read only
--replicas uint Number of tasks
@ -435,6 +436,77 @@ $ docker service create \
redis:3.0.6
```
### Specify service placement preferences (--placement-pref)
You can set up the service to divide tasks evenly over different categories of
nodes. One example of where this can be useful is to balance tasks over a set
of datacenters or availability zones. The example below illustrates this:
```bash
$ docker service create \
--replicas 9 \
--name redis_2 \
--placement-pref 'spread=node.labels.datacenter' \
redis:3.0.6
```
This uses `--placement-pref` with a `spread` strategy (currently the only
supported strategy) to spread tasks evenly over the values of the `datacenter`
node label. In this example, we assume that every node has a `datacenter` node
label attached to it. If there are three different values of this label among
nodes in the swarm, one third of the tasks will be placed on the nodes
associated with each value. This is true even if there are more nodes with one
value than another. For example, consider the following set of nodes:
- Three nodes with `node.labels.datacenter=east`
- Two nodes with `node.labels.datacenter=south`
- One node with `node.labels.datacenter=west`
Since we are spreading over the values of the `datacenter` label and the
service has 9 replicas, 3 replicas will end up in each datacenter. There are
three nodes associated with the value `east`, so each one will get one of the
three replicas reserved for this value. There are two nodes with the value
`south`, and the three replicas for this value will be divided between them,
with one receiving two replicas and another receiving just one. Finally, `west`
has a single node that will get all three replicas reserved for `west`.
If the nodes in one category (for example, those with
`node.labels.datacenter=south`) can't handle their fair share of tasks due to
constraints or resource limitations, the extra tasks will be assigned to other
nodes instead, if possible.
Both engine labels and node labels are supported by placement preferences. The
example above uses a node label, because the label is referenced with
`node.labels.datacenter`. To spread over the values of an engine label, use
`--placement-pref spread=engine.labels.<labelname>`.
It is possible to add multiple placement preferences to a service. This
establishes a hierarchy of preferences, so that tasks are first divided over
one category, and then further divided over additional categories. One example
of where this may be useful is dividing tasks fairly between datacenters, and
then splitting the tasks within each datacenter over a choice of racks. To add
multiple placement preferences, specify the `--placement-pref` flag multiple
times. The order is significant, and the placement preferences will be applied
in the order given when making scheduling decisions.
The following example sets up a service with multiple placement preferences.
Tasks are spread first over the various datacenters, and then over racks
(as indicated by the respective labels):
```bash
$ docker service create \
--replicas 9 \
--name redis_2 \
--placement-pref 'spread=node.labels.datacenter' \
--placement-pref 'spread=node.labels.rack' \
redis:3.0.6
```
When updating a service with `docker service update`, `--placement-pref-add`
appends a new placement preference after all existing placement preferences.
`--placement-pref-rm` removes an existing placement preference that matches the
argument.
### Attach a service to an existing network (--network)
You can use overlay networks to connect one or more services within the swarm.

View file

@ -56,6 +56,8 @@ Options:
--mount-add mount Add or update a mount on a service
--mount-rm list Remove a mount by its target path (default [])
--no-healthcheck Disable any container-specified HEALTHCHECK
--placement-pref-add pref Add a placement preference
--placement-pref-rm pref Remove a placement preference
--publish-add port Add or update a published port
--publish-rm port Remove a published port by its target port
--read-only Mount the container's root filesystem as read only

View file

@ -365,6 +365,48 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
}
}
func (s *DockerSwarmSuite) TestAPISwarmServicePlacementPrefs(c *check.C) {
const nodeCount = 3
var daemons [nodeCount]*daemon.Swarm
for i := 0; i < nodeCount; i++ {
daemons[i] = s.AddDaemon(c, true, i == 0)
}
// wait for nodes ready
waitAndAssert(c, 5*time.Second, daemons[0].CheckNodeReadyCount, checker.Equals, nodeCount)
nodes := daemons[0].ListNodes(c)
c.Assert(len(nodes), checker.Equals, nodeCount)
// add labels to nodes
daemons[0].UpdateNode(c, nodes[0].ID, func(n *swarm.Node) {
n.Spec.Annotations.Labels = map[string]string{
"rack": "a",
}
})
for i := 1; i < nodeCount; i++ {
daemons[0].UpdateNode(c, nodes[i].ID, func(n *swarm.Node) {
n.Spec.Annotations.Labels = map[string]string{
"rack": "b",
}
})
}
// create service
instances := 4
prefs := []swarm.PlacementPreference{{Spread: &swarm.SpreadOver{SpreadDescriptor: "node.labels.rack"}}}
id := daemons[0].CreateService(c, simpleTestService, setPlacementPrefs(prefs), setInstances(instances))
// wait for tasks ready
waitAndAssert(c, defaultReconciliationTimeout, daemons[0].CheckServiceRunningTasks(id), checker.Equals, instances)
tasks := daemons[0].GetServiceTasks(c, id)
// validate all tasks are running on nodes[0]
tasksOnNode := make(map[string]int)
for _, task := range tasks {
tasksOnNode[task.NodeID]++
}
c.Assert(tasksOnNode[nodes[0].ID], checker.Equals, 2)
c.Assert(tasksOnNode[nodes[1].ID], checker.Equals, 1)
c.Assert(tasksOnNode[nodes[2].ID], checker.Equals, 1)
}
func (s *DockerSwarmSuite) TestAPISwarmServicesStateReporting(c *check.C) {
testRequires(c, SameHostDaemon)
testRequires(c, DaemonIsLinux)

View file

@ -596,6 +596,15 @@ func setConstraints(constraints []string) daemon.ServiceConstructor {
}
}
func setPlacementPrefs(prefs []swarm.PlacementPreference) daemon.ServiceConstructor {
return func(s *swarm.Service) {
if s.Spec.TaskTemplate.Placement == nil {
s.Spec.TaskTemplate.Placement = &swarm.Placement{}
}
s.Spec.TaskTemplate.Placement.Preferences = prefs
}
}
func setGlobalMode(s *swarm.Service) {
s.Spec.Mode = swarm.ServiceMode{
Global: &swarm.GlobalService{},