Update SwarmKit to ed384f3b3957f65e3111bd020f9815f3d4296fa2

This fix updates SwarmKit to ed384f3b3957f65e3111bd020f9815f3d4296fa2. Notable changes since the last update (3ca4775ba4a5519e2225c3337c7db8901ec39d26):

1. Fix duplicated port allocation with a restarted swarm (Docker issue #29247).
2. Topology-aware scheduling (Docker PR #30725).

Docker issue #29247 was labeled 1.13.1, though per the feedback in https://github.com/docker/swarmkit/pull/1802#issuecomment-274143500 the related SwarmKit changes are to be merged to master only.

This fix fixes #29247 (master only). This fix is related to #30725.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
Parent: d4be5a2f67
Commit: 6fb3a9d33c
11 changed files with 1047 additions and 351 deletions
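As background for the topology-aware scheduling change, the new SpreadOver and PlacementPreference messages vendored below are carried in a service spec's Placement. A minimal sketch of how a caller could populate them from Go; the oneof wrapper name api.PlacementPreference_Spread is assumed from the usual gogo/protobuf code generation conventions, and this helper is not part of the commit:

package example

import "github.com/docker/swarmkit/api"

// spreadPlacement builds a Placement that asks the scheduler to spread tasks
// evenly across the values of the given label descriptor, e.g.
// "engine.labels.az".
func spreadPlacement(descriptor string) *api.Placement {
    return &api.Placement{
        Preferences: []*api.PlacementPreference{
            {
                Preference: &api.PlacementPreference_Spread{
                    Spread: &api.SpreadOver{SpreadDescriptor: descriptor},
                },
            },
        },
    }
}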
@@ -102,7 +102,7 @@ github.com/docker/containerd 78fb8f45890a601e0fd9051cf9f9f74923e950fd
 github.com/tonistiigi/fifo 1405643975692217d6720f8b54aeee1bf2cd5cf4
 
 # cluster
-github.com/docker/swarmkit 3ca4775ba4a5519e2225c3337c7db8901ec39d26
+github.com/docker/swarmkit ed384f3b3957f65e3111bd020f9815f3d4296fa2
 github.com/golang/mock bd3c8e81be01eef76d4b503f5e687d2d1354d2d9
 github.com/gogo/protobuf 8d70fb3182befc465c4a1eac8ad4d38ff49778e2
 github.com/cloudflare/cfssl 7fb22c8cba7ecaf98e4082d22d65800cf45e042a
vendor/github.com/docker/swarmkit/api/types.pb.go (generated, vendored, 1032 changed lines): file diff suppressed because it is too large.
vendor/github.com/docker/swarmkit/api/types.proto (generated, vendored, 21 changed lines)

@@ -709,10 +709,29 @@ message EncryptionConfig {
     bool auto_lock_managers = 1;
 }
 
+message SpreadOver {
+    string spread_descriptor = 1; // label descriptor, such as engine.labels.az
+    // TODO: support node information beyond engine and node labels
+
+    // TODO: in the future, add a map that provides weights for weighted
+    // spreading.
+}
+
+message PlacementPreference {
+    oneof Preference {
+        SpreadOver spread = 1;
+    }
+}
+
 // Placement specifies task distribution constraints.
 message Placement {
-    // constraints specifies a set of requirements a node should meet for a task.
+    // Constraints specifies a set of requirements a node should meet for a task.
     repeated string constraints = 1;
+
+    // Preferences provide a way to make the scheduler aware of factors
+    // such as topology. They are provided in order from highest to lowest
+    // precedence.
+    repeated PlacementPreference preferences = 2;
 }
 
 // JoinToken contains the join tokens for workers and managers.
vendor/github.com/docker/swarmkit/manager/allocator/network.go (generated, vendored, 2 changed lines)

@@ -231,7 +231,7 @@ func (a *Allocator) doNetworkInit(ctx context.Context) (err error) {
 
     var allocatedServices []*api.Service
     for _, s := range services {
-        if nc.nwkAllocator.IsServiceAllocated(s) {
+        if nc.nwkAllocator.IsServiceAllocated(s, networkallocator.OnInit) {
             continue
         }
 
vendor/github.com/docker/swarmkit/manager/allocator/networkallocator/networkallocator.go (generated, vendored, 21 changed lines)

@@ -289,8 +289,25 @@ func (na *NetworkAllocator) PortsAllocatedInHostPublishMode(s *api.Service) bool
     return na.portAllocator.portsAllocatedInHostPublishMode(s)
 }
 
+// ServiceAllocationOpts is struct used for functional options in IsServiceAllocated
+type ServiceAllocationOpts struct {
+    OnInit bool
+}
+
+// OnInit is called for allocator initialization stage
+func OnInit(options *ServiceAllocationOpts) {
+    options.OnInit = true
+}
+
 // IsServiceAllocated returns if the passed service has its network resources allocated or not.
-func (na *NetworkAllocator) IsServiceAllocated(s *api.Service) bool {
+// init bool indicates if the func is called during allocator initialization stage.
+func (na *NetworkAllocator) IsServiceAllocated(s *api.Service, flags ...func(*ServiceAllocationOpts)) bool {
+    var options ServiceAllocationOpts
+
+    for _, flag := range flags {
+        flag(&options)
+    }
+
     // If endpoint mode is VIP and allocator does not have the
     // service in VIP allocated set then it is not allocated.
     if (len(s.Spec.Task.Networks) != 0 || len(s.Spec.Networks) != 0) &&

@@ -313,7 +330,7 @@ func (na *NetworkAllocator) IsServiceAllocated(s *api.Service) bool {
 
     if (s.Spec.Endpoint != nil && len(s.Spec.Endpoint.Ports) != 0) ||
         (s.Endpoint != nil && len(s.Endpoint.Ports) != 0) {
-        return na.portAllocator.isPortsAllocated(s)
+        return na.portAllocator.isPortsAllocatedOnInit(s, options.OnInit)
     }
 
     return true
vendor/github.com/docker/swarmkit/manager/allocator/networkallocator/portallocator.go (generated, vendored, 11 changed lines)

@@ -297,6 +297,10 @@ func (pa *portAllocator) portsAllocatedInHostPublishMode(s *api.Service) bool {
 }
 
 func (pa *portAllocator) isPortsAllocated(s *api.Service) bool {
+    return pa.isPortsAllocatedOnInit(s, false)
+}
+
+func (pa *portAllocator) isPortsAllocatedOnInit(s *api.Service, onInit bool) bool {
     // If service has no user-defined endpoint and allocated endpoint,
     // we assume it is allocated and return true.
     if s.Endpoint == nil && s.Spec.Endpoint == nil {

@@ -345,6 +349,13 @@ func (pa *portAllocator) isPortsAllocated(s *api.Service) bool {
         if portConfig.PublishedPort == 0 && portStates.delState(portConfig) == nil {
             return false
         }
+
+        // If SwarmPort was not defined by user and the func
+        // is called during allocator initialization state then
+        // we are not allocated.
+        if portConfig.PublishedPort == 0 && onInit {
+            return false
+        }
     }
 
     return true
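The OnInit functional option above is what lets IsServiceAllocated distinguish the allocator's restore pass from normal operation: during initialization, a published port whose PublishedPort is 0 in the spec (dynamically assigned) is reported as not allocated, so the allocator re-processes the service on restart instead of skipping it and records the port it already holds, which is what prevents the duplicated port allocations of #29247. A small sketch of how a caller toggles the option, assuming a *networkallocator.NetworkAllocator and an *api.Service obtained elsewhere; illustrative only, since in this commit the OnInit call site is doNetworkInit in the allocator package:

package example

import (
    "github.com/docker/swarmkit/api"
    "github.com/docker/swarmkit/manager/allocator/networkallocator"
)

// needsAllocation reports whether the service still needs network resources,
// passing OnInit only while the allocator is being (re)initialized.
func needsAllocation(na *networkallocator.NetworkAllocator, s *api.Service, initializing bool) bool {
    if initializing {
        return !na.IsServiceAllocated(s, networkallocator.OnInit)
    }
    return !na.IsServiceAllocated(s)
}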
vendor/github.com/docker/swarmkit/manager/constraint/constraint.go (generated, vendored, 14 changed lines)

@@ -13,8 +13,10 @@ const (
     eq = iota
     noteq
 
-    nodeLabelPrefix   = "node.labels."
-    engineLabelPrefix = "engine.labels."
+    // NodeLabelPrefix is the constraint key prefix for node labels.
+    NodeLabelPrefix = "node.labels."
+    // EngineLabelPrefix is the constraint key prefix for engine labels.
+    EngineLabelPrefix = "engine.labels."
 )
 
 var (

@@ -168,14 +170,14 @@ func NodeMatches(constraints []Constraint, n *api.Node) bool {
             }
 
         // node labels constraint in form like 'node.labels.key==value'
-        case len(constraint.key) > len(nodeLabelPrefix) && strings.EqualFold(constraint.key[:len(nodeLabelPrefix)], nodeLabelPrefix):
+        case len(constraint.key) > len(NodeLabelPrefix) && strings.EqualFold(constraint.key[:len(NodeLabelPrefix)], NodeLabelPrefix):
             if n.Spec.Annotations.Labels == nil {
                 if !constraint.Match("") {
                     return false
                 }
                 continue
             }
-            label := constraint.key[len(nodeLabelPrefix):]
+            label := constraint.key[len(NodeLabelPrefix):]
             // label itself is case sensitive
             val := n.Spec.Annotations.Labels[label]
             if !constraint.Match(val) {

@@ -183,14 +185,14 @@ func NodeMatches(constraints []Constraint, n *api.Node) bool {
             }
 
         // engine labels constraint in form like 'engine.labels.key!=value'
-        case len(constraint.key) > len(engineLabelPrefix) && strings.EqualFold(constraint.key[:len(engineLabelPrefix)], engineLabelPrefix):
+        case len(constraint.key) > len(EngineLabelPrefix) && strings.EqualFold(constraint.key[:len(EngineLabelPrefix)], EngineLabelPrefix):
             if n.Description == nil || n.Description.Engine == nil || n.Description.Engine.Labels == nil {
                 if !constraint.Match("") {
                     return false
                 }
                 continue
             }
-            label := constraint.key[len(engineLabelPrefix):]
+            label := constraint.key[len(EngineLabelPrefix):]
             val := n.Description.Engine.Labels[label]
             if !constraint.Match(val) {
                 return false
vendor/github.com/docker/swarmkit/manager/scheduler/decision_tree.go (generated, vendored, new file, 55 lines)

@@ -0,0 +1,55 @@
+package scheduler
+
+import (
+    "container/heap"
+)
+
+type decisionTree struct {
+    // Count of tasks for the service scheduled to this subtree
+    tasks int
+
+    // Non-leaf point to the next level of the tree. The key is the
+    // value that the subtree covers.
+    next map[string]*decisionTree
+
+    // Leaf nodes contain a list of nodes
+    nodeHeap nodeMaxHeap
+}
+
+// orderedNodes returns the nodes in this decision tree entry, sorted best
+// (lowest) first according to the sorting function. Must be called on a leaf
+// of the decision tree.
+//
+// The caller may modify the nodes in the returned slice. This has the effect
+// of changing the nodes in the decision tree entry. The next node to
+// findBestNodes on this decisionTree entry will take into account the changes
+// that were made to the nodes.
+func (dt *decisionTree) orderedNodes(meetsConstraints func(*NodeInfo) bool, nodeLess func(*NodeInfo, *NodeInfo) bool) []NodeInfo {
+    if dt.nodeHeap.length != len(dt.nodeHeap.nodes) {
+        // We already collapsed the heap into a sorted slice, so
+        // re-heapify. There may have been modifications to the nodes
+        // so we can't return dt.nodeHeap.nodes as-is. We also need to
+        // reevaluate constraints because of the possible modifications.
+        for i := 0; i < len(dt.nodeHeap.nodes); {
+            if meetsConstraints(&dt.nodeHeap.nodes[i]) {
+                i++
+            } else {
+                last := len(dt.nodeHeap.nodes) - 1
+                dt.nodeHeap.nodes[i] = dt.nodeHeap.nodes[last]
+                dt.nodeHeap.nodes = dt.nodeHeap.nodes[:last]
+            }
+        }
+        dt.nodeHeap.length = len(dt.nodeHeap.nodes)
+        heap.Init(&dt.nodeHeap)
+    }
+
+    // Popping every element orders the nodes from best to worst. The
+    // first pop gets the worst node (since this a max-heap), and puts it
+    // at position n-1. Then the next pop puts the next-worst at n-2, and
+    // so on.
+    for dt.nodeHeap.Len() > 0 {
+        heap.Pop(&dt.nodeHeap)
+    }
+
+    return dt.nodeHeap.nodes
+}
vendor/github.com/docker/swarmkit/manager/scheduler/nodeheap.go (generated, vendored, new file, 31 lines)

@@ -0,0 +1,31 @@
+package scheduler
+
+type nodeMaxHeap struct {
+    nodes    []NodeInfo
+    lessFunc func(*NodeInfo, *NodeInfo) bool
+    length   int
+}
+
+func (h nodeMaxHeap) Len() int {
+    return h.length
+}
+
+func (h nodeMaxHeap) Swap(i, j int) {
+    h.nodes[i], h.nodes[j] = h.nodes[j], h.nodes[i]
+}
+
+func (h nodeMaxHeap) Less(i, j int) bool {
+    // reversed to make a max-heap
+    return h.lessFunc(&h.nodes[j], &h.nodes[i])
+}
+
+func (h *nodeMaxHeap) Push(x interface{}) {
+    h.nodes = append(h.nodes, x.(NodeInfo))
+    h.length++
+}
+
+func (h *nodeMaxHeap) Pop() interface{} {
+    h.length--
+    // return value is never used
+    return nil
+}
vendor/github.com/docker/swarmkit/manager/scheduler/nodeset.go (generated, vendored, 121 changed lines)

@@ -3,9 +3,11 @@ package scheduler
 import (
     "container/heap"
     "errors"
+    "strings"
     "time"
 
     "github.com/docker/swarmkit/api"
+    "github.com/docker/swarmkit/manager/constraint"
 )
 
 var errNodeNotFound = errors.New("node not found in scheduler dataset")

@@ -56,73 +58,74 @@ func (ns *nodeSet) remove(nodeID string) {
     delete(ns.nodes, nodeID)
 }
 
-type nodeMaxHeap struct {
-    nodes    []NodeInfo
-    lessFunc func(*NodeInfo, *NodeInfo) bool
-    length   int
-}
-
-func (h nodeMaxHeap) Len() int {
-    return h.length
-}
-
-func (h nodeMaxHeap) Swap(i, j int) {
-    h.nodes[i], h.nodes[j] = h.nodes[j], h.nodes[i]
-}
-
-func (h nodeMaxHeap) Less(i, j int) bool {
-    // reversed to make a max-heap
-    return h.lessFunc(&h.nodes[j], &h.nodes[i])
-}
-
-func (h *nodeMaxHeap) Push(x interface{}) {
-    h.nodes = append(h.nodes, x.(NodeInfo))
-    h.length++
-}
-
-func (h *nodeMaxHeap) Pop() interface{} {
-    h.length--
-    // return value is never used
-    return nil
-}
-
-// findBestNodes returns n nodes (or < n if fewer nodes are available) that
-// rank best (lowest) according to the sorting function.
-func (ns *nodeSet) findBestNodes(n int, meetsConstraints func(*NodeInfo) bool, nodeLess func(*NodeInfo, *NodeInfo) bool) []NodeInfo {
-    if n == 0 {
-        return []NodeInfo{}
-    }
-
-    nodeHeap := nodeMaxHeap{lessFunc: nodeLess}
-
-    // TODO(aaronl): Is is possible to avoid checking constraints on every
-    // node? Perhaps we should try to schedule with n*2 nodes that weren't
-    // prescreened, and repeat the selection if there weren't enough nodes
-    // meeting the constraints.
+func (ns *nodeSet) tree(serviceID string, preferences []*api.PlacementPreference, maxAssignments int, meetsConstraints func(*NodeInfo) bool, nodeLess func(*NodeInfo, *NodeInfo) bool) decisionTree {
+    var root decisionTree
+
+    if maxAssignments == 0 {
+        return root
+    }
+
     for _, node := range ns.nodes {
-        // If there are fewer then n nodes in the heap, we add this
-        // node if it meets the constraints. Otherwise, the heap has
-        // n nodes, and if this node is better than the worst node in
-        // the heap, we replace the worst node and then fix the heap.
-        if nodeHeap.Len() < n {
-            if meetsConstraints(&node) {
-                heap.Push(&nodeHeap, node)
-            }
-        } else if nodeLess(&node, &nodeHeap.nodes[0]) {
-            if meetsConstraints(&node) {
-                nodeHeap.nodes[0] = node
-                heap.Fix(&nodeHeap, 0)
-            }
+        tree := &root
+        for _, pref := range preferences {
+            // Only spread is supported so far
+            spread := pref.GetSpread()
+            if spread == nil {
+                continue
+            }
+
+            descriptor := spread.SpreadDescriptor
+            var value string
+            switch {
+            case len(descriptor) > len(constraint.NodeLabelPrefix) && strings.EqualFold(descriptor[:len(constraint.NodeLabelPrefix)], constraint.NodeLabelPrefix):
+                if node.Spec.Annotations.Labels != nil {
+                    value = node.Spec.Annotations.Labels[descriptor[len(constraint.NodeLabelPrefix):]]
+                }
+            case len(descriptor) > len(constraint.EngineLabelPrefix) && strings.EqualFold(descriptor[:len(constraint.EngineLabelPrefix)], constraint.EngineLabelPrefix):
+                if node.Description != nil && node.Description.Engine != nil && node.Description.Engine.Labels != nil {
+                    value = node.Description.Engine.Labels[descriptor[len(constraint.EngineLabelPrefix):]]
+                }
+            // TODO(aaronl): Support other items from constraint
+            // syntax like node ID, hostname, os/arch, etc?
+            default:
+                continue
+            }
+
+            // If value is still uninitialized, the value used for
+            // the node at this level of the tree is "". This makes
+            // sure that the tree structure is not affected by
+            // which properties nodes have and don't have.
+
+            if node.DesiredRunningTasksCountByService != nil {
+                tree.tasks += node.DesiredRunningTasksCountByService[serviceID]
+            }
+
+            if tree.next == nil {
+                tree.next = make(map[string]*decisionTree)
+            }
+            next := tree.next[value]
+            if next == nil {
+                next = &decisionTree{}
+                tree.next[value] = next
+            }
+            tree = next
+        }
+
+        if tree.nodeHeap.lessFunc == nil {
+            tree.nodeHeap.lessFunc = nodeLess
+        }
+
+        if tree.nodeHeap.Len() < maxAssignments {
+            if meetsConstraints(&node) {
+                heap.Push(&tree.nodeHeap, node)
+            }
+        } else if nodeLess(&node, &tree.nodeHeap.nodes[0]) {
+            if meetsConstraints(&node) {
+                tree.nodeHeap.nodes[0] = node
+                heap.Fix(&tree.nodeHeap, 0)
+            }
         }
     }
 
-    // Popping every element orders the nodes from best to worst. The
-    // first pop gets the worst node (since this a max-heap), and puts it
-    // at position n-1. Then the next pop puts the next-worst at n-2, and
-    // so on.
-    for nodeHeap.Len() > 0 {
-        heap.Pop(&nodeHeap)
-    }
-
-    return nodeHeap.nodes
+    return root
 }
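To make the tree construction above concrete: with two spread preferences, say node.labels.az followed by node.labels.rack, every node descends two levels, so root.next["az1"].next["rackA"] is a leaf whose nodeHeap holds the qualifying nodes with that az/rack combination, and the tasks counter on each subtree along the path accumulates the service's desired running tasks under it. Nodes that lack a label are grouped under the empty-string key, so the shape of the tree does not depend on which labels a node happens to have.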
vendor/github.com/docker/swarmkit/manager/scheduler/scheduler.go (generated, vendored, 88 changed lines)

@@ -531,21 +531,82 @@ func (s *Scheduler) scheduleTaskGroup(ctx context.Context, taskGroup map[string]
         return a.DesiredRunningTasksCount < b.DesiredRunningTasksCount
     }
 
-    nodes := s.nodeSet.findBestNodes(len(taskGroup), s.pipeline.Process, nodeLess)
-    nodeCount := len(nodes)
-    if nodeCount == 0 {
-        s.noSuitableNode(ctx, taskGroup, schedulingDecisions)
-        return
-    }
-
+    var prefs []*api.PlacementPreference
+    if t.Spec.Placement != nil {
+        prefs = t.Spec.Placement.Preferences
+    }
+
+    tree := s.nodeSet.tree(t.ServiceID, prefs, len(taskGroup), s.pipeline.Process, nodeLess)
+
+    s.scheduleNTasksOnSubtree(ctx, len(taskGroup), taskGroup, &tree, schedulingDecisions, nodeLess)
+    if len(taskGroup) != 0 {
+        s.noSuitableNode(ctx, taskGroup, schedulingDecisions)
+    }
+}
+
+func (s *Scheduler) scheduleNTasksOnSubtree(ctx context.Context, n int, taskGroup map[string]*api.Task, tree *decisionTree, schedulingDecisions map[string]schedulingDecision, nodeLess func(a *NodeInfo, b *NodeInfo) bool) int {
+    if tree.next == nil {
+        nodes := tree.orderedNodes(s.pipeline.Process, nodeLess)
+        if len(nodes) == 0 {
+            return 0
+        }
+
+        return s.scheduleNTasksOnNodes(ctx, n, taskGroup, nodes, schedulingDecisions, nodeLess)
+    }
+
+    // Walk the tree and figure out how the tasks should be split at each
+    // level.
+    tasksScheduled := 0
+    tasksInUsableBranches := tree.tasks
+    var noRoom map[*decisionTree]struct{}
+
+    // Try to make branches even until either all branches are
+    // full, or all tasks have been scheduled.
+    for tasksScheduled != n && len(noRoom) != len(tree.next) {
+        desiredTasksPerBranch := (tasksInUsableBranches + n - tasksScheduled) / (len(tree.next) - len(noRoom))
+        remainder := (tasksInUsableBranches + n - tasksScheduled) % (len(tree.next) - len(noRoom))
+
+        for _, subtree := range tree.next {
+            if noRoom != nil {
+                if _, ok := noRoom[subtree]; ok {
+                    continue
+                }
+            }
+            subtreeTasks := subtree.tasks
+            if subtreeTasks < desiredTasksPerBranch || (subtreeTasks == desiredTasksPerBranch && remainder > 0) {
+                tasksToAssign := desiredTasksPerBranch - subtreeTasks
+                if remainder > 0 {
+                    tasksToAssign++
+                }
+                res := s.scheduleNTasksOnSubtree(ctx, tasksToAssign, taskGroup, subtree, schedulingDecisions, nodeLess)
+                if res < tasksToAssign {
+                    if noRoom == nil {
+                        noRoom = make(map[*decisionTree]struct{})
+                    }
+                    noRoom[subtree] = struct{}{}
+                    tasksInUsableBranches -= subtreeTasks
+                } else if remainder > 0 {
+                    remainder--
+                }
+                tasksScheduled += res
+            }
+        }
+    }
+
+    return tasksScheduled
+}
+
+func (s *Scheduler) scheduleNTasksOnNodes(ctx context.Context, n int, taskGroup map[string]*api.Task, nodes []NodeInfo, schedulingDecisions map[string]schedulingDecision, nodeLess func(a *NodeInfo, b *NodeInfo) bool) int {
+    tasksScheduled := 0
     failedConstraints := make(map[int]bool) // key is index in nodes slice
     nodeIter := 0
+    nodeCount := len(nodes)
     for taskID, t := range taskGroup {
-        n := &nodes[nodeIter%nodeCount]
+        node := &nodes[nodeIter%nodeCount]
 
-        log.G(ctx).WithField("task.id", t.ID).Debugf("assigning to node %s", n.ID)
+        log.G(ctx).WithField("task.id", t.ID).Debugf("assigning to node %s", node.ID)
         newT := *t
-        newT.NodeID = n.ID
+        newT.NodeID = node.ID
         newT.Status = api.TaskStatus{
             State: api.TaskStateAssigned,
             Timestamp: ptypes.MustTimestampProto(time.Now()),

@@ -553,7 +614,7 @@ func (s *Scheduler) scheduleTaskGroup(ctx context.Context, taskGroup map[string]
         }
         s.allTasks[t.ID] = &newT
 
-        nodeInfo, err := s.nodeSet.nodeInfo(n.ID)
+        nodeInfo, err := s.nodeSet.nodeInfo(node.ID)
         if err == nil && nodeInfo.addTask(&newT) {
             s.nodeSet.updateNode(nodeInfo)
             nodes[nodeIter%nodeCount] = nodeInfo

@@ -561,6 +622,10 @@ func (s *Scheduler) scheduleTaskGroup(ctx context.Context, taskGroup map[string]
 
         schedulingDecisions[taskID] = schedulingDecision{old: t, new: &newT}
         delete(taskGroup, taskID)
+        tasksScheduled++
+        if tasksScheduled == n {
+            return tasksScheduled
+        }
 
         if nodeIter+1 < nodeCount {
             // First pass fills the nodes until they have the same

@@ -581,11 +646,12 @@ func (s *Scheduler) scheduleTaskGroup(ctx context.Context, taskGroup map[string]
             nodeIter++
             if nodeIter-origNodeIter == nodeCount {
                 // None of the nodes meet the constraints anymore.
-                s.noSuitableNode(ctx, taskGroup, schedulingDecisions)
-                return
+                return tasksScheduled
             }
         }
     }
+
+    return tasksScheduled
 }
 
 func (s *Scheduler) noSuitableNode(ctx context.Context, taskGroup map[string]*api.Task, schedulingDecisions map[string]schedulingDecision) {
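The even-split arithmetic in scheduleNTasksOnSubtree is easiest to see with numbers. The sketch below mirrors a single pass of that arithmetic with plain ints instead of decision trees, assuming every branch has room; it is an illustration, not code from this commit:

package main

import "fmt"

// splitEvenly returns how many of n new tasks each branch should receive so
// that branch totals (existing + new) come out as even as possible, following
// the desiredTasksPerBranch/remainder calculation used by the scheduler.
func splitEvenly(n int, branchTasks []int) []int {
    assigned := make([]int, len(branchTasks))
    existing := 0
    for _, t := range branchTasks {
        existing += t
    }
    desired := (existing + n) / len(branchTasks)
    remainder := (existing + n) % len(branchTasks)
    for i, t := range branchTasks {
        if t < desired || (t == desired && remainder > 0) {
            toAssign := desired - t
            if remainder > 0 {
                toAssign++
                remainder--
            }
            assigned[i] = toAssign
        }
    }
    return assigned
}

func main() {
    // Three branches already running 2, 0 and 1 tasks; 7 new tasks to place.
    // Prints [2 3 2], leaving branch totals of 4, 3 and 3.
    fmt.Println(splitEvenly(7, []int{2, 0, 1}))
}

With 7 tasks to place over branches already running 2, 0 and 1 tasks, desiredTasksPerBranch is (3+7)/3 = 3 with remainder 1, so the branches receive 2, 3 and 2 new tasks and end up holding 4, 3 and 3.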