moby/libnetwork/endpoint.go

1200 lines
32 KiB
Go
Raw Normal View History

add more //go:build directives to prevent downgrading to go1.16 language This is a follow-up to 2cf230951fe29e2c2eda9eed43042524435c761a, adding more directives to adjust for some new code added since: Before this patch: make -C ./internal/gocompat/ GO111MODULE=off go generate . GO111MODULE=on go mod tidy GO111MODULE=on go test -v # github.com/docker/docker/internal/sliceutil internal/sliceutil/sliceutil.go:3:12: type parameter requires go1.18 or later (-lang was set to go1.16; check go.mod) internal/sliceutil/sliceutil.go:3:14: predeclared comparable requires go1.18 or later (-lang was set to go1.16; check go.mod) internal/sliceutil/sliceutil.go:4:19: invalid map key type T (missing comparable constraint) # github.com/docker/docker/libnetwork libnetwork/endpoint.go:252:17: implicit function instantiation requires go1.18 or later (-lang was set to go1.16; check go.mod) # github.com/docker/docker/daemon daemon/container_operations.go:682:9: implicit function instantiation requires go1.18 or later (-lang was set to go1.16; check go.mod) daemon/inspect.go:42:18: implicit function instantiation requires go1.18 or later (-lang was set to go1.16; check go.mod) With this patch: make -C ./internal/gocompat/ GO111MODULE=off go generate . GO111MODULE=on go mod tidy GO111MODULE=on go test -v === RUN TestModuleCompatibllity main_test.go:321: all packages have the correct go version specified through //go:build --- PASS: TestModuleCompatibllity (0.00s) PASS ok gocompat 0.031s make: Leaving directory '/go/src/github.com/docker/docker/internal/gocompat' Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2024-01-25 10:18:44 +00:00
// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
//go:build go1.19
package libnetwork
import (
"context"
"encoding/json"
"fmt"
"net"
"sync"
"github.com/containerd/log"
"github.com/docker/docker/internal/sliceutil"
"github.com/docker/docker/libnetwork/datastore"
"github.com/docker/docker/libnetwork/ipamapi"
"github.com/docker/docker/libnetwork/netlabel"
"github.com/docker/docker/libnetwork/options"
"github.com/docker/docker/libnetwork/scope"
"github.com/docker/docker/libnetwork/types"
)
// ByNetworkType sorts a [Endpoint] slice based on the network-type
// they're attached to. It implements [sort.Interface] and can be used
// with [sort.Stable] or [sort.Sort]. It is used by [Sandbox.ResolveName]
// when resolving names in swarm mode. In swarm mode, services with exposed
// ports are connected to user overlay network, ingress network, and local
// ("docker_gwbridge") networks. Name resolution should prioritize returning
// the VIP/IPs on user overlay network over ingress and local networks.
//
// ByNetworkType re-orders the endpoints based on the network-type they
// are attached to:
//
// 1. dynamic networks (user overlay networks)
// 2. ingress network(s)
// 3. local networks ("docker_gwbridge")
type ByNetworkType []*Endpoint
func (ep ByNetworkType) Len() int { return len(ep) }
func (ep ByNetworkType) Swap(i, j int) { ep[i], ep[j] = ep[j], ep[i] }
func (ep ByNetworkType) Less(i, j int) bool {
return getNetworkType(ep[i].getNetwork()) < getNetworkType(ep[j].getNetwork())
}
// Define the order in which resolution should happen if an endpoint is
// attached to multiple network-types. It is used by [ByNetworkType].
const (
typeDynamic = iota
typeIngress
typeLocal
)
func getNetworkType(nw *Network) int {
switch {
case nw.ingress:
return typeIngress
case nw.dynamic:
return typeDynamic
default:
return typeLocal
}
}
// EndpointOption is an option setter function type used to pass various options to Network
// and Endpoint interfaces methods. The various setter functions of type EndpointOption are
// provided by libnetwork, they look like <Create|Join|Leave>Option[...](...)
type EndpointOption func(ep *Endpoint)
// Endpoint represents a logical connection between a network and a sandbox.
type Endpoint struct {
name string
id string
network *Network
iface *EndpointInterface
joinInfo *endpointJoinInfo
sandboxID string
exposedPorts []types.TransportPort
// dnsNames holds all the non-fully qualified DNS names associated to this endpoint. Order matters: first entry
// will be used for the PTR records associated to the endpoint's IPv4 and IPv6 addresses.
dnsNames []string
disableResolution bool
generic map[string]interface{}
prefAddress net.IP
prefAddressV6 net.IP
ipamOptions map[string]string
aliases map[string]string
svcID string
svcName string
virtualIP net.IP
svcAliases []string
ingressPorts []*PortConfig
dbIndex uint64
dbExists bool
serviceEnabled bool
loadBalancer bool
mu sync.Mutex
}
func (ep *Endpoint) MarshalJSON() ([]byte, error) {
ep.mu.Lock()
defer ep.mu.Unlock()
epMap := make(map[string]interface{})
epMap["name"] = ep.name
epMap["id"] = ep.id
epMap["ep_iface"] = ep.iface
epMap["joinInfo"] = ep.joinInfo
epMap["exposed_ports"] = ep.exposedPorts
if ep.generic != nil {
epMap["generic"] = ep.generic
}
epMap["sandbox"] = ep.sandboxID
epMap["dnsNames"] = ep.dnsNames
epMap["disableResolution"] = ep.disableResolution
epMap["svcName"] = ep.svcName
epMap["svcID"] = ep.svcID
epMap["virtualIP"] = ep.virtualIP.String()
epMap["ingressPorts"] = ep.ingressPorts
epMap["svcAliases"] = ep.svcAliases
epMap["loadBalancer"] = ep.loadBalancer
return json.Marshal(epMap)
}
func (ep *Endpoint) UnmarshalJSON(b []byte) (err error) {
ep.mu.Lock()
defer ep.mu.Unlock()
var epMap map[string]interface{}
if err := json.Unmarshal(b, &epMap); err != nil {
return err
}
ep.name = epMap["name"].(string)
ep.id = epMap["id"].(string)
// TODO(cpuguy83): So yeah, this isn't checking any errors anywhere.
// Seems like we should be checking errors even because of memory related issues that can arise.
// Alas it seems like given the nature of this data we could introduce problems if we start checking these errors.
//
// If anyone ever comes here and figures out one way or another if we can/should be checking these errors and it turns out we can't... then please document *why*
ib, _ := json.Marshal(epMap["ep_iface"])
json.Unmarshal(ib, &ep.iface) //nolint:errcheck
jb, _ := json.Marshal(epMap["joinInfo"])
json.Unmarshal(jb, &ep.joinInfo) //nolint:errcheck
tb, _ := json.Marshal(epMap["exposed_ports"])
var tPorts []types.TransportPort
json.Unmarshal(tb, &tPorts) //nolint:errcheck
ep.exposedPorts = tPorts
cb, _ := json.Marshal(epMap["sandbox"])
json.Unmarshal(cb, &ep.sandboxID) //nolint:errcheck
if v, ok := epMap["generic"]; ok {
ep.generic = v.(map[string]interface{})
if opt, ok := ep.generic[netlabel.PortMap]; ok {
pblist := []types.PortBinding{}
for i := 0; i < len(opt.([]interface{})); i++ {
pb := types.PortBinding{}
tmp := opt.([]interface{})[i].(map[string]interface{})
bytes, err := json.Marshal(tmp)
if err != nil {
log.G(context.TODO()).Error(err)
break
}
err = json.Unmarshal(bytes, &pb)
if err != nil {
log.G(context.TODO()).Error(err)
break
}
pblist = append(pblist, pb)
}
ep.generic[netlabel.PortMap] = pblist
}
if opt, ok := ep.generic[netlabel.ExposedPorts]; ok {
tplist := []types.TransportPort{}
for i := 0; i < len(opt.([]interface{})); i++ {
tp := types.TransportPort{}
tmp := opt.([]interface{})[i].(map[string]interface{})
bytes, err := json.Marshal(tmp)
if err != nil {
log.G(context.TODO()).Error(err)
break
}
err = json.Unmarshal(bytes, &tp)
if err != nil {
log.G(context.TODO()).Error(err)
break
}
tplist = append(tplist, tp)
}
ep.generic[netlabel.ExposedPorts] = tplist
}
}
var anonymous bool
if v, ok := epMap["anonymous"]; ok {
anonymous = v.(bool)
}
if v, ok := epMap["disableResolution"]; ok {
ep.disableResolution = v.(bool)
}
if sn, ok := epMap["svcName"]; ok {
ep.svcName = sn.(string)
}
if si, ok := epMap["svcID"]; ok {
ep.svcID = si.(string)
}
if vip, ok := epMap["virtualIP"]; ok {
ep.virtualIP = net.ParseIP(vip.(string))
}
if v, ok := epMap["loadBalancer"]; ok {
ep.loadBalancer = v.(bool)
}
sal, _ := json.Marshal(epMap["svcAliases"])
var svcAliases []string
json.Unmarshal(sal, &svcAliases) //nolint:errcheck
ep.svcAliases = svcAliases
pc, _ := json.Marshal(epMap["ingressPorts"])
var ingressPorts []*PortConfig
json.Unmarshal(pc, &ingressPorts) //nolint:errcheck
ep.ingressPorts = ingressPorts
ma, _ := json.Marshal(epMap["myAliases"])
var myAliases []string
json.Unmarshal(ma, &myAliases) //nolint:errcheck
_, hasDNSNames := epMap["dnsNames"]
dn, _ := json.Marshal(epMap["dnsNames"])
var dnsNames []string
json.Unmarshal(dn, &dnsNames)
ep.dnsNames = dnsNames
// TODO(aker): remove this migration code in v27
if !hasDNSNames {
// The field dnsNames was introduced in v25.0. If we don't have it, the on-disk state was written by an older
// daemon, thus we need to populate dnsNames based off of myAliases and anonymous values.
if !anonymous {
myAliases = append([]string{ep.name}, myAliases...)
}
ep.dnsNames = sliceutil.Dedup(myAliases)
}
return nil
}
func (ep *Endpoint) New() datastore.KVObject {
return &Endpoint{network: ep.getNetwork()}
}
func (ep *Endpoint) CopyTo(o datastore.KVObject) error {
ep.mu.Lock()
defer ep.mu.Unlock()
dstEp := o.(*Endpoint)
dstEp.name = ep.name
dstEp.id = ep.id
dstEp.sandboxID = ep.sandboxID
dstEp.dbIndex = ep.dbIndex
dstEp.dbExists = ep.dbExists
dstEp.disableResolution = ep.disableResolution
dstEp.svcName = ep.svcName
dstEp.svcID = ep.svcID
dstEp.virtualIP = ep.virtualIP
dstEp.loadBalancer = ep.loadBalancer
dstEp.svcAliases = make([]string, len(ep.svcAliases))
copy(dstEp.svcAliases, ep.svcAliases)
dstEp.ingressPorts = make([]*PortConfig, len(ep.ingressPorts))
copy(dstEp.ingressPorts, ep.ingressPorts)
if ep.iface != nil {
dstEp.iface = &EndpointInterface{}
if err := ep.iface.CopyTo(dstEp.iface); err != nil {
return err
}
}
if ep.joinInfo != nil {
dstEp.joinInfo = &endpointJoinInfo{}
if err := ep.joinInfo.CopyTo(dstEp.joinInfo); err != nil {
return err
}
}
dstEp.exposedPorts = make([]types.TransportPort, len(ep.exposedPorts))
copy(dstEp.exposedPorts, ep.exposedPorts)
dstEp.dnsNames = make([]string, len(ep.dnsNames))
copy(dstEp.dnsNames, ep.dnsNames)
dstEp.generic = options.Generic{}
for k, v := range ep.generic {
dstEp.generic[k] = v
}
return nil
}
// ID returns the system-generated id for this endpoint.
func (ep *Endpoint) ID() string {
ep.mu.Lock()
defer ep.mu.Unlock()
return ep.id
}
// Name returns the name of this endpoint.
func (ep *Endpoint) Name() string {
ep.mu.Lock()
defer ep.mu.Unlock()
return ep.name
}
// Network returns the name of the network to which this endpoint is attached.
func (ep *Endpoint) Network() string {
if ep.network == nil {
return ""
}
return ep.network.name
}
// getDNSNames returns a copy of the DNS names associated to this endpoint. The first entry is the one used for PTR
// records.
func (ep *Endpoint) getDNSNames() []string {
ep.mu.Lock()
defer ep.mu.Unlock()
dnsNames := make([]string, len(ep.dnsNames))
copy(dnsNames, ep.dnsNames)
return dnsNames
}
// isServiceEnabled check if service is enabled on the endpoint
func (ep *Endpoint) isServiceEnabled() bool {
ep.mu.Lock()
defer ep.mu.Unlock()
return ep.serviceEnabled
}
// enableService sets service enabled on the endpoint
func (ep *Endpoint) enableService() {
ep.mu.Lock()
defer ep.mu.Unlock()
ep.serviceEnabled = true
}
// disableService disables service on the endpoint
func (ep *Endpoint) disableService() {
ep.mu.Lock()
defer ep.mu.Unlock()
ep.serviceEnabled = false
}
func (ep *Endpoint) needResolver() bool {
ep.mu.Lock()
defer ep.mu.Unlock()
return !ep.disableResolution
}
// endpoint Key structure : endpoint/network-id/endpoint-id
func (ep *Endpoint) Key() []string {
if ep.network == nil {
return nil
}
return []string{datastore.EndpointKeyPrefix, ep.network.id, ep.id}
}
func (ep *Endpoint) KeyPrefix() []string {
if ep.network == nil {
return nil
}
return []string{datastore.EndpointKeyPrefix, ep.network.id}
}
func (ep *Endpoint) Value() []byte {
b, err := json.Marshal(ep)
if err != nil {
return nil
}
return b
}
func (ep *Endpoint) SetValue(value []byte) error {
return json.Unmarshal(value, ep)
}
func (ep *Endpoint) Index() uint64 {
ep.mu.Lock()
defer ep.mu.Unlock()
return ep.dbIndex
}
func (ep *Endpoint) SetIndex(index uint64) {
ep.mu.Lock()
defer ep.mu.Unlock()
ep.dbIndex = index
ep.dbExists = true
}
func (ep *Endpoint) Exists() bool {
ep.mu.Lock()
defer ep.mu.Unlock()
return ep.dbExists
}
func (ep *Endpoint) Skip() bool {
return ep.getNetwork().Skip()
}
func (ep *Endpoint) processOptions(options ...EndpointOption) {
ep.mu.Lock()
defer ep.mu.Unlock()
for _, opt := range options {
if opt != nil {
opt(ep)
}
}
}
func (ep *Endpoint) getNetwork() *Network {
ep.mu.Lock()
defer ep.mu.Unlock()
return ep.network
}
func (ep *Endpoint) getNetworkFromStore() (*Network, error) {
if ep.network == nil {
return nil, fmt.Errorf("invalid network object in endpoint %s", ep.Name())
}
return ep.network.getController().getNetworkFromStore(ep.network.id)
}
// Join joins the sandbox to the endpoint and populates into the sandbox
// the network resources allocated for the endpoint.
func (ep *Endpoint) Join(sb *Sandbox, options ...EndpointOption) error {
if sb == nil || sb.ID() == "" || sb.Key() == "" {
return types.InvalidParameterErrorf("invalid Sandbox passed to endpoint join: %v", sb)
}
sb.joinLeaveStart()
defer sb.joinLeaveEnd()
return ep.sbJoin(sb, options...)
}
func (ep *Endpoint) sbJoin(sb *Sandbox, options ...EndpointOption) (err error) {
n, err := ep.getNetworkFromStore()
if err != nil {
return fmt.Errorf("failed to get network from store during join: %v", err)
}
ep, err = n.getEndpointFromStore(ep.ID())
if err != nil {
return fmt.Errorf("failed to get endpoint from store during join: %v", err)
}
ep.mu.Lock()
if ep.sandboxID != "" {
ep.mu.Unlock()
return types.ForbiddenErrorf("another container is attached to the same network endpoint")
}
ep.network = n
ep.sandboxID = sb.ID()
ep.joinInfo = &endpointJoinInfo{}
epid := ep.id
ep.mu.Unlock()
defer func() {
if err != nil {
ep.mu.Lock()
ep.sandboxID = ""
ep.mu.Unlock()
}
}()
nid := n.ID()
ep.processOptions(options...)
d, err := n.driver(true)
if err != nil {
return fmt.Errorf("failed to get driver during join: %v", err)
}
err = d.Join(nid, epid, sb.Key(), ep, sb.Labels())
if err != nil {
return err
}
defer func() {
if err != nil {
if e := d.Leave(nid, epid); e != nil {
log.G(context.TODO()).Warnf("driver leave failed while rolling back join: %v", e)
}
}
}()
if !n.getController().isAgent() {
if !n.getController().isSwarmNode() || n.Scope() != scope.Swarm || !n.driverIsMultihost() {
n.updateSvcRecord(ep, true)
}
}
if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil {
return err
}
if err = sb.updateDNS(n.enableIPv6); err != nil {
return err
}
// Current endpoint providing external connectivity for the sandbox
extEp := sb.getGatewayEndpoint()
sb.addEndpoint(ep)
defer func() {
if err != nil {
sb.removeEndpoint(ep)
}
}()
if err = sb.populateNetworkResources(ep); err != nil {
return err
}
if err = n.getController().updateToStore(ep); err != nil {
return err
}
if err = ep.addDriverInfoToCluster(); err != nil {
return err
}
defer func() {
if err != nil {
if e := ep.deleteDriverInfoFromCluster(); e != nil {
log.G(context.TODO()).Errorf("Could not delete endpoint state for endpoint %s from cluster on join failure: %v", ep.Name(), e)
}
}
}()
Add endpoint load-balancing mode This is the heart of the scalability change for services in libnetwork. The present routing mesh adds load-balancing rules for a network to every container connected to the network. This newer approach creates a load-balancing endpoint per network per node. For every service on a network, libnetwork assigns the VIP of the service to the endpoint's interface as an alias. This endpoint must have a unique IP address in order to route return traffic to it. Traffic destined for a service's VIP arrives at the load-balancing endpoint on the VIP and from there, Linux load balances it among backend destinations while SNATing said traffic to the endpoint's unique IP address. The net result of this scheme is that each node in a swarm need only have one set of load balancing state per service instead of one per container on the node. This scheme is very similar to how services currently operate on Windows nodes in libnetwork. It (as with Windows nodes) costs the use of extra IP addresses in a network (one per node) and an extra network hop in the stack, although, always in the stack local to the container. In order to prevent existing deployments from suddenly failing if they failed to allocate sufficient address space to include per-node load-balancing endpoint IP addresses, this patch preserves the existing functionality and activates the new functionality on a per-network basis depending on whether the network has a load-balancing endpoint. Eventually, moby should always set this option when creating new networks and should only omit it for networks created as part of a swarm that are not marked to use endpoint load balancing. This patch also normalizes the code to treat "load" and "balancer" as two separate words from the perspectives of variable/function naming. This means that the 'b' in "balancer" must be capitalized. Signed-off-by: Chris Telfer <ctelfer@docker.com>
2018-04-10 16:34:41 +00:00
// Load balancing endpoints should never have a default gateway nor
// should they alter the status of a network's default gateway
if ep.loadBalancer && !sb.ingress {
return nil
}
if sb.needDefaultGW() && sb.getEndpointInGWNetwork() == nil {
return sb.setupDefaultGW()
}
moveExtConn := sb.getGatewayEndpoint() != extEp
if moveExtConn {
if extEp != nil {
log.G(context.TODO()).Debugf("Revoking external connectivity on endpoint %s (%s)", extEp.Name(), extEp.ID())
extN, err := extEp.getNetworkFromStore()
if err != nil {
return fmt.Errorf("failed to get network from store for revoking external connectivity during join: %v", err)
}
extD, err := extN.driver(true)
if err != nil {
return fmt.Errorf("failed to get driver for revoking external connectivity during join: %v", err)
}
if err = extD.RevokeExternalConnectivity(extEp.network.ID(), extEp.ID()); err != nil {
return types.InternalErrorf(
"driver failed revoking external connectivity on endpoint %s (%s): %v",
extEp.Name(), extEp.ID(), err)
}
defer func() {
if err != nil {
if e := extD.ProgramExternalConnectivity(extEp.network.ID(), extEp.ID(), sb.Labels()); e != nil {
log.G(context.TODO()).Warnf("Failed to roll-back external connectivity on endpoint %s (%s): %v",
extEp.Name(), extEp.ID(), e)
}
}
}()
}
if !n.internal {
log.G(context.TODO()).Debugf("Programming external connectivity on endpoint %s (%s)", ep.Name(), ep.ID())
if err = d.ProgramExternalConnectivity(n.ID(), ep.ID(), sb.Labels()); err != nil {
return types.InternalErrorf(
"driver failed programming external connectivity on endpoint %s (%s): %v",
ep.Name(), ep.ID(), err)
}
}
}
if !sb.needDefaultGW() {
if e := sb.clearDefaultGW(); e != nil {
log.G(context.TODO()).Warnf("Failure while disconnecting sandbox %s (%s) from gateway network: %v",
sb.ID(), sb.ContainerID(), e)
}
}
return nil
}
func (ep *Endpoint) rename(name string) error {
ep.mu.Lock()
ep.name = name
ep.mu.Unlock()
// Update the store with the updated name
if err := ep.getNetwork().getController().updateToStore(ep); err != nil {
return err
}
return nil
}
func (ep *Endpoint) UpdateDNSNames(dnsNames []string) error {
nw := ep.getNetwork()
c := nw.getController()
sb, ok := ep.getSandbox()
if !ok {
log.G(context.TODO()).WithFields(log.Fields{
"sandboxID": ep.sandboxID,
"endpointID": ep.ID(),
}).Warn("DNSNames update aborted, sandbox is not present anymore")
return nil
}
if c.isAgent() {
if err := ep.deleteServiceInfoFromCluster(sb, true, "UpdateDNSNames"); err != nil {
return types.InternalErrorf("could not delete service state for endpoint %s from cluster on UpdateDNSNames: %v", ep.Name(), err)
}
ep.dnsNames = dnsNames
if err := ep.addServiceInfoToCluster(sb); err != nil {
return types.InternalErrorf("could not add service state for endpoint %s to cluster on UpdateDNSNames: %v", ep.Name(), err)
}
} else {
nw.updateSvcRecord(ep, false)
ep.dnsNames = dnsNames
nw.updateSvcRecord(ep, true)
}
// Update the store with the updated name
if err := c.updateToStore(ep); err != nil {
return err
}
return nil
}
func (ep *Endpoint) hasInterface(iName string) bool {
ep.mu.Lock()
defer ep.mu.Unlock()
return ep.iface != nil && ep.iface.srcName == iName
}
// Leave detaches the network resources populated in the sandbox.
func (ep *Endpoint) Leave(sb *Sandbox) error {
if sb == nil || sb.ID() == "" || sb.Key() == "" {
return types.InvalidParameterErrorf("invalid Sandbox passed to endpoint leave: %v", sb)
}
sb.joinLeaveStart()
defer sb.joinLeaveEnd()
return ep.sbLeave(sb, false)
}
func (ep *Endpoint) sbLeave(sb *Sandbox, force bool) error {
n, err := ep.getNetworkFromStore()
if err != nil {
return fmt.Errorf("failed to get network from store during leave: %v", err)
}
ep, err = n.getEndpointFromStore(ep.ID())
if err != nil {
return fmt.Errorf("failed to get endpoint from store during leave: %v", err)
}
ep.mu.Lock()
sid := ep.sandboxID
ep.mu.Unlock()
if sid == "" {
return types.ForbiddenErrorf("cannot leave endpoint with no attached sandbox")
}
if sid != sb.ID() {
return types.ForbiddenErrorf("unexpected sandbox ID in leave request. Expected %s. Got %s", ep.sandboxID, sb.ID())
}
d, err := n.driver(!force)
if err != nil {
return fmt.Errorf("failed to get driver during endpoint leave: %v", err)
}
ep.mu.Lock()
ep.sandboxID = ""
ep.network = n
ep.mu.Unlock()
// Current endpoint providing external connectivity to the sandbox
extEp := sb.getGatewayEndpoint()
moveExtConn := extEp != nil && (extEp.ID() == ep.ID())
if d != nil {
if moveExtConn {
log.G(context.TODO()).Debugf("Revoking external connectivity on endpoint %s (%s)", ep.Name(), ep.ID())
if err := d.RevokeExternalConnectivity(n.id, ep.id); err != nil {
log.G(context.TODO()).Warnf("driver failed revoking external connectivity on endpoint %s (%s): %v",
ep.Name(), ep.ID(), err)
}
}
if err := d.Leave(n.id, ep.id); err != nil {
if _, ok := err.(types.MaskableError); !ok {
log.G(context.TODO()).Warnf("driver error disconnecting container %s : %v", ep.name, err)
}
}
}
if err := ep.deleteServiceInfoFromCluster(sb, true, "sbLeave"); err != nil {
log.G(context.TODO()).Warnf("Failed to clean up service info on container %s disconnect: %v", ep.name, err)
Gracefully remove LB endpoints from services This patch attempts to allow endpoints to complete servicing connections while being removed from a service. The change adds a flag to the endpoint.deleteServiceInfoFromCluster() method to indicate whether this removal should fully remove connectivity through the load balancer to the endpoint or should just disable directing further connections to the endpoint. If the flag is 'false', then the load balancer assigns a weight of 0 to the endpoint but does not remove it as a linux load balancing destination. It does remove the endpoint as a docker load balancing endpoint but tracks it in a special map of "disabled-but-not- destroyed" load balancing endpoints. This allows traffic to continue flowing, at least under Linux. If the flag is 'true', then the code removes the endpoint entirely as a load balancing destination. The sandbox.DisableService() method invokes deleteServiceInfoFromCluster() with the flag sent to 'false', while the endpoint.sbLeave() method invokes it with the flag set to 'true' to complete the removal on endpoint finalization. Renaming the endpoint invokes deleteServiceInfoFromCluster() with the flag set to 'true' because renaming attempts to completely remove and then re-add each endpoint service entry. The controller.rmServiceBinding() method, which carries out the operation, similarly gets a new flag for whether to fully remove the endpoint. If the flag is false, it does the job of moving the endpoint from the load balancing set to the 'disabled' set. It then removes or de-weights the entry in the OS load balancing table via network.rmLBBackend(). It removes the service entirely via said method ONLY IF there are no more live or disabled load balancing endpoints. Similarly network.addLBBackend() requires slight tweaking to properly manage the disabled set. Finally, this change requires propagating the status of disabled service endpoints via the networkDB. Accordingly, the patch includes both code to generate and handle service update messages. It also augments the service structure with a ServiceDisabled boolean to convey whether an endpoint should ultimately be removed or just disabled. This, naturally, required a rebuild of the protocol buffer code as well. Signed-off-by: Chris Telfer <ctelfer@docker.com>
2018-02-14 22:04:23 +00:00
}
if err := sb.clearNetworkResources(ep); err != nil {
log.G(context.TODO()).Warnf("Failed to clean up network resources on container %s disconnect: %v", ep.name, err)
}
// Update the store about the sandbox detach only after we
// have completed sb.clearNetworkresources above to avoid
// spurious logs when cleaning up the sandbox when the daemon
// ungracefully exits and restarts before completing sandbox
// detach but after store has been updated.
if err := n.getController().updateToStore(ep); err != nil {
return err
}
if e := ep.deleteDriverInfoFromCluster(); e != nil {
log.G(context.TODO()).Errorf("Failed to delete endpoint state for endpoint %s from cluster: %v", ep.Name(), e)
}
sb.deleteHostsEntries(n.getSvcRecords(ep))
if !sb.inDelete && sb.needDefaultGW() && sb.getEndpointInGWNetwork() == nil {
return sb.setupDefaultGW()
}
// New endpoint providing external connectivity for the sandbox
extEp = sb.getGatewayEndpoint()
if moveExtConn && extEp != nil {
log.G(context.TODO()).Debugf("Programming external connectivity on endpoint %s (%s)", extEp.Name(), extEp.ID())
extN, err := extEp.getNetworkFromStore()
if err != nil {
return fmt.Errorf("failed to get network from store for programming external connectivity during leave: %v", err)
}
extD, err := extN.driver(true)
if err != nil {
return fmt.Errorf("failed to get driver for programming external connectivity during leave: %v", err)
}
if err := extD.ProgramExternalConnectivity(extEp.network.ID(), extEp.ID(), sb.Labels()); err != nil {
log.G(context.TODO()).Warnf("driver failed programming external connectivity on endpoint %s: (%s) %v",
extEp.Name(), extEp.ID(), err)
}
}
if !sb.needDefaultGW() {
if err := sb.clearDefaultGW(); err != nil {
log.G(context.TODO()).Warnf("Failure while disconnecting sandbox %s (%s) from gateway network: %v",
sb.ID(), sb.ContainerID(), err)
}
}
return nil
}
// Delete deletes and detaches this endpoint from the network.
func (ep *Endpoint) Delete(force bool) error {
var err error
n, err := ep.getNetworkFromStore()
if err != nil {
return fmt.Errorf("failed to get network during Delete: %v", err)
}
ep, err = n.getEndpointFromStore(ep.ID())
if err != nil {
return fmt.Errorf("failed to get endpoint from store during Delete: %v", err)
}
ep.mu.Lock()
epid := ep.id
name := ep.name
sbid := ep.sandboxID
ep.mu.Unlock()
sb, _ := n.getController().SandboxByID(sbid)
if sb != nil && !force {
return &ActiveContainerError{name: name, id: epid}
}
if sb != nil {
if e := ep.sbLeave(sb, force); e != nil {
log.G(context.TODO()).Warnf("failed to leave sandbox for endpoint %s : %v", name, e)
}
}
if err = n.getController().deleteFromStore(ep); err != nil {
return err
}
defer func() {
if err != nil && !force {
ep.dbExists = false
if e := n.getController().updateToStore(ep); e != nil {
log.G(context.TODO()).Warnf("failed to recreate endpoint in store %s : %v", name, e)
}
}
}()
if !n.getController().isSwarmNode() || n.Scope() != scope.Swarm || !n.driverIsMultihost() {
n.updateSvcRecord(ep, false)
}
if err = ep.deleteEndpoint(force); err != nil && !force {
return err
}
ep.releaseAddress()
if err := n.getEpCnt().DecEndpointCnt(); err != nil {
log.G(context.TODO()).Warnf("failed to decrement endpoint count for ep %s: %v", ep.ID(), err)
}
return nil
}
func (ep *Endpoint) deleteEndpoint(force bool) error {
ep.mu.Lock()
n := ep.network
name := ep.name
epid := ep.id
ep.mu.Unlock()
driver, err := n.driver(!force)
if err != nil {
return fmt.Errorf("failed to delete endpoint: %v", err)
}
if driver == nil {
return nil
}
if err := driver.DeleteEndpoint(n.id, epid); err != nil {
if _, ok := err.(types.ForbiddenError); ok {
return err
}
if _, ok := err.(types.MaskableError); !ok {
log.G(context.TODO()).Warnf("driver error deleting endpoint %s : %v", name, err)
}
}
return nil
}
func (ep *Endpoint) getSandbox() (*Sandbox, bool) {
c := ep.network.getController()
ep.mu.Lock()
sid := ep.sandboxID
ep.mu.Unlock()
c.mu.Lock()
ps, ok := c.sandboxes[sid]
c.mu.Unlock()
return ps, ok
}
// Return a list of this endpoint's addresses to add to '/etc/hosts'.
func (ep *Endpoint) getEtcHostsAddrs() []string {
ep.mu.Lock()
defer ep.mu.Unlock()
// Do not update hosts file with internal network's endpoint IP
if n := ep.network; n == nil || n.ingress || n.Name() == libnGWNetwork {
return nil
}
var addresses []string
if ep.iface.addr != nil {
addresses = append(addresses, ep.iface.addr.IP.String())
}
if ep.iface.addrv6 != nil {
addresses = append(addresses, ep.iface.addrv6.IP.String())
}
return addresses
}
// EndpointOptionGeneric function returns an option setter for a Generic option defined
// in a Dictionary of Key-Value pair
func EndpointOptionGeneric(generic map[string]interface{}) EndpointOption {
return func(ep *Endpoint) {
for k, v := range generic {
ep.generic[k] = v
}
}
}
var (
linkLocalMask = net.CIDRMask(16, 32)
linkLocalMaskIPv6 = net.CIDRMask(64, 128)
)
// CreateOptionIpam function returns an option setter for the ipam configuration for this endpoint
func CreateOptionIpam(ipV4, ipV6 net.IP, llIPs []net.IP, ipamOptions map[string]string) EndpointOption {
return func(ep *Endpoint) {
ep.prefAddress = ipV4
ep.prefAddressV6 = ipV6
if len(llIPs) != 0 {
for _, ip := range llIPs {
nw := &net.IPNet{IP: ip, Mask: linkLocalMask}
if ip.To4() == nil {
nw.Mask = linkLocalMaskIPv6
}
ep.iface.llAddrs = append(ep.iface.llAddrs, nw)
}
}
ep.ipamOptions = ipamOptions
}
}
// CreateOptionExposedPorts function returns an option setter for the container exposed
// ports option to be passed to [Network.CreateEndpoint] method.
func CreateOptionExposedPorts(exposedPorts []types.TransportPort) EndpointOption {
return func(ep *Endpoint) {
// Defensive copy
eps := make([]types.TransportPort, len(exposedPorts))
copy(eps, exposedPorts)
// Store endpoint label and in generic because driver needs it
ep.exposedPorts = eps
ep.generic[netlabel.ExposedPorts] = eps
}
}
// CreateOptionPortMapping function returns an option setter for the mapping
// ports option to be passed to [Network.CreateEndpoint] method.
func CreateOptionPortMapping(portBindings []types.PortBinding) EndpointOption {
return func(ep *Endpoint) {
// Store a copy of the bindings as generic data to pass to the driver
pbs := make([]types.PortBinding, len(portBindings))
copy(pbs, portBindings)
ep.generic[netlabel.PortMap] = pbs
}
}
// CreateOptionDNS function returns an option setter for dns entry option to
// be passed to container Create method.
func CreateOptionDNS(dns []string) EndpointOption {
return func(ep *Endpoint) {
ep.generic[netlabel.DNSServers] = dns
}
}
// CreateOptionDNSNames specifies the list of (non fully qualified) DNS names associated to an endpoint. These will be
// used to populate the embedded DNS server. Order matters: first name will be used to generate PTR records.
func CreateOptionDNSNames(names []string) EndpointOption {
return func(ep *Endpoint) {
ep.dnsNames = names
}
}
// CreateOptionDisableResolution function returns an option setter to indicate
// this endpoint doesn't want embedded DNS server functionality
func CreateOptionDisableResolution() EndpointOption {
return func(ep *Endpoint) {
ep.disableResolution = true
}
}
// CreateOptionAlias function returns an option setter for setting endpoint alias
func CreateOptionAlias(name string, alias string) EndpointOption {
return func(ep *Endpoint) {
if ep.aliases == nil {
ep.aliases = make(map[string]string)
}
ep.aliases[alias] = name
}
}
// CreateOptionService function returns an option setter for setting service binding configuration
func CreateOptionService(name, id string, vip net.IP, ingressPorts []*PortConfig, aliases []string) EndpointOption {
return func(ep *Endpoint) {
ep.svcName = name
ep.svcID = id
ep.virtualIP = vip
ep.ingressPorts = ingressPorts
ep.svcAliases = aliases
}
}
// CreateOptionLoadBalancer function returns an option setter for denoting the endpoint is a load balancer for a network
func CreateOptionLoadBalancer() EndpointOption {
return func(ep *Endpoint) {
ep.loadBalancer = true
}
}
// JoinOptionPriority function returns an option setter for priority option to
// be passed to the endpoint.Join() method.
func JoinOptionPriority(prio int) EndpointOption {
return func(ep *Endpoint) {
// ep lock already acquired
c := ep.network.getController()
c.mu.Lock()
sb, ok := c.sandboxes[ep.sandboxID]
c.mu.Unlock()
if !ok {
log.G(context.TODO()).Errorf("Could not set endpoint priority value during Join to endpoint %s: No sandbox id present in endpoint", ep.id)
return
}
sb.epPriority[ep.id] = prio
}
}
func (ep *Endpoint) assignAddress(ipam ipamapi.Ipam, assignIPv4, assignIPv6 bool) error {
var err error
n := ep.getNetwork()
if n.hasSpecialDriver() {
return nil
}
log.G(context.TODO()).Debugf("Assigning addresses for endpoint %s's interface on network %s", ep.Name(), n.Name())
if assignIPv4 {
if err = ep.assignAddressVersion(4, ipam); err != nil {
return err
}
}
if assignIPv6 {
err = ep.assignAddressVersion(6, ipam)
}
return err
}
func (ep *Endpoint) assignAddressVersion(ipVer int, ipam ipamapi.Ipam) error {
var (
poolID *string
address **net.IPNet
prefAdd net.IP
progAdd net.IP
)
n := ep.getNetwork()
switch ipVer {
case 4:
poolID = &ep.iface.v4PoolID
address = &ep.iface.addr
prefAdd = ep.prefAddress
case 6:
poolID = &ep.iface.v6PoolID
address = &ep.iface.addrv6
prefAdd = ep.prefAddressV6
default:
return types.InternalErrorf("incorrect ip version number passed: %d", ipVer)
}
ipInfo := n.getIPInfo(ipVer)
// ipv6 address is not mandatory
if len(ipInfo) == 0 && ipVer == 6 {
return nil
}
// The address to program may be chosen by the user or by the network driver in one specific
// case to support backward compatibility with `docker daemon --fixed-cidrv6` use case
if prefAdd != nil {
progAdd = prefAdd
} else if *address != nil {
progAdd = (*address).IP
}
for _, d := range ipInfo {
if progAdd != nil && !d.Pool.Contains(progAdd) {
continue
}
addr, _, err := ipam.RequestAddress(d.PoolID, progAdd, ep.ipamOptions)
if err == nil {
ep.mu.Lock()
*address = addr
*poolID = d.PoolID
ep.mu.Unlock()
return nil
}
if err != ipamapi.ErrNoAvailableIPs || progAdd != nil {
return err
}
}
if progAdd != nil {
return types.InvalidParameterErrorf("invalid address %s: It does not belong to any of this network's subnets", prefAdd)
}
return fmt.Errorf("no available IPv%d addresses on this network's address pools: %s (%s)", ipVer, n.Name(), n.ID())
}
func (ep *Endpoint) releaseAddress() {
n := ep.getNetwork()
if n.hasSpecialDriver() {
return
}
log.G(context.TODO()).Debugf("Releasing addresses for endpoint %s's interface on network %s", ep.Name(), n.Name())
ipam, _, err := n.getController().getIPAMDriver(n.ipamType)
if err != nil {
log.G(context.TODO()).Warnf("Failed to retrieve ipam driver to release interface address on delete of endpoint %s (%s): %v", ep.Name(), ep.ID(), err)
return
}
if ep.iface.addr != nil {
if err := ipam.ReleaseAddress(ep.iface.v4PoolID, ep.iface.addr.IP); err != nil {
log.G(context.TODO()).Warnf("Failed to release ip address %s on delete of endpoint %s (%s): %v", ep.iface.addr.IP, ep.Name(), ep.ID(), err)
}
}
if ep.iface.addrv6 != nil {
if err := ipam.ReleaseAddress(ep.iface.v6PoolID, ep.iface.addrv6.IP); err != nil {
log.G(context.TODO()).Warnf("Failed to release ip address %s on delete of endpoint %s (%s): %v", ep.iface.addrv6.IP, ep.Name(), ep.ID(), err)
}
}
}
func (c *Controller) cleanupLocalEndpoints() error {
// Get used endpoints
eps := make(map[string]interface{})
for _, sb := range c.sandboxes {
for _, ep := range sb.endpoints {
eps[ep.id] = true
}
}
nl, err := c.getNetworks()
if err != nil {
return fmt.Errorf("could not get list of networks: %v", err)
}
for _, n := range nl {
if n.ConfigOnly() {
continue
}
epl, err := n.getEndpointsFromStore()
if err != nil {
log.G(context.TODO()).Warnf("Could not get list of endpoints in network %s during endpoint cleanup: %v", n.name, err)
continue
}
for _, ep := range epl {
if _, ok := eps[ep.id]; ok {
continue
}
log.G(context.TODO()).Infof("Removing stale endpoint %s (%s)", ep.name, ep.id)
if err := ep.Delete(true); err != nil {
log.G(context.TODO()).Warnf("Could not delete local endpoint %s during endpoint cleanup: %v", ep.name, err)
}
}
epl, err = n.getEndpointsFromStore()
if err != nil {
log.G(context.TODO()).Warnf("Could not get list of endpoints in network %s for count update: %v", n.name, err)
continue
}
epCnt := n.getEpCnt().EndpointCnt()
if epCnt != uint64(len(epl)) {
log.G(context.TODO()).Infof("Fixing inconsistent endpoint_cnt for network %s. Expected=%d, Actual=%d", n.name, len(epl), epCnt)
if err := n.getEpCnt().setCnt(uint64(len(epl))); err != nil {
log.G(context.TODO()).WithField("network", n.name).WithError(err).Warn("Error while fixing inconsistent endpoint_cnt for network")
}
}
}
return nil
}