moby/daemon/network.go


package daemon // import "github.com/docker/docker/daemon"
import (
"context"
"errors"
"fmt"
"net"
"sort"
"strconv"
"strings"
"sync"
"github.com/containerd/log"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/backend"
containertypes "github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/events"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/container"
clustertypes "github.com/docker/docker/daemon/cluster/provider"
"github.com/docker/docker/daemon/config"
internalnetwork "github.com/docker/docker/daemon/network"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/libnetwork"
lncluster "github.com/docker/docker/libnetwork/cluster"
"github.com/docker/docker/libnetwork/driverapi"
"github.com/docker/docker/libnetwork/ipamapi"
"github.com/docker/docker/libnetwork/netlabel"
"github.com/docker/docker/libnetwork/networkdb"
"github.com/docker/docker/libnetwork/options"
networktypes "github.com/docker/docker/libnetwork/types"
"github.com/docker/docker/opts"
"github.com/docker/docker/pkg/plugingetter"
"github.com/docker/docker/runconfig"
"github.com/docker/go-connections/nat"
)
// PredefinedNetworkError is returned when a user tries to create a predefined network that already exists.
type PredefinedNetworkError string
func (pnr PredefinedNetworkError) Error() string {
return fmt.Sprintf("operation is not permitted on predefined %s network ", string(pnr))
}
// Forbidden denotes the type of this error
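// It marks the error so errdefs-aware callers can classify it as a forbidden (HTTP 403) error.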
func (pnr PredefinedNetworkError) Forbidden() {}
// NetworkControllerEnabled checks if the networking stack is enabled.
// This feature depends on OS primitives and it's disabled in systems like Windows.
func (daemon *Daemon) NetworkControllerEnabled() bool {
return daemon.netController != nil
}
// NetworkController returns the network controller created by the daemon.
func (daemon *Daemon) NetworkController() *libnetwork.Controller {
return daemon.netController
}
// FindNetwork returns a network based on:
// 1. Full ID
// 2. Full Name
// 3. Partial ID
// as long as there is no ambiguity
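//
// Matching on the full ID takes precedence, so a network whose name happens
// to equal another network's ID does not mask that ID (for example, when
// removing a network by its full ID).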
func (daemon *Daemon) FindNetwork(term string) (*libnetwork.Network, error) {
var listByFullName, listByPartialID []*libnetwork.Network
for _, nw := range daemon.getAllNetworks() {
nwID := nw.ID()
if nwID == term {
return nw, nil
}
if strings.HasPrefix(nw.ID(), term) {
listByPartialID = append(listByPartialID, nw)
}
if nw.Name() == term {
listByFullName = append(listByFullName, nw)
}
}
switch {
case len(listByFullName) == 1:
return listByFullName[0], nil
case len(listByFullName) > 1:
return nil, errdefs.InvalidParameter(fmt.Errorf("network %s is ambiguous (%d matches found on name)", term, len(listByFullName)))
case len(listByPartialID) == 1:
return listByPartialID[0], nil
case len(listByPartialID) > 1:
return nil, errdefs.InvalidParameter(fmt.Errorf("network %s is ambiguous (%d matches found based on ID prefix)", term, len(listByPartialID)))
}
// Be very careful when changing the error type here: the
// libnetwork.ErrNoSuchNetwork error is used by the controller
// to retry creating the network when it is managed through the swarm manager.
return nil, errdefs.NotFound(libnetwork.ErrNoSuchNetwork(term))
}
// GetNetworkByID function returns a network whose ID matches the given ID.
// It fails with an error if no matching network is found.
func (daemon *Daemon) GetNetworkByID(id string) (*libnetwork.Network, error) {
c := daemon.netController
if c == nil {
return nil, fmt.Errorf("netcontroller is nil: %w", libnetwork.ErrNoSuchNetwork(id))
}
return c.NetworkByID(id)
}
// GetNetworkByName function returns a network for a given network name.
// If no network name is given, the default network is returned.
func (daemon *Daemon) GetNetworkByName(name string) (*libnetwork.Network, error) {
c := daemon.netController
if c == nil {
return nil, libnetwork.ErrNoSuchNetwork(name)
}
if name == "" {
name = c.Config().DefaultNetwork
}
return c.NetworkByName(name)
}
// GetNetworksByIDPrefix returns all networks whose ID starts with the given prefix; zero or more networks may match.
func (daemon *Daemon) GetNetworksByIDPrefix(partialID string) []*libnetwork.Network {
c := daemon.netController
if c == nil {
return nil
}
list := []*libnetwork.Network{}
l := func(nw *libnetwork.Network) bool {
if strings.HasPrefix(nw.ID(), partialID) {
list = append(list, nw)
}
return false
}
c.WalkNetworks(l)
return list
}
// getAllNetworks returns a list containing all networks
func (daemon *Daemon) getAllNetworks() []*libnetwork.Network {
c := daemon.netController
if c == nil {
return nil
}
ctx := context.TODO()
return c.Networks(ctx)
}
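// ingressJob is a queued request to set up (create != nil) or release
// (create == nil) the cluster ingress network. jobDone is closed once the
// job has been processed.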
type ingressJob struct {
create *clustertypes.NetworkCreateRequest
ip net.IP
jobDone chan struct{}
}
var (
ingressWorkerOnce sync.Once
ingressJobsChannel chan *ingressJob
ingressID string
)
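// startIngressWorker starts a single goroutine that serializes ingress setup
// and release jobs received on ingressJobsChannel. It is started at most
// once, via ingressWorkerOnce.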
func (daemon *Daemon) startIngressWorker() {
ingressJobsChannel = make(chan *ingressJob, 100)
go func() {
//nolint: gosimple
for {
select {
case r := <-ingressJobsChannel:
if r.create != nil {
daemon.setupIngress(&daemon.config().Config, r.create, r.ip, ingressID)
ingressID = r.create.ID
} else {
daemon.releaseIngress(ingressID)
ingressID = ""
}
close(r.jobDone)
}
}
}()
}
// enqueueIngressJob adds an ingress add/rm request to the worker queue.
// It guarantees the worker is started.
func (daemon *Daemon) enqueueIngressJob(job *ingressJob) {
ingressWorkerOnce.Do(daemon.startIngressWorker)
ingressJobsChannel <- job
}
// SetupIngress sets up ingress networking.
// The function returns a channel which will signal the caller when the programming is completed.
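//
// Illustrative usage (hypothetical caller, not part of this file): callers
// typically block on the returned channel to wait for the programming to
// complete:
//
//	done, err := daemon.SetupIngress(req, "10.255.0.2/16")
//	if err == nil {
//		<-done
//	}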
func (daemon *Daemon) SetupIngress(create clustertypes.NetworkCreateRequest, nodeIP string) (<-chan struct{}, error) {
ip, _, err := net.ParseCIDR(nodeIP)
if err != nil {
return nil, err
}
done := make(chan struct{})
daemon.enqueueIngressJob(&ingressJob{&create, ip, done})
return done, nil
}
// ReleaseIngress releases the ingress networking.
// The function returns a channel which will signal the caller when the programming is completed.
func (daemon *Daemon) ReleaseIngress() (<-chan struct{}, error) {
done := make(chan struct{})
daemon.enqueueIngressJob(&ingressJob{nil, nil, done})
return done, nil
}
func (daemon *Daemon) setupIngress(cfg *config.Config, create *clustertypes.NetworkCreateRequest, ip net.IP, staleID string) {
controller := daemon.netController
controller.AgentInitWait()
if staleID != "" && staleID != create.ID {
daemon.releaseIngress(staleID)
}
if _, err := daemon.createNetwork(cfg, create.NetworkCreateRequest, create.ID, true); err != nil {
// If the error is anything other than an "already exists"
// error, log it and return.
if _, ok := err.(libnetwork.NetworkNameError); !ok {
log.G(context.TODO()).Errorf("Failed creating ingress network: %v", err)
return
}
// Otherwise continue down the call to create or recreate sandbox.
}
_, err := daemon.GetNetworkByID(create.ID)
if err != nil {
log.G(context.TODO()).Errorf("Failed getting ingress network by id after creating: %v", err)
}
}
func (daemon *Daemon) releaseIngress(id string) {
controller := daemon.netController
if id == "" {
return
}
n, err := controller.NetworkByID(id)
if err != nil {
log.G(context.TODO()).Errorf("failed to retrieve ingress network %s: %v", id, err)
return
}
if err := n.Delete(libnetwork.NetworkDeleteOptionRemoveLB); err != nil {
log.G(context.TODO()).Errorf("Failed to delete ingress network %s: %v", n.ID(), err)
return
}
}
// SetNetworkBootstrapKeys sets the bootstrap keys.
func (daemon *Daemon) SetNetworkBootstrapKeys(keys []*networktypes.EncryptionKey) error {
if err := daemon.netController.SetKeys(keys); err != nil {
return err
}
// Upon successfully setting the keys, dispatch the keys-available event.
daemon.cluster.SendClusterEvent(lncluster.EventNetworkKeysAvailable)
return nil
}
// UpdateAttachment notifies the attacher about the attachment config.
func (daemon *Daemon) UpdateAttachment(networkName, networkID, containerID string, config *network.NetworkingConfig) error {
if daemon.clusterProvider == nil {
return fmt.Errorf("cluster provider is not initialized")
}
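// Try updating the attachment by network name first; if that fails, fall
// back to updating it by network ID.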
if err := daemon.clusterProvider.UpdateAttachment(networkName, containerID, config); err != nil {
return daemon.clusterProvider.UpdateAttachment(networkID, containerID, config)
}
return nil
}
// WaitForDetachment makes the cluster manager wait for detachment of
// the container from the network.
func (daemon *Daemon) WaitForDetachment(ctx context.Context, networkName, networkID, taskID, containerID string) error {
if daemon.clusterProvider == nil {
return fmt.Errorf("cluster provider is not initialized")
}
return daemon.clusterProvider.WaitForDetachment(ctx, networkName, networkID, taskID, containerID)
}
// CreateManagedNetwork creates an agent network.
func (daemon *Daemon) CreateManagedNetwork(create clustertypes.NetworkCreateRequest) error {
_, err := daemon.createNetwork(&daemon.config().Config, create.NetworkCreateRequest, create.ID, true)
return err
}
// CreateNetwork creates a network with the given name, driver and other optional parameters
func (daemon *Daemon) CreateNetwork(create types.NetworkCreateRequest) (*types.NetworkCreateResponse, error) {
return daemon.createNetwork(&daemon.config().Config, create, "", false)
}
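// createNetwork creates a network from the given request. id may be empty,
// in which case libnetwork generates one. When agent is true the network is
// created on behalf of the cluster agent and is marked dynamic and
// non-persistent.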
func (daemon *Daemon) createNetwork(cfg *config.Config, create types.NetworkCreateRequest, id string, agent bool) (*types.NetworkCreateResponse, error) {
if runconfig.IsPreDefinedNetwork(create.Name) {
return nil, PredefinedNetworkError(create.Name)
}
c := daemon.netController
driver := create.Driver
if driver == "" {
driver = c.Config().DefaultDriver
}
if driver == "overlay" && !daemon.cluster.IsManager() && !agent {
return nil, errdefs.Forbidden(errors.New(`This node is not a swarm manager. Use "docker swarm init" or "docker swarm join" to connect this node to swarm and try again.`))
}
networkOptions := make(map[string]string)
for k, v := range create.Options {
networkOptions[k] = v
}
if defaultOpts, ok := cfg.DefaultNetworkOpts[driver]; create.ConfigFrom == nil && ok {
for k, v := range defaultOpts {
if _, ok := networkOptions[k]; !ok {
log.G(context.TODO()).WithFields(log.Fields{"driver": driver, "network": id, k: v}).Debug("Applying network default option")
networkOptions[k] = v
}
}
}
nwOptions := []libnetwork.NetworkOption{
libnetwork.NetworkOptionEnableIPv6(create.EnableIPv6),
libnetwork.NetworkOptionDriverOpts(networkOptions),
libnetwork.NetworkOptionLabels(create.Labels),
libnetwork.NetworkOptionAttachable(create.Attachable),
libnetwork.NetworkOptionIngress(create.Ingress),
libnetwork.NetworkOptionScope(create.Scope),
}
if create.ConfigOnly {
nwOptions = append(nwOptions, libnetwork.NetworkOptionConfigOnly())
}
if err := network.ValidateIPAM(create.IPAM, create.EnableIPv6); err != nil {
if agent {
// This function is called with agent=false for all networks. For swarm-scoped
// networks, the configuration is validated but ManagerRedirectError is returned
// and the network is not created. Then, each time a swarm-scoped network is
// needed, this function is called again with agent=true.
//
// Non-swarm networks created before ValidateIPAM was introduced continue to work
// as they did before-upgrade, even if they would fail the new checks on creation
// (for example, by having host-bits set in their subnet). Those networks are not
// seen again here.
//
// By dropping errors for agent networks, existing swarm-scoped networks also
// continue to behave as they did before upgrade - but new networks are still
// validated.
log.G(context.TODO()).WithFields(log.Fields{
"error": err,
"network": create.Name,
}).Warn("Continuing with validation errors in agent IPAM")
} else {
return nil, errdefs.InvalidParameter(err)
}
}
if create.IPAM != nil {
ipam := create.IPAM
v4Conf, v6Conf, err := getIpamConfig(ipam.Config)
if err != nil {
return nil, err
}
nwOptions = append(nwOptions, libnetwork.NetworkOptionIpam(ipam.Driver, "", v4Conf, v6Conf, ipam.Options))
}
if create.Internal {
nwOptions = append(nwOptions, libnetwork.NetworkOptionInternalNetwork())
}
if agent {
nwOptions = append(nwOptions, libnetwork.NetworkOptionDynamic())
nwOptions = append(nwOptions, libnetwork.NetworkOptionPersist(false))
}
if create.ConfigFrom != nil {
nwOptions = append(nwOptions, libnetwork.NetworkOptionConfigFrom(create.ConfigFrom.Network))
}
if agent && driver == "overlay" {
nodeIP, exists := daemon.GetAttachmentStore().GetIPForNetwork(id)
if !exists {
return nil, fmt.Errorf("failed to find a load balancer IP to use for network: %v", id)
}
nwOptions = append(nwOptions, libnetwork.NetworkOptionLBEndpoint(nodeIP))
}
n, err := c.NewNetwork(driver, create.Name, id, nwOptions...)
if err != nil {
return nil, err
}
daemon.pluginRefCount(driver, driverapi.NetworkPluginEndpointType, plugingetter.Acquire)
if create.IPAM != nil {
daemon.pluginRefCount(create.IPAM.Driver, ipamapi.PluginEndpointType, plugingetter.Acquire)
}
daemon.LogNetworkEvent(n, events.ActionCreate)
return &types.NetworkCreateResponse{
ID: n.ID(),
}, nil
}
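// pluginRefCount acquires or releases (depending on mode) a reference on the
// plugin providing the given network or IPAM driver. Built-in drivers are
// skipped, as they are not managed through the plugin store.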
func (daemon *Daemon) pluginRefCount(driver, capability string, mode int) {
var builtinDrivers []string
if capability == driverapi.NetworkPluginEndpointType {
builtinDrivers = daemon.netController.BuiltinDrivers()
} else if capability == ipamapi.PluginEndpointType {
builtinDrivers = daemon.netController.BuiltinIPAMDrivers()
}
for _, d := range builtinDrivers {
if d == driver {
return
}
}
if daemon.PluginStore != nil {
_, err := daemon.PluginStore.Get(driver, capability, mode)
if err != nil {
log.G(context.TODO()).WithError(err).WithFields(log.Fields{"mode": mode, "driver": driver}).Error("Error handling plugin refcount operation")
}
}
}
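// getIpamConfig splits the API-level IPAM configuration into IPv4 and IPv6
// libnetwork IpamConf lists, based on whether each entry's Subnet parses as an
// IPv4 or IPv6 CIDR. For example (illustrative values only), "172.20.0.0/16"
// would land in the IPv4 list and "2001:db8::/64" in the IPv6 list.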
func getIpamConfig(data []network.IPAMConfig) ([]*libnetwork.IpamConf, []*libnetwork.IpamConf, error) {
ipamV4Cfg := []*libnetwork.IpamConf{}
ipamV6Cfg := []*libnetwork.IpamConf{}
for _, d := range data {
iCfg := libnetwork.IpamConf{}
iCfg.PreferredPool = d.Subnet
iCfg.SubPool = d.IPRange
iCfg.Gateway = d.Gateway
iCfg.AuxAddresses = d.AuxAddress
ip, _, err := net.ParseCIDR(d.Subnet)
if err != nil {
return nil, nil, fmt.Errorf("Invalid subnet %s : %v", d.Subnet, err)
}
if ip.To4() != nil {
ipamV4Cfg = append(ipamV4Cfg, &iCfg)
} else {
ipamV6Cfg = append(ipamV6Cfg, &iCfg)
}
}
return ipamV4Cfg, ipamV6Cfg, nil
}
// UpdateContainerServiceConfig updates a service configuration.
func (daemon *Daemon) UpdateContainerServiceConfig(containerName string, serviceConfig *clustertypes.ServiceConfig) error {
ctr, err := daemon.GetContainer(containerName)
if err != nil {
return err
}
ctr.NetworkSettings.Service = serviceConfig
return nil
}
// ConnectContainerToNetwork connects the given container to the given
// network. If either cannot be found, an err is returned. If the
// network cannot be set up, an err is returned.
func (daemon *Daemon) ConnectContainerToNetwork(containerName, networkName string, endpointConfig *network.EndpointSettings) error {
ctr, err := daemon.GetContainer(containerName)
if err != nil {
return err
}
return daemon.ConnectToNetwork(ctr, networkName, endpointConfig)
}
// DisconnectContainerFromNetwork disconnects the given container from
// the given network. If either cannot be found, an err is returned.
func (daemon *Daemon) DisconnectContainerFromNetwork(containerName string, networkName string, force bool) error {
ctr, err := daemon.GetContainer(containerName)
if err != nil {
if force {
return daemon.ForceEndpointDelete(containerName, networkName)
}
return err
}
return daemon.DisconnectFromNetwork(ctr, networkName, force)
}
// GetNetworkDriverList returns the list of network drivers registered with
// the daemon: built-in drivers, managed network plugins, and drivers used by
// existing networks.
func (daemon *Daemon) GetNetworkDriverList(ctx context.Context) []string {
if !daemon.NetworkControllerEnabled() {
return nil
}
pluginList := daemon.netController.BuiltinDrivers()
managedPlugins := daemon.PluginStore.GetAllManagedPluginsByCap(driverapi.NetworkPluginEndpointType)
for _, plugin := range managedPlugins {
pluginList = append(pluginList, plugin.Name())
}
pluginMap := make(map[string]bool)
for _, plugin := range pluginList {
pluginMap[plugin] = true
}
networks := daemon.netController.Networks(ctx)
for _, nw := range networks {
if !pluginMap[nw.Type()] {
pluginList = append(pluginList, nw.Type())
pluginMap[nw.Type()] = true
}
}
sort.Strings(pluginList)
return pluginList
}
// DeleteManagedNetwork deletes an agent network.
// The requirement of networkID is enforced.
func (daemon *Daemon) DeleteManagedNetwork(networkID string) error {
n, err := daemon.GetNetworkByID(networkID)
if err != nil {
return err
}
return daemon.deleteNetwork(n, true)
}
// DeleteNetwork destroys a network unless it's one of docker's predefined networks.
func (daemon *Daemon) DeleteNetwork(networkID string) error {
n, err := daemon.GetNetworkByID(networkID)
if err != nil {
return fmt.Errorf("could not find network by ID: %w", err)
}
return daemon.deleteNetwork(n, false)
}
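// deleteNetwork removes the given network. When dynamic is true the request
// originates from the cluster: predefined networks are then skipped without
// error, and only dynamic (cluster-managed) networks may be removed.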
func (daemon *Daemon) deleteNetwork(nw *libnetwork.Network, dynamic bool) error {
if runconfig.IsPreDefinedNetwork(nw.Name()) && !dynamic {
err := fmt.Errorf("%s is a pre-defined network and cannot be removed", nw.Name())
return errdefs.Forbidden(err)
}
if dynamic && !nw.Dynamic() {
if runconfig.IsPreDefinedNetwork(nw.Name()) {
// Predefined networks now support swarm services. Make this
// a no-op when the cluster requests removal of a predefined network.
return nil
}
err := fmt.Errorf("%s is not a dynamic network", nw.Name())
return errdefs.Forbidden(err)
}
if err := nw.Delete(); err != nil {
return fmt.Errorf("error while removing network: %w", err)
}
// If this is not a configuration only network, we need to
// update the corresponding remote drivers' reference counts
if !nw.ConfigOnly() {
daemon.pluginRefCount(nw.Type(), driverapi.NetworkPluginEndpointType, plugingetter.Release)
ipamType, _, _, _ := nw.IpamConfig()
daemon.pluginRefCount(ipamType, ipamapi.PluginEndpointType, plugingetter.Release)
daemon.LogNetworkEvent(nw, events.ActionDestroy)
}
return nil
}
// GetNetworks returns all networks matching the given filter. When
// config.Detailed is set, container attachments are included for each
// network; config.Verbose additionally includes service attachments.
func (daemon *Daemon) GetNetworks(filter filters.Args, config backend.NetworkListConfig) (networks []types.NetworkResource, err error) {
var idx map[string]*libnetwork.Network
if config.Detailed {
idx = make(map[string]*libnetwork.Network)
}
allNetworks := daemon.getAllNetworks()
networks = make([]types.NetworkResource, 0, len(allNetworks))
for _, n := range allNetworks {
nr := buildNetworkResource(n)
networks = append(networks, nr)
if config.Detailed {
idx[nr.ID] = n
}
}
networks, err = internalnetwork.FilterNetworks(networks, filter)
if err != nil {
return nil, err
}
if config.Detailed {
for i, nw := range networks {
networks[i].Containers = buildContainerAttachments(idx[nw.ID])
if config.Verbose {
networks[i].Services = buildServiceAttachments(idx[nw.ID])
}
}
}
return networks, nil
}
// buildNetworkResource builds a [types.NetworkResource] from the given
// [libnetwork.Network], to be returned by the API.
func buildNetworkResource(nw *libnetwork.Network) types.NetworkResource {
if nw == nil {
return types.NetworkResource{}
}
return types.NetworkResource{
Name: nw.Name(),
ID: nw.ID(),
Created: nw.Created(),
Scope: nw.Scope(),
Driver: nw.Type(),
EnableIPv6: nw.IPv6Enabled(),
IPAM: buildIPAMResources(nw),
Internal: nw.Internal(),
Attachable: nw.Attachable(),
Ingress: nw.Ingress(),
ConfigFrom: network.ConfigReference{Network: nw.ConfigFrom()},
ConfigOnly: nw.ConfigOnly(),
Containers: map[string]types.EndpointResource{},
Options: nw.DriverOptions(),
Labels: nw.Labels(),
Peers: buildPeerInfoResources(nw.Peers()),
}
}
// buildContainerAttachments creates a [types.EndpointResource] map of all
// containers attached to the network. It is used when listing networks in
// detailed mode.
func buildContainerAttachments(nw *libnetwork.Network) map[string]types.EndpointResource {
containers := make(map[string]types.EndpointResource)
for _, e := range nw.Endpoints() {
ei := e.Info()
if ei == nil {
continue
}
if sb := ei.Sandbox(); sb != nil {
containers[sb.ContainerID()] = buildEndpointResource(e, ei)
} else {
containers["ep-"+e.ID()] = buildEndpointResource(e, ei)
}
}
return containers
}
// buildServiceAttachments creates a [network.ServiceInfo] map of all services
// attached to the network. It is used when listing networks in "verbose" mode.
func buildServiceAttachments(nw *libnetwork.Network) map[string]network.ServiceInfo {
services := make(map[string]network.ServiceInfo)
for name, service := range nw.Services() {
tasks := make([]network.Task, 0, len(service.Tasks))
for _, t := range service.Tasks {
tasks = append(tasks, network.Task{
Name: t.Name,
EndpointID: t.EndpointID,
EndpointIP: t.EndpointIP,
Info: t.Info,
})
}
services[name] = network.ServiceInfo{
VIP: service.VIP,
Ports: service.Ports,
Tasks: tasks,
LocalLBIndex: service.LocalLBIndex,
}
}
return services
}
// buildPeerInfoResources converts a list of [networkdb.PeerInfo] to a
// [network.PeerInfo] for inclusion in API responses. It returns nil if
// the list of peers is empty.
func buildPeerInfoResources(peers []networkdb.PeerInfo) []network.PeerInfo {
if len(peers) == 0 {
return nil
}
peerInfo := make([]network.PeerInfo, 0, len(peers))
for _, peer := range peers {
peerInfo = append(peerInfo, network.PeerInfo(peer))
}
return peerInfo
}
// buildIPAMResources constructs a [network.IPAM] from the network's
// IPAM information for inclusion in API responses.
func buildIPAMResources(nw *libnetwork.Network) network.IPAM {
var ipamConfig []network.IPAMConfig
ipamDriver, ipamOptions, ipv4Conf, ipv6Conf := nw.IpamConfig()
hasIPv4Config := false
for _, cfg := range ipv4Conf {
if cfg.PreferredPool == "" {
continue
}
hasIPv4Config = true
ipamConfig = append(ipamConfig, network.IPAMConfig{
Subnet: cfg.PreferredPool,
IPRange: cfg.SubPool,
Gateway: cfg.Gateway,
AuxAddress: cfg.AuxAddresses,
})
}
hasIPv6Config := false
for _, cfg := range ipv6Conf {
if cfg.PreferredPool == "" {
continue
}
hasIPv6Config = true
ipamConfig = append(ipamConfig, network.IPAMConfig{
Subnet: cfg.PreferredPool,
IPRange: cfg.SubPool,
Gateway: cfg.Gateway,
AuxAddress: cfg.AuxAddresses,
})
}
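// If either address family has no statically configured pool, fall back to
// the pools and gateways that IPAM actually allocated for the network.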
if !hasIPv4Config || !hasIPv6Config {
ipv4Info, ipv6Info := nw.IpamInfo()
if !hasIPv4Config {
for _, info := range ipv4Info {
var gw string
if info.IPAMData.Gateway != nil {
gw = info.IPAMData.Gateway.IP.String()
}
ipamConfig = append(ipamConfig, network.IPAMConfig{
Subnet: info.IPAMData.Pool.String(),
Gateway: gw,
})
}
}
if !hasIPv6Config {
for _, info := range ipv6Info {
if info.IPAMData.Pool == nil {
continue
}
ipamConfig = append(ipamConfig, network.IPAMConfig{
Subnet: info.IPAMData.Pool.String(),
Gateway: info.IPAMData.Gateway.String(),
})
}
}
}
return network.IPAM{
Driver: ipamDriver,
Options: ipamOptions,
Config: ipamConfig,
}
}
// buildEndpointResource combines information from the endpoint and additional
// endpoint-info into a [types.EndpointResource].
func buildEndpointResource(ep *libnetwork.Endpoint, info libnetwork.EndpointInfo) types.EndpointResource {
er := types.EndpointResource{
EndpointID: ep.ID(),
Name: ep.Name(),
}
if iface := info.Iface(); iface != nil {
if mac := iface.MacAddress(); mac != nil {
er.MacAddress = mac.String()
}
if ip := iface.Address(); ip != nil && len(ip.IP) > 0 {
er.IPv4Address = ip.String()
}
if ip := iface.AddressIPv6(); ip != nil && len(ip.IP) > 0 {
er.IPv6Address = ip.String()
}
}
return er
}
// clearAttachableNetworks removes the attachable networks
// after disconnecting any connected container
func (daemon *Daemon) clearAttachableNetworks() {
for _, n := range daemon.getAllNetworks() {
if !n.Attachable() {
continue
}
for _, ep := range n.Endpoints() {
epInfo := ep.Info()
if epInfo == nil {
continue
}
sb := epInfo.Sandbox()
if sb == nil {
continue
}
containerID := sb.ContainerID()
if err := daemon.DisconnectContainerFromNetwork(containerID, n.ID(), true); err != nil {
log.G(context.TODO()).Warnf("Failed to disconnect container %s from swarm network %s on cluster leave: %v",
containerID, n.Name(), err)
}
}
if err := daemon.DeleteManagedNetwork(n.ID()); err != nil {
log.G(context.TODO()).Warnf("Failed to remove swarm network %s on cluster leave: %v", n.Name(), err)
}
}
}
// buildCreateEndpointOptions builds endpoint options from a given network.
func buildCreateEndpointOptions(c *container.Container, n *libnetwork.Network, epConfig *internalnetwork.EndpointSettings, sb *libnetwork.Sandbox, daemonDNS []string) ([]libnetwork.EndpointOption, error) {
var createOptions []libnetwork.EndpointOption
var genericOptions = make(options.Generic)
nwName := n.Name()
if epConfig != nil {
if ipam := epConfig.IPAMConfig; ipam != nil {
var ipList []net.IP
for _, ips := range ipam.LinkLocalIPs {
linkIP := net.ParseIP(ips)
if linkIP == nil && ips != "" {
return nil, fmt.Errorf("invalid link-local IP address: %s", ipam.LinkLocalIPs)
}
ipList = append(ipList, linkIP)
}
ip := net.ParseIP(ipam.IPv4Address)
if ip == nil && ipam.IPv4Address != "" {
return nil, fmt.Errorf("invalid IPv4 address: %s", ipam.IPv4Address)
}
ip6 := net.ParseIP(ipam.IPv6Address)
if ip6 == nil && ipam.IPv6Address != "" {
return nil, fmt.Errorf("invalid IPv6 address: %s", ipam.IPv6Address)
}
createOptions = append(createOptions, libnetwork.CreateOptionIpam(ip, ip6, ipList, nil))
}
createOptions = append(createOptions, libnetwork.CreateOptionDNSNames(epConfig.DNSNames))
for k, v := range epConfig.DriverOpts {
createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(options.Generic{k: v}))
}
if epConfig.DesiredMacAddress != "" {
mac, err := net.ParseMAC(epConfig.DesiredMacAddress)
if err != nil {
return nil, err
}
genericOptions[netlabel.MacAddress] = mac
}
}
if svcCfg := c.NetworkSettings.Service; svcCfg != nil {
nwID := n.ID()
var vip net.IP
if virtualAddress := svcCfg.VirtualAddresses[nwID]; virtualAddress != nil {
vip = net.ParseIP(virtualAddress.IPv4)
}
var portConfigs []*libnetwork.PortConfig
for _, portConfig := range svcCfg.ExposedPorts {
portConfigs = append(portConfigs, &libnetwork.PortConfig{
Name: portConfig.Name,
Protocol: libnetwork.PortConfig_Protocol(portConfig.Protocol),
TargetPort: portConfig.TargetPort,
PublishedPort: portConfig.PublishedPort,
})
}
createOptions = append(createOptions, libnetwork.CreateOptionService(svcCfg.Name, svcCfg.ID, vip, portConfigs, svcCfg.Aliases[nwID]))
}
if !containertypes.NetworkMode(nwName).IsUserDefined() {
createOptions = append(createOptions, libnetwork.CreateOptionDisableResolution())
}
opts, err := buildPortsRelatedCreateEndpointOptions(c, n, sb)
if err != nil {
return nil, err
}
createOptions = append(createOptions, opts...)
// On Windows, DNS config is a per-adapter option whereas on Linux it's a sandbox-wide parameter, which is why
// DNS config is handled both here and in buildSandboxOptions. The following DNS options are only honored by
// Windows netdrivers, whereas the DNS options in buildSandboxOptions are only honored by Linux netdrivers.
if !n.Internal() {
if len(c.HostConfig.DNS) > 0 {
createOptions = append(createOptions, libnetwork.CreateOptionDNS(c.HostConfig.DNS))
} else if len(daemonDNS) > 0 {
createOptions = append(createOptions, libnetwork.CreateOptionDNS(daemonDNS))
}
}
createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOptions))
return createOptions, nil
}
// buildPortsRelatedCreateEndpointOptions returns the appropriate endpoint options to apply config related to port
// mapping and exposed ports.
func buildPortsRelatedCreateEndpointOptions(c *container.Container, n *libnetwork.Network, sb *libnetwork.Sandbox) ([]libnetwork.EndpointOption, error) {
// Port-mapping rules belong to the container and are only applicable to non-internal networks.
//
// TODO(thaJeztah): Check if we can provide a more minimal function for getPortMapInfo, as it does a lot, and we only need the "length".
if n.Internal() || len(getPortMapInfo(sb)) > 0 {
return nil, nil
}
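// Copy the container's configured port bindings, keyed by exposed port, before translating them into libnetwork port bindings below.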
bindings := make(nat.PortMap)
if c.HostConfig.PortBindings != nil {
for p, b := range c.HostConfig.PortBindings {
bindings[p] = []nat.PortBinding{}
for _, bb := range b {
bindings[p] = append(bindings[p], nat.PortBinding{
HostIP: bb.HostIP,
HostPort: bb.HostPort,
})
}
}
}
// TODO(thaJeztah): Move this code to a method on nat.PortSet.
ports := make([]nat.Port, 0, len(c.Config.ExposedPorts))
for p := range c.Config.ExposedPorts {
ports = append(ports, p)
}
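// Sort the exposed ports so that ports with an explicit HostPort binding are handled first (see nat.SortPortMap).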
nat.SortPortMap(ports, bindings)
var (
exposedPorts []networktypes.TransportPort
publishedPorts []networktypes.PortBinding
)
for _, port := range ports {
portProto := networktypes.ParseProtocol(port.Proto())
portNum := uint16(port.Int())
exposedPorts = append(exposedPorts, networktypes.TransportPort{
Proto: portProto,
Port: portNum,
})
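// A HostPort value may be a single port or a range such as "8000-8010"; parse it and resolve the start/end of that range.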
for _, binding := range bindings[port] {
newP, err := nat.NewPort(nat.SplitProtoPort(binding.HostPort))
var portStart, portEnd int
if err == nil {
portStart, portEnd, err = newP.Range()
}
if err != nil {
return nil, fmt.Errorf("error parsing HostPort value (%s): %w", binding.HostPort, err)
}
publishedPorts = append(publishedPorts, networktypes.PortBinding{
Proto: portProto,
Port: portNum,
HostIP: net.ParseIP(binding.HostIP),
HostPort: uint16(portStart),
HostPortEnd: uint16(portEnd),
})
}
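// With --publish-all, ports without an explicit binding are still published; the host IP and port are left unset so the driver can pick them.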
if c.HostConfig.PublishAllPorts && len(bindings[port]) == 0 {
publishedPorts = append(publishedPorts, networktypes.PortBinding{
Proto: portProto,
Port: portNum,
})
}
}
return []libnetwork.EndpointOption{
libnetwork.CreateOptionPortMapping(publishedPorts),
libnetwork.CreateOptionExposedPorts(exposedPorts),
}, nil
}
// getPortMapInfo retrieves the current port-mapping programmed for the given sandbox
func getPortMapInfo(sb *libnetwork.Sandbox) nat.PortMap {
pm := nat.PortMap{}
if sb == nil {
return pm
}
for _, ep := range sb.Endpoints() {
pm, _ = getEndpointPortMapInfo(ep)
if len(pm) > 0 {
break
}
}
return pm
}
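// getEndpointPortMapInfo returns the exposed ports and port mappings reported by the endpoint's driver, translated into a nat.PortMap.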
func getEndpointPortMapInfo(ep *libnetwork.Endpoint) (nat.PortMap, error) {
pm := nat.PortMap{}
driverInfo, err := ep.DriverInfo()
if err != nil {
return pm, err
}
if driverInfo == nil {
// It is not an error for driverInfo to be nil
return pm, nil
}
if expData, ok := driverInfo[netlabel.ExposedPorts]; ok {
if exposedPorts, ok := expData.([]networktypes.TransportPort); ok {
for _, tp := range exposedPorts {
natPort, err := nat.NewPort(tp.Proto.String(), strconv.Itoa(int(tp.Port)))
if err != nil {
return pm, fmt.Errorf("Error parsing Port value(%v):%v", tp.Port, err)
}
pm[natPort] = nil
}
}
}
mapData, ok := driverInfo[netlabel.PortMap]
if !ok {
return pm, nil
}
if portMapping, ok := mapData.([]networktypes.PortBinding); ok {
for _, pp := range portMapping {
natPort, err := nat.NewPort(pp.Proto.String(), strconv.Itoa(int(pp.Port)))
if err != nil {
return pm, err
}
natBndg := nat.PortBinding{HostIP: pp.HostIP.String(), HostPort: strconv.Itoa(int(pp.HostPort))}
pm[natPort] = append(pm[natPort], natBndg)
}
}
return pm, nil
}
// buildEndpointInfo sets endpoint-related fields on container.NetworkSettings based on the provided network and endpoint.
func buildEndpointInfo(networkSettings *internalnetwork.Settings, n *libnetwork.Network, ep *libnetwork.Endpoint) error {
if ep == nil {
return errors.New("endpoint cannot be nil")
}
if networkSettings == nil {
return errors.New("network cannot be nil")
}
epInfo := ep.Info()
if epInfo == nil {
// It is not an error to get an empty endpoint info
return nil
}
nwName := n.Name()
if _, ok := networkSettings.Networks[nwName]; !ok {
networkSettings.Networks[nwName] = &internalnetwork.EndpointSettings{
EndpointSettings: &network.EndpointSettings{},
}
}
networkSettings.Networks[nwName].NetworkID = n.ID()
networkSettings.Networks[nwName].EndpointID = ep.ID()
iface := epInfo.Iface()
if iface == nil {
return nil
}
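// Copy the interface's MAC address and IPv4/IPv6 configuration into the per-network endpoint settings.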
if iface.MacAddress() != nil {
networkSettings.Networks[nwName].MacAddress = iface.MacAddress().String()
}
if iface.Address() != nil {
ones, _ := iface.Address().Mask.Size()
networkSettings.Networks[nwName].IPAddress = iface.Address().IP.String()
networkSettings.Networks[nwName].IPPrefixLen = ones
}
if iface.AddressIPv6() != nil && iface.AddressIPv6().IP.To16() != nil {
onesv6, _ := iface.AddressIPv6().Mask.Size()
networkSettings.Networks[nwName].GlobalIPv6Address = iface.AddressIPv6().IP.String()
networkSettings.Networks[nwName].GlobalIPv6PrefixLen = onesv6
}
return nil
}
// buildJoinOptions builds endpoint Join options from a given network.
func buildJoinOptions(networkSettings *internalnetwork.Settings, n interface{ Name() string }) ([]libnetwork.EndpointOption, error) {
var joinOptions []libnetwork.EndpointOption
if epConfig, ok := networkSettings.Networks[n.Name()]; ok {
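// Legacy container links are passed to libnetwork as name/alias pairs on the endpoint.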
for _, str := range epConfig.Links {
name, alias, err := opts.ParseLink(str)
if err != nil {
return nil, err
}
joinOptions = append(joinOptions, libnetwork.CreateOptionAlias(name, alias))
}
for k, v := range epConfig.DriverOpts {
joinOptions = append(joinOptions, libnetwork.EndpointOptionGeneric(options.Generic{k: v}))
}
}
return joinOptions, nil
}