Pārlūkot izejas kodu

Merge pull request #46850 from robmry/46829-allow_ipv6_subnet_change

Allow overlapping change in bridge's IPv6 network.
Sebastiaan van Stijn 1 gadu atpakaļ
vecāks
revīzija
388216fc45

+ 5 - 0
daemon/config/config_linux.go

@@ -9,6 +9,7 @@ import (
 	"github.com/containerd/cgroups/v3"
 	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/system"
+	"github.com/docker/docker/libnetwork/drivers/bridge"
 	"github.com/docker/docker/opts"
 	"github.com/docker/docker/pkg/homedir"
 	"github.com/docker/docker/pkg/rootless"
@@ -178,6 +179,10 @@ func (conf *Config) ValidatePlatformConfig() error {
 		return err
 	}
 
+	if err := bridge.ValidateFixedCIDRV6(conf.FixedCIDRv6); err != nil {
+		return errors.Wrap(err, "invalid fixed-cidr-v6")
+	}
+
 	return verifyDefaultCgroupNsMode(conf.CgroupNamespaceMode)
 }
 

+ 185 - 0
integration/networking/bridge_test.go

@@ -86,6 +86,32 @@ func TestBridgeICC(t *testing.T) {
 			},
 			linkLocal: true,
 		},
+		{
+			// As for 'LL non-internal', but ping the container by name instead of by address
+			// - the busybox test containers only have one interface with a link local
+			// address, so the zone index is not required:
+			//   RFC-4007, section 6: "[...] for nodes with only a single non-loopback
+			//   interface (e.g., a single Ethernet interface), the common case, link-local
+			//   addresses need not be qualified with a zone index."
+			// So, for this common case, LL addresses should be included in DNS config.
+			name: "IPv6 link-local address on non-internal network ping by name",
+			bridgeOpts: []func(*types.NetworkCreate){
+				network.WithIPv6(),
+				network.WithIPAM("fe80::/64", "fe80::1"),
+			},
+		},
+		{
+			name: "IPv6 nonstandard link-local subnet on non-internal network ping by name",
+			// No interfaces apart from the one on the bridge network with this non-default
+			// subnet will be on this link local subnet (it's not currently possible to
+			// configure two networks with the same LL subnet, although perhaps it should
+			// be). So, again, no zone index is required and the LL address should be
+			// included in DNS config.
+			bridgeOpts: []func(*types.NetworkCreate){
+				network.WithIPv6(),
+				network.WithIPAM("fe80:1234::/64", "fe80:1234::1"),
+			},
+		},
 		{
 			name: "IPv6 non-internal network with SLAAC LL address",
 			bridgeOpts: []func(*types.NetworkCreate){
@@ -292,3 +318,162 @@ func TestBridgeINC(t *testing.T) {
 		})
 	}
 }
+
+func TestDefaultBridgeIPv6(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType == "windows")
+
+	ctx := setupTest(t)
+
+	testcases := []struct {
+		name          string
+		fixed_cidr_v6 string
+	}{
+		{
+			name:          "IPv6 ULA",
+			fixed_cidr_v6: "fd00:1234::/64",
+		},
+		{
+			name:          "IPv6 LLA only",
+			fixed_cidr_v6: "fe80::/64",
+		},
+		{
+			name:          "IPv6 nonstandard LLA only",
+			fixed_cidr_v6: "fe80:1234::/64",
+		},
+	}
+
+	for _, tc := range testcases {
+		t.Run(tc.name, func(t *testing.T) {
+			ctx := testutil.StartSpan(ctx, t)
+
+			d := daemon.New(t)
+			d.StartWithBusybox(ctx, t,
+				"--experimental",
+				"--ip6tables",
+				"--ipv6",
+				"--fixed-cidr-v6", tc.fixed_cidr_v6,
+			)
+			defer d.Stop(t)
+
+			c := d.NewClientT(t)
+			defer c.Close()
+
+			cID := container.Run(ctx, t, c,
+				container.WithImage("busybox:latest"),
+				container.WithCmd("top"),
+			)
+			defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{
+				Force: true,
+			})
+
+			networkName := "bridge"
+			inspect := container.Inspect(ctx, t, c, cID)
+			pingHost := inspect.NetworkSettings.Networks[networkName].GlobalIPv6Address
+
+			attachCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+			defer cancel()
+			res := container.RunAttach(attachCtx, t, c,
+				container.WithImage("busybox:latest"),
+				container.WithCmd("ping", "-c1", "-W3", pingHost),
+			)
+			defer c.ContainerRemove(ctx, res.ContainerID, containertypes.RemoveOptions{
+				Force: true,
+			})
+
+			assert.Check(t, is.Equal(res.ExitCode, 0))
+			assert.Check(t, is.Equal(res.Stderr.String(), ""))
+			assert.Check(t, is.Contains(res.Stdout.String(), "1 packets transmitted, 1 packets received"))
+		})
+	}
+}
+
+// Check that it's possible to change 'fixed-cidr-v6' and restart the daemon.
+func TestDefaultBridgeAddresses(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType == "windows")
+
+	ctx := setupTest(t)
+	d := daemon.New(t)
+
+	type testStep struct {
+		stepName    string
+		fixedCIDRV6 string
+		expAddrs    []string
+	}
+
+	testcases := []struct {
+		name  string
+		steps []testStep
+	}{
+		{
+			name: "Unique-Local Subnet Changes",
+			steps: []testStep{
+				{
+					stepName:    "Set up initial UL prefix",
+					fixedCIDRV6: "fd1c:f1a0:5d8d:aaaa::/64",
+					expAddrs:    []string{"fd1c:f1a0:5d8d:aaaa::1/64", "fe80::1/64"},
+				},
+				{
+					// Modify that prefix, the default bridge's address must be deleted and re-added.
+					stepName:    "Modify UL prefix - address change",
+					fixedCIDRV6: "fd1c:f1a0:5d8d:bbbb::/64",
+					expAddrs:    []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::1/64"},
+				},
+				{
+					// Modify the prefix length, the default bridge's address should not change.
+					stepName:    "Modify UL prefix - no address change",
+					fixedCIDRV6: "fd1c:f1a0:5d8d:bbbb::/80",
+					// The prefix length displayed by 'ip a' is not updated - it's informational, and
+					// can't be changed without unnecessarily deleting and re-adding the address.
+					expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::1/64"},
+				},
+			},
+		},
+		{
+			name: "Link-Local Subnet Changes",
+			steps: []testStep{
+				{
+					stepName:    "Standard LL subnet prefix",
+					fixedCIDRV6: "fe80::/64",
+					expAddrs:    []string{"fe80::1/64"},
+				},
+				{
+					// Modify that prefix, the default bridge's address must be deleted and re-added.
+					// The bridge must still have an address in the required (standard) LL subnet.
+					stepName:    "Nonstandard LL prefix - address change",
+					fixedCIDRV6: "fe80:1234::/32",
+					expAddrs:    []string{"fe80:1234::1/32", "fe80::1/64"},
+				},
+				{
+					// Modify the prefix length, the addresses should not change.
+					stepName:    "Modify LL prefix - no address change",
+					fixedCIDRV6: "fe80:1234::/64",
+					// The prefix length displayed by 'ip a' is not updated - it's informational, and
+					// can't be changed without unnecessarily deleting and re-adding the address.
+					expAddrs: []string{"fe80:1234::1/", "fe80::1/64"},
+				},
+			},
+		},
+	}
+
+	for _, tc := range testcases {
+		t.Run(tc.name, func(t *testing.T) {
+			for _, step := range tc.steps {
+				// Check that the daemon starts - regression test for:
+				//   https://github.com/moby/moby/issues/46829
+				d.Start(t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)
+				d.Stop(t)
+
+				// Check that the expected addresses have been applied to the bridge. (Skip in
+				// rootless mode, because the bridge is in a different network namespace.)
+				if !testEnv.IsRootless() {
+					res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
+					assert.Equal(t, res.ExitCode, 0, step.stepName)
+					stdout := res.Stdout()
+					for _, expAddr := range step.expAddrs {
+						assert.Check(t, is.Contains(stdout, expAddr))
+					}
+				}
+			}
+		})
+	}
+}

+ 51 - 5
libnetwork/drivers/bridge/bridge_linux.go

@@ -1,8 +1,8 @@
 package bridge
 
 import (
+	"bytes"
 	"context"
-	"errors"
 	"fmt"
 	"net"
 	"os"
@@ -11,6 +11,7 @@ import (
 	"sync"
 
 	"github.com/containerd/log"
+	"github.com/docker/docker/errdefs"
 	"github.com/docker/docker/libnetwork/datastore"
 	"github.com/docker/docker/libnetwork/driverapi"
 	"github.com/docker/docker/libnetwork/iptables"
@@ -22,6 +23,7 @@ import (
 	"github.com/docker/docker/libnetwork/portmapper"
 	"github.com/docker/docker/libnetwork/scope"
 	"github.com/docker/docker/libnetwork/types"
+	"github.com/pkg/errors"
 	"github.com/vishvananda/netlink"
 )
 
@@ -179,6 +181,44 @@ func Register(r driverapi.Registerer, config map[string]interface{}) error {
 	})
 }
 
+// The behaviour of previous implementations of bridge subnet prefix assignment
+// is preserved here...
+//
+// The LL prefix, 'fe80::/64' can be used as an IPAM pool. Linux always assigns
+// link-local addresses with this prefix. But, pool-assigned addresses are very
+// unlikely to conflict.
+//
+// Don't allow a nonstandard LL subnet to overlap with 'fe80::/64'. For example,
+// if the config asked for subnet prefix 'fe80::/80', the bridge and its
+// containers would each end up with two LL addresses, Linux's '/64' and one from
+// the IPAM pool claiming '/80'. Although the specified prefix length must not
+// affect the host's determination of whether the address is on-link and to be
+// added to the interface's Prefix List (RFC-5942), differing prefix lengths
+// would be confusing and have been disallowed by earlier implementations of
+// bridge address assignment.
+func validateIPv6Subnet(addr *net.IPNet) error {
+	if addr != nil && bridgeIPv6.Contains(addr.IP) && !bytes.Equal(bridgeIPv6.Mask, addr.Mask) {
+		return errdefs.InvalidParameter(errors.New("clash with the Link-Local prefix 'fe80::/64'"))
+	}
+	return nil
+}
+
+// ValidateFixedCIDRV6 checks that val is an IPv6 address and prefix length that
+// does not overlap with the link local subnet prefix 'fe80::/64'.
+func ValidateFixedCIDRV6(val string) error {
+	if val == "" {
+		return nil
+	}
+	ip, ipNet, err := net.ParseCIDR(val)
+	if err != nil {
+		return errdefs.InvalidParameter(err)
+	}
+	if ip.To4() != nil {
+		return errdefs.InvalidParameter(errors.New("fixed-cidr-v6 is not an IPv6 subnet"))
+	}
+	return validateIPv6Subnet(ipNet)
+}
+
 // Validate performs a static validation on the network configuration parameters.
 // Whatever can be assessed a priori before attempting any programming.
 func (c *networkConfiguration) Validate() error {
@@ -516,6 +556,13 @@ func (c *networkConfiguration) processIPAM(id string, ipamV4Data, ipamV6Data []d
 		}
 	}
 
+	// TODO(robmry) - move this to networkConfiguration.Validate()
+	//   - but that can't happen until Validate() is called after processIPAM() has set
+	//     up the IP addresses, instead of during parseNetworkOptions().
+	if err := validateIPv6Subnet(c.AddressIPv6); err != nil {
+		return err
+	}
+
 	return nil
 }
 
@@ -759,9 +806,9 @@ func (d *driver) createNetwork(config *networkConfiguration) (err error) {
 		// assigned an IPv6 link-local address.
 		{config.EnableIPv6, setupBridgeIPv6},
 
-		// We ensure that the bridge has the expectedIPv4 and IPv6 addresses in
-		// the case of a previously existing device.
-		{bridgeAlreadyExists && !config.InhibitIPv4, setupVerifyAndReconcile},
+		// Ensure the bridge has the expected IPv4 addresses in the case of a previously
+		// existing device.
+		{bridgeAlreadyExists && !config.InhibitIPv4, setupVerifyAndReconcileIPv4},
 
 		// Enable IPv6 Forwarding
 		{enableIPv6Forwarding, setupIPv6Forwarding},
@@ -1059,7 +1106,6 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
 		if config.AddressIPv6 != nil {
 			network = config.AddressIPv6
 		}
-
 		ones, _ := network.Mask.Size()
 		if ones > 80 {
 			err = types.ForbiddenErrorf("Cannot self generate an IPv6 address on network %v: At least 48 host bits are needed.", network)

+ 78 - 1
libnetwork/drivers/bridge/bridge_linux_test.go

@@ -19,6 +19,8 @@ import (
 	"github.com/docker/docker/libnetwork/portallocator"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/vishvananda/netlink"
+	"gotest.tools/v3/assert"
+	is "gotest.tools/v3/assert/cmp"
 )
 
 func TestEndpointMarshalling(t *testing.T) {
@@ -964,6 +966,81 @@ func TestValidateConfig(t *testing.T) {
 	}
 }
 
+func TestValidateFixedCIDRV6(t *testing.T) {
+	tests := []struct {
+		doc, input, expectedErr string
+	}{
+		{
+			doc:   "valid",
+			input: "2001:db8::/32",
+		},
+		{
+			// fixed-cidr-v6 doesn't have to be specified.
+			doc: "empty",
+		},
+		{
+			// Using the LL subnet prefix is ok.
+			doc:   "Link-Local subnet prefix",
+			input: "fe80::/64",
+		},
+		{
+			// Using a nonstandard LL prefix that doesn't overlap with the standard LL prefix
+			// is ok.
+			doc:   "non-overlapping link-local prefix",
+			input: "fe80:1234::/80",
+		},
+		{
+			// Overlapping with the standard LL prefix isn't allowed.
+			doc:         "overlapping link-local prefix fe80::/63",
+			input:       "fe80::/63",
+			expectedErr: "clash with the Link-Local prefix 'fe80::/64'",
+		},
+		{
+			// Overlapping with the standard LL prefix isn't allowed.
+			doc:         "overlapping link-local subnet fe80::/65",
+			input:       "fe80::/65",
+			expectedErr: "clash with the Link-Local prefix 'fe80::/64'",
+		},
+		{
+			// The address has to be valid IPv6 subnet.
+			doc:         "invalid IPv6 subnet",
+			input:       "2000:db8::",
+			expectedErr: "invalid CIDR address: 2000:db8::",
+		},
+		{
+			doc:         "non-IPv6 subnet",
+			input:       "10.3.4.5/24",
+			expectedErr: "fixed-cidr-v6 is not an IPv6 subnet",
+		},
+		{
+			doc:         "IPv4-mapped subnet 1",
+			input:       "::ffff:10.2.4.0/24",
+			expectedErr: "fixed-cidr-v6 is not an IPv6 subnet",
+		},
+		{
+			doc:         "IPv4-mapped subnet 2",
+			input:       "::ffff:a01:203/24",
+			expectedErr: "fixed-cidr-v6 is not an IPv6 subnet",
+		},
+		{
+			doc:         "invalid subnet",
+			input:       "nonsense",
+			expectedErr: "invalid CIDR address: nonsense",
+		},
+	}
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.doc, func(t *testing.T) {
+			err := ValidateFixedCIDRV6(tc.input)
+			if tc.expectedErr == "" {
+				assert.Check(t, err)
+			} else {
+				assert.Check(t, is.Error(err, tc.expectedErr))
+			}
+		})
+	}
+}
+
 func TestSetDefaultGw(t *testing.T) {
 	defer netnsutils.SetupTestOSContext(t)()
 
@@ -1098,7 +1175,7 @@ func TestCreateWithExistingBridge(t *testing.T) {
 		t.Fatalf("Failed to getNetwork(%s): %v", brName, err)
 	}
 
-	addrs4, _, err := nw.bridge.addresses()
+	addrs4, err := nw.bridge.addresses(netlink.FAMILY_V4)
 	if err != nil {
 		t.Fatalf("Failed to get the bridge network's address: %v", err)
 	}

+ 0 - 13
libnetwork/drivers/bridge/errors.go

@@ -222,19 +222,6 @@ func (ipv4 *IPv4AddrAddError) Error() string {
 // InternalError denotes the type of this error
 func (ipv4 *IPv4AddrAddError) InternalError() {}
 
-// IPv6AddrAddError is returned when IPv6 address could not be added to the bridge.
-type IPv6AddrAddError struct {
-	IP  *net.IPNet
-	Err error
-}
-
-func (ipv6 *IPv6AddrAddError) Error() string {
-	return fmt.Sprintf("failed to add IPv6 address %s to bridge: %v", ipv6.IP, ipv6.Err)
-}
-
-// InternalError denotes the type of this error
-func (ipv6 *IPv6AddrAddError) InternalError() {}
-
 // IPv4AddrNoMatchError is returned when the bridge's IPv4 address does not match configured.
 type IPv4AddrNoMatchError struct {
 	IP    net.IP

+ 86 - 19
libnetwork/drivers/bridge/interface_linux.go

@@ -4,8 +4,11 @@ import (
 	"context"
 	"fmt"
 	"net"
+	"net/netip"
 
 	"github.com/containerd/log"
+	"github.com/docker/docker/errdefs"
+	"github.com/docker/docker/libnetwork/internal/netiputil"
 	"github.com/vishvananda/netlink"
 )
 
@@ -53,39 +56,103 @@ func (i *bridgeInterface) exists() bool {
 	return i.Link != nil
 }
 
-// addresses returns all IPv4 addresses and all IPv6 addresses for the bridge interface.
-func (i *bridgeInterface) addresses() ([]netlink.Addr, []netlink.Addr, error) {
+// addresses returns a bridge's addresses, IPv4 (with family=netlink.FAMILY_V4)
+// or IPv6 (family=netlink.FAMILY_V6).
+func (i *bridgeInterface) addresses(family int) ([]netlink.Addr, error) {
 	if !i.exists() {
 		// A nonexistent interface, by definition, cannot have any addresses.
-		return nil, nil, nil
+		return nil, nil
 	}
-	v4addr, err := i.nlh.AddrList(i.Link, netlink.FAMILY_V4)
+	addrs, err := i.nlh.AddrList(i.Link, family)
 	if err != nil {
-		return nil, nil, fmt.Errorf("Failed to retrieve V4 addresses: %v", err)
+		return nil, fmt.Errorf("Failed to retrieve addresses: %v", err)
 	}
+	return addrs, nil
+}
 
-	v6addr, err := i.nlh.AddrList(i.Link, netlink.FAMILY_V6)
-	if err != nil {
-		return nil, nil, fmt.Errorf("Failed to retrieve V6 addresses: %v", err)
+func getRequiredIPv6Addrs(config *networkConfiguration) (requiredAddrs map[netip.Addr]netip.Prefix, addr *net.IPNet, gateway net.IP, err error) {
+	requiredAddrs = make(map[netip.Addr]netip.Prefix)
+
+	// TODO(robmry) - is config.AddressIPv6 always set at this point?
+	//   The logic here is preserved from the original setupBridgeIPv6(), but
+	//   can probably be simplified.
+
+	// Always give the bridge 'fe80::1' - every interface is required to have an
+	// address in 'fe80::/64'. Linux may assign an address, but we'll replace it with
+	// 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool
+	// assigned address will not be a second address in the LL subnet.
+	addr = bridgeIPv6
+	gateway = bridgeIPv6.IP
+	ra, ok := netiputil.ToPrefix(bridgeIPv6)
+	if !ok {
+		err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix")
+		return nil, nil, net.IP{}, err
 	}
+	requiredAddrs[ra.Addr()] = ra
 
-	if len(v4addr) == 0 {
-		return nil, v6addr, nil
+	// Set up the user-specified bridge address and gateway.
+	if config.AddressIPv6 != nil {
+		addr = config.AddressIPv6
+		gateway = config.AddressIPv6.IP
+		ra, ok := netiputil.ToPrefix(config.AddressIPv6)
+		if !ok {
+			err = fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix", config.AddressIPv6.String())
+			return nil, nil, net.IP{}, err
+		}
+		requiredAddrs[ra.Addr()] = ra
 	}
-	return v4addr, v6addr, nil
+
+	return requiredAddrs, addr, gateway, nil
 }
 
-func (i *bridgeInterface) programIPv6Address() error {
-	_, nlAddressList, err := i.addresses()
+func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) error {
+	// Get the IPv6 addresses currently assigned to the bridge, if any.
+	existingAddrs, err := i.addresses(netlink.FAMILY_V6)
 	if err != nil {
-		return &IPv6AddrAddError{IP: i.bridgeIPv6, Err: fmt.Errorf("failed to retrieve address list: %v", err)}
+		return errdefs.System(err)
 	}
-	nlAddr := netlink.Addr{IPNet: i.bridgeIPv6}
-	if findIPv6Address(nlAddr, nlAddressList) {
-		return nil
+
+	// Get the list of required IPv6 addresses for this bridge.
+	var requiredAddrs map[netip.Addr]netip.Prefix
+	requiredAddrs, i.bridgeIPv6, i.gatewayIPv6, err = getRequiredIPv6Addrs(config)
+	if err != nil {
+		return errdefs.System(err)
+	}
+
+	// Remove addresses that aren't required.
+	for _, existingAddr := range existingAddrs {
+		ea, ok := netip.AddrFromSlice(existingAddr.IP)
+		if !ok {
+			return errdefs.System(fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
+		}
+		// Ignore the prefix length when comparing addresses, it's informational
+		// (RFC-5942 section 4), and removing/re-adding an address that's still valid
+		// would disrupt traffic on live-restore.
+		if _, required := requiredAddrs[ea]; !required {
+			err := i.nlh.AddrDel(i.Link, &existingAddr) //#nosec G601 -- Memory aliasing is not an issue in practice as the &existingAddr pointer is not retained by the callee after the AddrDel() call returns.
+			if err != nil {
+				log.G(context.TODO()).WithFields(log.Fields{"error": err, "address": existingAddr.IPNet}).Warnf("Failed to remove residual IPv6 address from bridge")
+			}
+		}
 	}
-	if err := i.nlh.AddrAdd(i.Link, &nlAddr); err != nil {
-		return &IPv6AddrAddError{IP: i.bridgeIPv6, Err: err}
+	// Add or update required addresses.
+	for _, addrPrefix := range requiredAddrs {
+		// Using AddrReplace(), rather than AddrAdd(). When the subnet is changed for an
+		// existing bridge in a way that doesn't affect the bridge's assigned address,
+		// the old address has not been removed at this point - because that would be
+		// service-affecting for a running container.
+		//
+		// But if, for example, 'fixed-cidr-v6' is changed from '2000:dbe::/64' to
+		// '2000:dbe::/80', the default bridge will still be assigned address
+		// '2000:dbe::1'. In the output of 'ip a', the prefix length is displayed - and
+		// the user is likely to expect to see it updated from '64' to '80'.
+		// Unfortunately, 'netlink.AddrReplace()' ('RTM_NEWADDR' with 'NLM_F_REPLACE')
+		// doesn't update the prefix length. This is a cosmetic problem, the prefix
+		// length of an assigned address is not used to determine whether an address is
+		// "on-link" (RFC-5942).
+		if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil {
+			return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %v", i.bridgeIPv6, err))
+		}
 	}
 	return nil
 }

+ 157 - 19
libnetwork/drivers/bridge/interface_linux_test.go

@@ -1,12 +1,43 @@
 package bridge
 
 import (
+	"net"
+	"net/netip"
+	"strings"
 	"testing"
 
 	"github.com/docker/docker/internal/testutils/netnsutils"
+	"github.com/google/go-cmp/cmp"
 	"github.com/vishvananda/netlink"
+	"gotest.tools/v3/assert"
+	is "gotest.tools/v3/assert/cmp"
 )
 
+func cidrToIPNet(t *testing.T, cidr string) *net.IPNet {
+	t.Helper()
+	ip, ipNet, err := net.ParseCIDR(cidr)
+	assert.Assert(t, is.Nil(err))
+	return &net.IPNet{IP: ip, Mask: ipNet.Mask}
+}
+
+func addAddr(t *testing.T, link netlink.Link, addr string) {
+	t.Helper()
+	ipNet := cidrToIPNet(t, addr)
+	err := netlink.AddrAdd(link, &netlink.Addr{IPNet: ipNet})
+	assert.Assert(t, is.Nil(err))
+}
+
+func prepTestBridge(t *testing.T, nc *networkConfiguration) *bridgeInterface {
+	t.Helper()
+	nh, err := netlink.NewHandle()
+	assert.Assert(t, err)
+	i, err := newInterface(nh, nc)
+	assert.Assert(t, err)
+	err = setupDevice(nc, i)
+	assert.Assert(t, err)
+	return i
+}
+
 func TestInterfaceDefaultName(t *testing.T) {
 	defer netnsutils.SetupTestOSContext(t)()
 
@@ -16,35 +47,142 @@ func TestInterfaceDefaultName(t *testing.T) {
 	}
 	config := &networkConfiguration{}
 	_, err = newInterface(nh, config)
-	if err != nil {
-		t.Fatalf("newInterface() failed: %v", err)
-	}
+	assert.Check(t, err)
+	assert.Equal(t, config.BridgeName, DefaultBridgeName)
+}
 
-	if config.BridgeName != DefaultBridgeName {
-		t.Fatalf("Expected default interface name %q, got %q", DefaultBridgeName, config.BridgeName)
-	}
+func TestAddressesNoInterface(t *testing.T) {
+	i := bridgeInterface{}
+	addrs, err := i.addresses(netlink.FAMILY_V6)
+	assert.NilError(t, err)
+	assert.Check(t, is.Len(addrs, 0))
 }
 
 func TestAddressesEmptyInterface(t *testing.T) {
 	defer netnsutils.SetupTestOSContext(t)()
 
 	nh, err := netlink.NewHandle()
-	if err != nil {
-		t.Fatal(err)
-	}
+	assert.NilError(t, err)
+
 	inf, err := newInterface(nh, &networkConfiguration{})
-	if err != nil {
-		t.Fatalf("newInterface() failed: %v", err)
-	}
+	assert.NilError(t, err)
 
-	addrsv4, addrsv6, err := inf.addresses()
-	if err != nil {
-		t.Fatalf("Failed to get addresses of default interface: %v", err)
+	addrsv4, err := inf.addresses(netlink.FAMILY_V4)
+	assert.NilError(t, err)
+	assert.Check(t, is.Len(addrsv4, 0))
+
+	addrsv6, err := inf.addresses(netlink.FAMILY_V6)
+	assert.NilError(t, err)
+	assert.Check(t, is.Len(addrsv6, 0))
+}
+
+func TestAddressesNonEmptyInterface(t *testing.T) {
+	defer netnsutils.SetupTestOSContext(t)()
+
+	i := prepTestBridge(t, &networkConfiguration{})
+
+	const expAddrV4, expAddrV6 = "192.168.1.2/24", "fd00:1234::/64"
+	addAddr(t, i.Link, expAddrV4)
+	addAddr(t, i.Link, expAddrV6)
+
+	addrs, err := i.addresses(netlink.FAMILY_V4)
+	assert.NilError(t, err)
+	assert.Check(t, is.Len(addrs, 1))
+	assert.Equal(t, addrs[0].IPNet.String(), expAddrV4)
+
+	addrs, err = i.addresses(netlink.FAMILY_V6)
+	assert.NilError(t, err)
+	assert.Check(t, is.Len(addrs, 1))
+	assert.Equal(t, addrs[0].IPNet.String(), expAddrV6)
+}
+
+func TestGetRequiredIPv6Addrs(t *testing.T) {
+	testcases := []struct {
+		name         string
+		addressIPv6  string
+		expReqdAddrs []string
+	}{
+		{
+			name:         "Regular address, expect default link local",
+			addressIPv6:  "2000:3000::1/80",
+			expReqdAddrs: []string{"fe80::1/64", "2000:3000::1/80"},
+		},
+		{
+			name:         "Standard link local address only",
+			addressIPv6:  "fe80::1/64",
+			expReqdAddrs: []string{"fe80::1/64"},
+		},
+		{
+			name:         "Nonstandard link local address",
+			addressIPv6:  "fe80:abcd::1/42",
+			expReqdAddrs: []string{"fe80:abcd::1/42", "fe80::1/64"},
+		},
 	}
-	if len(addrsv4) != 0 {
-		t.Fatalf("Default interface has unexpected IPv4: %s", addrsv4)
+
+	for _, tc := range testcases {
+		t.Run(tc.name, func(t *testing.T) {
+			config := &networkConfiguration{
+				AddressIPv6: cidrToIPNet(t, tc.addressIPv6),
+			}
+
+			expResult := map[netip.Addr]netip.Prefix{}
+			for _, addr := range tc.expReqdAddrs {
+				expResult[netip.MustParseAddr(strings.Split(addr, "/")[0])] = netip.MustParsePrefix(addr)
+			}
+
+			reqd, addr, gw, err := getRequiredIPv6Addrs(config)
+			assert.Check(t, is.Nil(err))
+			assert.Check(t, is.DeepEqual(addr, config.AddressIPv6))
+			assert.Check(t, is.DeepEqual(gw, config.AddressIPv6.IP))
+			assert.Check(t, is.DeepEqual(reqd, expResult,
+				cmp.Comparer(func(a, b netip.Prefix) bool { return a == b })))
+		})
 	}
-	if len(addrsv6) != 0 {
-		t.Fatalf("Default interface has unexpected IPv6: %v", addrsv6)
+}
+
+func TestProgramIPv6Addresses(t *testing.T) {
+	defer netnsutils.SetupTestOSContext(t)()
+
+	checkAddrs := func(i *bridgeInterface, expAddrs []string) {
+		t.Helper()
+		exp := []netlink.Addr{}
+		for _, a := range expAddrs {
+			ipNet := cidrToIPNet(t, a)
+			exp = append(exp, netlink.Addr{IPNet: ipNet})
+		}
+		actual, err := i.addresses(netlink.FAMILY_V6)
+		assert.NilError(t, err)
+		assert.DeepEqual(t, exp, actual)
 	}
+
+	nc := &networkConfiguration{}
+	i := prepTestBridge(t, nc)
+
+	// The bridge has no addresses, ask for a regular IPv6 network and expect it to
+	// be added to the bridge, with the default link local address.
+	nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/64")
+	err := i.programIPv6Addresses(nc)
+	assert.NilError(t, err)
+	checkAddrs(i, []string{"2000:3000::1/64", "fe80::1/64"})
+
+	// Shrink the subnet of that regular address, the prefix length of the address
+	// will not be modified - but it's informational-only, the address itself has
+	// not changed.
+	nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/80")
+	err = i.programIPv6Addresses(nc)
+	assert.NilError(t, err)
+	checkAddrs(i, []string{"2000:3000::1/64", "fe80::1/64"})
+
+	// Ask for link-local only, by specifying an address with the Link Local prefix.
+	// The regular address should be removed.
+	nc.AddressIPv6 = cidrToIPNet(t, "fe80::1/64")
+	err = i.programIPv6Addresses(nc)
+	assert.NilError(t, err)
+	checkAddrs(i, []string{"fe80::1/64"})
+
+	// Swap the standard link local address for a nonstandard one.
+	nc.AddressIPv6 = cidrToIPNet(t, "fe80:5555::1/55")
+	err = i.programIPv6Addresses(nc)
+	assert.NilError(t, err)
+	checkAddrs(i, []string{"fe80:5555::1/55", "fe80::1/64"})
 }

+ 1 - 1
libnetwork/drivers/bridge/setup_ipv4_linux.go

@@ -37,7 +37,7 @@ func setupBridgeIPv4(config *networkConfiguration, i *bridgeInterface) error {
 	}
 
 	if !config.InhibitIPv4 {
-		addrv4List, _, err := i.addresses()
+		addrv4List, err := i.addresses(netlink.FAMILY_V4)
 		if err != nil {
 			return fmt.Errorf("failed to retrieve bridge interface addresses: %v", err)
 		}

+ 3 - 17
libnetwork/drivers/bridge/setup_ipv6_linux.go

@@ -32,23 +32,9 @@ func setupBridgeIPv6(config *networkConfiguration, i *bridgeInterface) error {
 		}
 	}
 
-	// Store bridge network and default gateway
-	i.bridgeIPv6 = bridgeIPv6
-	i.gatewayIPv6 = i.bridgeIPv6.IP
-
-	if err := i.programIPv6Address(); err != nil {
-		return err
-	}
-
-	if config.AddressIPv6 == nil {
-		return nil
-	}
-
-	// Store the user specified bridge network and network gateway and program it
-	i.bridgeIPv6 = config.AddressIPv6
-	i.gatewayIPv6 = config.AddressIPv6.IP
-
-	if err := i.programIPv6Address(); err != nil {
+	// Remove unwanted addresses from the bridge, add required addresses, and assign
+	// values to "i.bridgeIPv6", "i.gatewayIPv6".
+	if err := i.programIPv6Addresses(config); err != nil {
 		return err
 	}
 

+ 5 - 33
libnetwork/drivers/bridge/setup_verify_linux.go

@@ -1,28 +1,25 @@
 package bridge
 
 import (
-	"context"
 	"fmt"
 	"strings"
 
-	"github.com/containerd/log"
 	"github.com/docker/docker/libnetwork/ns"
-	"github.com/docker/docker/libnetwork/types"
 	"github.com/vishvananda/netlink"
 )
 
-// setupVerifyAndReconcile checks what IP addresses the given i interface has and ensures that they match the passed
-// network config. It also removes any extra unicast IPv6 addresses found.
-func setupVerifyAndReconcile(config *networkConfiguration, i *bridgeInterface) error {
+// setupVerifyAndReconcileIPv4 checks what IPv4 addresses the given i interface has
+// and ensures that they match the passed network config.
+func setupVerifyAndReconcileIPv4(config *networkConfiguration, i *bridgeInterface) error {
 	// Fetch a slice of IPv4 addresses and a slice of IPv6 addresses from the bridge.
-	addrsv4, addrsv6, err := i.addresses()
+	addrsv4, err := i.addresses(netlink.FAMILY_V4)
 	if err != nil {
 		return fmt.Errorf("Failed to verify ip addresses: %v", err)
 	}
 
 	addrv4, _ := selectIPv4Address(addrsv4, config.AddressIPv4)
 
-	// Verify that the bridge does have an IPv4 address.
+	// Verify that the bridge has an IPv4 address.
 	if !config.Internal && addrv4.IPNet == nil {
 		return &ErrNoIPAddr{}
 	}
@@ -32,34 +29,9 @@ func setupVerifyAndReconcile(config *networkConfiguration, i *bridgeInterface) e
 		return &IPv4AddrNoMatchError{IP: addrv4.IP, CfgIP: config.AddressIPv4.IP}
 	}
 
-	// Verify that one of the bridge IPv6 addresses matches the requested
-	// configuration.
-	if config.EnableIPv6 && !config.Internal && !findIPv6Address(netlink.Addr{IPNet: bridgeIPv6}, addrsv6) {
-		return (*IPv6AddrNoMatchError)(bridgeIPv6)
-	}
-
-	// Release any residual IPv6 address that might be there because of older daemon instances
-	for _, addrv6 := range addrsv6 {
-		addrv6 := addrv6
-		if addrv6.IP.IsGlobalUnicast() && !types.CompareIPNet(addrv6.IPNet, i.bridgeIPv6) {
-			if err := i.nlh.AddrDel(i.Link, &addrv6); err != nil {
-				log.G(context.TODO()).Warnf("Failed to remove residual IPv6 address %s from bridge: %v", addrv6.IPNet, err)
-			}
-		}
-	}
-
 	return nil
 }
 
-func findIPv6Address(addr netlink.Addr, addresses []netlink.Addr) bool {
-	for _, addrv6 := range addresses {
-		if addrv6.String() == addr.String() {
-			return true
-		}
-	}
-	return false
-}
-
 func bridgeInterfaceExists(name string) (bool, error) {
 	nlh := ns.NlHandle()
 	link, err := nlh.LinkByName(name)

+ 3 - 42
libnetwork/drivers/bridge/setup_verify_linux_test.go

@@ -38,7 +38,7 @@ func TestSetupVerify(t *testing.T) {
 		t.Fatalf("Failed to assign IPv4 %s to interface: %v", config.AddressIPv4, err)
 	}
 
-	if err := setupVerifyAndReconcile(config, inf); err != nil {
+	if err := setupVerifyAndReconcileIPv4(config, inf); err != nil {
 		t.Fatalf("Address verification failed: %v", err)
 	}
 }
@@ -56,7 +56,7 @@ func TestSetupVerifyBad(t *testing.T) {
 		t.Fatalf("Failed to assign IPv4 %s to interface: %v", ipnet, err)
 	}
 
-	if err := setupVerifyAndReconcile(config, inf); err == nil {
+	if err := setupVerifyAndReconcileIPv4(config, inf); err == nil {
 		t.Fatal("Address verification was expected to fail")
 	}
 }
@@ -69,46 +69,7 @@ func TestSetupVerifyMissing(t *testing.T) {
 	config := &networkConfiguration{}
 	config.AddressIPv4 = &net.IPNet{IP: addrv4, Mask: addrv4.DefaultMask()}
 
-	if err := setupVerifyAndReconcile(config, inf); err == nil {
-		t.Fatal("Address verification was expected to fail")
-	}
-}
-
-func TestSetupVerifyIPv6(t *testing.T) {
-	defer netnsutils.SetupTestOSContext(t)()
-
-	addrv4 := net.IPv4(192, 168, 1, 1)
-	inf := setupVerifyTest(t)
-	config := &networkConfiguration{}
-	config.AddressIPv4 = &net.IPNet{IP: addrv4, Mask: addrv4.DefaultMask()}
-	config.EnableIPv6 = true
-
-	if err := netlink.AddrAdd(inf.Link, &netlink.Addr{IPNet: bridgeIPv6}); err != nil {
-		t.Fatalf("Failed to assign IPv6 %s to interface: %v", bridgeIPv6, err)
-	}
-	if err := netlink.AddrAdd(inf.Link, &netlink.Addr{IPNet: config.AddressIPv4}); err != nil {
-		t.Fatalf("Failed to assign IPv4 %s to interface: %v", config.AddressIPv4, err)
-	}
-
-	if err := setupVerifyAndReconcile(config, inf); err != nil {
-		t.Fatalf("Address verification failed: %v", err)
-	}
-}
-
-func TestSetupVerifyIPv6Missing(t *testing.T) {
-	defer netnsutils.SetupTestOSContext(t)()
-
-	addrv4 := net.IPv4(192, 168, 1, 1)
-	inf := setupVerifyTest(t)
-	config := &networkConfiguration{}
-	config.AddressIPv4 = &net.IPNet{IP: addrv4, Mask: addrv4.DefaultMask()}
-	config.EnableIPv6 = true
-
-	if err := netlink.AddrAdd(inf.Link, &netlink.Addr{IPNet: config.AddressIPv4}); err != nil {
-		t.Fatalf("Failed to assign IPv4 %s to interface: %v", config.AddressIPv4, err)
-	}
-
-	if err := setupVerifyAndReconcile(config, inf); err == nil {
+	if err := setupVerifyAndReconcileIPv4(config, inf); err == nil {
 		t.Fatal("Address verification was expected to fail")
 	}
 }

+ 1 - 1
libnetwork/endpoint.go

@@ -1121,7 +1121,7 @@ func (ep *Endpoint) releaseAddress() {
 		}
 	}
 
-	if ep.iface.addrv6 != nil && ep.iface.addrv6.IP.IsGlobalUnicast() {
+	if ep.iface.addrv6 != nil {
 		if err := ipam.ReleaseAddress(ep.iface.v6PoolID, ep.iface.addrv6.IP); err != nil {
 			log.G(context.TODO()).Warnf("Failed to release ip address %s on delete of endpoint %s (%s): %v", ep.iface.addrv6.IP, ep.Name(), ep.ID(), err)
 		}

+ 12 - 7
libnetwork/ipam/utils.go → libnetwork/internal/netiputil/netiputil.go

@@ -1,4 +1,4 @@
-package ipam
+package netiputil
 
 import (
 	"net"
@@ -7,7 +7,8 @@ import (
 	"github.com/docker/docker/libnetwork/ipbits"
 )
 
-func toIPNet(p netip.Prefix) *net.IPNet {
+// ToIPNet converts p into a *net.IPNet, returning nil if p is not valid.
+func ToIPNet(p netip.Prefix) *net.IPNet {
 	if !p.IsValid() {
 		return nil
 	}
@@ -17,7 +18,9 @@ func toIPNet(p netip.Prefix) *net.IPNet {
 	}
 }
 
-func toPrefix(n *net.IPNet) (netip.Prefix, bool) {
+// ToPrefix converts n into a netip.Prefix. If n is not a valid IPv4 or IPV6
+// address, ToPrefix returns netip.Prefix{}, false.
+func ToPrefix(n *net.IPNet) (netip.Prefix, bool) {
 	if ll := len(n.Mask); ll != net.IPv4len && ll != net.IPv6len {
 		return netip.Prefix{}, false
 	}
@@ -35,14 +38,16 @@ func toPrefix(n *net.IPNet) (netip.Prefix, bool) {
 	return netip.PrefixFrom(addr.Unmap(), ones), true
 }
 
-func hostID(addr netip.Addr, bits uint) uint64 {
+// HostID masks out the 'bits' most-significant bits of addr. The result is
+// undefined if bits > addr.BitLen().
+func HostID(addr netip.Addr, bits uint) uint64 {
 	return ipbits.Field(addr, bits, uint(addr.BitLen()))
 }
 
-// subnetRange returns the amount to add to network.Addr() in order to yield the
+// SubnetRange returns the amount to add to network.Addr() in order to yield the
 // first and last addresses in subnet, respectively.
-func subnetRange(network, subnet netip.Prefix) (start, end uint64) {
-	start = hostID(subnet.Addr(), uint(network.Bits()))
+func SubnetRange(network, subnet netip.Prefix) (start, end uint64) {
+	start = HostID(subnet.Addr(), uint(network.Bits()))
 	end = start + (1 << uint64(subnet.Addr().BitLen()-subnet.Bits())) - 1
 	return start, end
 }

+ 7 - 6
libnetwork/ipam/allocator.go

@@ -9,6 +9,7 @@ import (
 
 	"github.com/containerd/log"
 	"github.com/docker/docker/libnetwork/bitmap"
+	"github.com/docker/docker/libnetwork/internal/netiputil"
 	"github.com/docker/docker/libnetwork/ipamapi"
 	"github.com/docker/docker/libnetwork/ipbits"
 	"github.com/docker/docker/libnetwork/types"
@@ -46,7 +47,7 @@ func newAddrSpace(predefined []*net.IPNet) (*addrSpace, error) {
 	pdf := make([]netip.Prefix, len(predefined))
 	for i, n := range predefined {
 		var ok bool
-		pdf[i], ok = toPrefix(n)
+		pdf[i], ok = netiputil.ToPrefix(n)
 		if !ok {
 			return nil, fmt.Errorf("network at index %d (%v) is not in canonical form", i, n)
 		}
@@ -91,7 +92,7 @@ func (a *Allocator) RequestPool(addressSpace, requestedPool, requestedSubPool st
 		if err != nil {
 			return "", nil, nil, err
 		}
-		return k.String(), toIPNet(k.Subnet), nil, nil
+		return k.String(), netiputil.ToIPNet(k.Subnet), nil, nil
 	}
 
 	if k.Subnet, err = netip.ParsePrefix(requestedPool); err != nil {
@@ -119,7 +120,7 @@ func (a *Allocator) RequestPool(addressSpace, requestedPool, requestedSubPool st
 		return "", nil, nil, err
 	}
 
-	return k.String(), toIPNet(k.Subnet), nil, nil
+	return k.String(), netiputil.ToIPNet(k.Subnet), nil, nil
 }
 
 // ReleasePool releases the address pool identified by the passed id
@@ -336,7 +337,7 @@ func (aSpace *addrSpace) releaseAddress(nw, sub netip.Prefix, address netip.Addr
 
 	defer log.G(context.TODO()).Debugf("Released address Address:%v Sequence:%s", address, p.addrs)
 
-	return p.addrs.Unset(hostID(address, uint(nw.Bits())))
+	return p.addrs.Unset(netiputil.HostID(address, uint(nw.Bits())))
 }
 
 func getAddress(base netip.Prefix, bitmask *bitmap.Bitmap, prefAddress netip.Addr, ipr netip.Prefix, serial bool) (netip.Addr, error) {
@@ -353,10 +354,10 @@ func getAddress(base netip.Prefix, bitmask *bitmap.Bitmap, prefAddress netip.Add
 	if ipr == (netip.Prefix{}) && prefAddress == (netip.Addr{}) {
 		ordinal, err = bitmask.SetAny(serial)
 	} else if prefAddress != (netip.Addr{}) {
-		ordinal = hostID(prefAddress, uint(base.Bits()))
+		ordinal = netiputil.HostID(prefAddress, uint(base.Bits()))
 		err = bitmask.Set(ordinal)
 	} else {
-		start, end := subnetRange(base, ipr)
+		start, end := netiputil.SubnetRange(base, ipr)
 		ordinal, err = bitmask.SetAnyInRange(start, end, serial)
 	}