diff --git a/daemon/config/config_linux.go b/daemon/config/config_linux.go index 9cc5a55bdf..c75033f2c5 100644 --- a/daemon/config/config_linux.go +++ b/daemon/config/config_linux.go @@ -9,6 +9,7 @@ import ( "github.com/containerd/cgroups/v3" "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/system" + "github.com/docker/docker/libnetwork/drivers/bridge" "github.com/docker/docker/opts" "github.com/docker/docker/pkg/homedir" "github.com/docker/docker/pkg/rootless" @@ -178,6 +179,10 @@ func (conf *Config) ValidatePlatformConfig() error { return err } + if err := bridge.ValidateFixedCIDRV6(conf.FixedCIDRv6); err != nil { + return errors.Wrap(err, "invalid fixed-cidr-v6") + } + return verifyDefaultCgroupNsMode(conf.CgroupNamespaceMode) } diff --git a/integration/networking/bridge_test.go b/integration/networking/bridge_test.go index f9c92909fa..e3d1fe2a36 100644 --- a/integration/networking/bridge_test.go +++ b/integration/networking/bridge_test.go @@ -86,6 +86,32 @@ func TestBridgeICC(t *testing.T) { }, linkLocal: true, }, + { + // As for 'LL non-internal', but ping the container by name instead of by address + // - the busybox test containers only have one interface with a link local + // address, so the zone index is not required: + // RFC-4007, section 6: "[...] for nodes with only a single non-loopback + // interface (e.g., a single Ethernet interface), the common case, link-local + // addresses need not be qualified with a zone index." + // So, for this common case, LL addresses should be included in DNS config. + name: "IPv6 link-local address on non-internal network ping by name", + bridgeOpts: []func(*types.NetworkCreate){ + network.WithIPv6(), + network.WithIPAM("fe80::/64", "fe80::1"), + }, + }, + { + name: "IPv6 nonstandard link-local subnet on non-internal network ping by name", + // No interfaces apart from the one on the bridge network with this non-default + // subnet will be on this link local subnet (it's not currently possible to + // configure two networks with the same LL subnet, although perhaps it should + // be). So, again, no zone index is required and the LL address should be + // included in DNS config. + bridgeOpts: []func(*types.NetworkCreate){ + network.WithIPv6(), + network.WithIPAM("fe80:1234::/64", "fe80:1234::1"), + }, + }, { name: "IPv6 non-internal network with SLAAC LL address", bridgeOpts: []func(*types.NetworkCreate){ @@ -292,3 +318,162 @@ func TestBridgeINC(t *testing.T) { }) } } + +func TestDefaultBridgeIPv6(t *testing.T) { + skip.If(t, testEnv.DaemonInfo.OSType == "windows") + + ctx := setupTest(t) + + testcases := []struct { + name string + fixed_cidr_v6 string + }{ + { + name: "IPv6 ULA", + fixed_cidr_v6: "fd00:1234::/64", + }, + { + name: "IPv6 LLA only", + fixed_cidr_v6: "fe80::/64", + }, + { + name: "IPv6 nonstandard LLA only", + fixed_cidr_v6: "fe80:1234::/64", + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + ctx := testutil.StartSpan(ctx, t) + + d := daemon.New(t) + d.StartWithBusybox(ctx, t, + "--experimental", + "--ip6tables", + "--ipv6", + "--fixed-cidr-v6", tc.fixed_cidr_v6, + ) + defer d.Stop(t) + + c := d.NewClientT(t) + defer c.Close() + + cID := container.Run(ctx, t, c, + container.WithImage("busybox:latest"), + container.WithCmd("top"), + ) + defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{ + Force: true, + }) + + networkName := "bridge" + inspect := container.Inspect(ctx, t, c, cID) + pingHost := inspect.NetworkSettings.Networks[networkName].GlobalIPv6Address + + attachCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + res := container.RunAttach(attachCtx, t, c, + container.WithImage("busybox:latest"), + container.WithCmd("ping", "-c1", "-W3", pingHost), + ) + defer c.ContainerRemove(ctx, res.ContainerID, containertypes.RemoveOptions{ + Force: true, + }) + + assert.Check(t, is.Equal(res.ExitCode, 0)) + assert.Check(t, is.Equal(res.Stderr.String(), "")) + assert.Check(t, is.Contains(res.Stdout.String(), "1 packets transmitted, 1 packets received")) + }) + } +} + +// Check that it's possible to change 'fixed-cidr-v6' and restart the daemon. +func TestDefaultBridgeAddresses(t *testing.T) { + skip.If(t, testEnv.DaemonInfo.OSType == "windows") + + ctx := setupTest(t) + d := daemon.New(t) + + type testStep struct { + stepName string + fixedCIDRV6 string + expAddrs []string + } + + testcases := []struct { + name string + steps []testStep + }{ + { + name: "Unique-Local Subnet Changes", + steps: []testStep{ + { + stepName: "Set up initial UL prefix", + fixedCIDRV6: "fd1c:f1a0:5d8d:aaaa::/64", + expAddrs: []string{"fd1c:f1a0:5d8d:aaaa::1/64", "fe80::1/64"}, + }, + { + // Modify that prefix, the default bridge's address must be deleted and re-added. + stepName: "Modify UL prefix - address change", + fixedCIDRV6: "fd1c:f1a0:5d8d:bbbb::/64", + expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::1/64"}, + }, + { + // Modify the prefix length, the default bridge's address should not change. + stepName: "Modify UL prefix - no address change", + fixedCIDRV6: "fd1c:f1a0:5d8d:bbbb::/80", + // The prefix length displayed by 'ip a' is not updated - it's informational, and + // can't be changed without unnecessarily deleting and re-adding the address. + expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::1/64"}, + }, + }, + }, + { + name: "Link-Local Subnet Changes", + steps: []testStep{ + { + stepName: "Standard LL subnet prefix", + fixedCIDRV6: "fe80::/64", + expAddrs: []string{"fe80::1/64"}, + }, + { + // Modify that prefix, the default bridge's address must be deleted and re-added. + // The bridge must still have an address in the required (standard) LL subnet. + stepName: "Nonstandard LL prefix - address change", + fixedCIDRV6: "fe80:1234::/32", + expAddrs: []string{"fe80:1234::1/32", "fe80::1/64"}, + }, + { + // Modify the prefix length, the addresses should not change. + stepName: "Modify LL prefix - no address change", + fixedCIDRV6: "fe80:1234::/64", + // The prefix length displayed by 'ip a' is not updated - it's informational, and + // can't be changed without unnecessarily deleting and re-adding the address. + expAddrs: []string{"fe80:1234::1/", "fe80::1/64"}, + }, + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + for _, step := range tc.steps { + // Check that the daemon starts - regression test for: + // https://github.com/moby/moby/issues/46829 + d.Start(t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6) + d.Stop(t) + + // Check that the expected addresses have been applied to the bridge. (Skip in + // rootless mode, because the bridge is in a different network namespace.) + if !testEnv.IsRootless() { + res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0") + assert.Equal(t, res.ExitCode, 0, step.stepName) + stdout := res.Stdout() + for _, expAddr := range step.expAddrs { + assert.Check(t, is.Contains(stdout, expAddr)) + } + } + } + }) + } +} diff --git a/libnetwork/drivers/bridge/bridge_linux.go b/libnetwork/drivers/bridge/bridge_linux.go index a9b54cfce3..c6f61de028 100644 --- a/libnetwork/drivers/bridge/bridge_linux.go +++ b/libnetwork/drivers/bridge/bridge_linux.go @@ -1,8 +1,8 @@ package bridge import ( + "bytes" "context" - "errors" "fmt" "net" "os" @@ -11,6 +11,7 @@ import ( "sync" "github.com/containerd/log" + "github.com/docker/docker/errdefs" "github.com/docker/docker/libnetwork/datastore" "github.com/docker/docker/libnetwork/driverapi" "github.com/docker/docker/libnetwork/iptables" @@ -22,6 +23,7 @@ import ( "github.com/docker/docker/libnetwork/portmapper" "github.com/docker/docker/libnetwork/scope" "github.com/docker/docker/libnetwork/types" + "github.com/pkg/errors" "github.com/vishvananda/netlink" ) @@ -179,6 +181,44 @@ func Register(r driverapi.Registerer, config map[string]interface{}) error { }) } +// The behaviour of previous implementations of bridge subnet prefix assignment +// is preserved here... +// +// The LL prefix, 'fe80::/64' can be used as an IPAM pool. Linux always assigns +// link-local addresses with this prefix. But, pool-assigned addresses are very +// unlikely to conflict. +// +// Don't allow a nonstandard LL subnet to overlap with 'fe80::/64'. For example, +// if the config asked for subnet prefix 'fe80::/80', the bridge and its +// containers would each end up with two LL addresses, Linux's '/64' and one from +// the IPAM pool claiming '/80'. Although the specified prefix length must not +// affect the host's determination of whether the address is on-link and to be +// added to the interface's Prefix List (RFC-5942), differing prefix lengths +// would be confusing and have been disallowed by earlier implementations of +// bridge address assignment. +func validateIPv6Subnet(addr *net.IPNet) error { + if addr != nil && bridgeIPv6.Contains(addr.IP) && !bytes.Equal(bridgeIPv6.Mask, addr.Mask) { + return errdefs.InvalidParameter(errors.New("clash with the Link-Local prefix 'fe80::/64'")) + } + return nil +} + +// ValidateFixedCIDRV6 checks that val is an IPv6 address and prefix length that +// does not overlap with the link local subnet prefix 'fe80::/64'. +func ValidateFixedCIDRV6(val string) error { + if val == "" { + return nil + } + ip, ipNet, err := net.ParseCIDR(val) + if err != nil { + return errdefs.InvalidParameter(err) + } + if ip.To4() != nil { + return errdefs.InvalidParameter(errors.New("fixed-cidr-v6 is not an IPv6 subnet")) + } + return validateIPv6Subnet(ipNet) +} + // Validate performs a static validation on the network configuration parameters. // Whatever can be assessed a priori before attempting any programming. func (c *networkConfiguration) Validate() error { @@ -516,6 +556,13 @@ func (c *networkConfiguration) processIPAM(id string, ipamV4Data, ipamV6Data []d } } + // TODO(robmry) - move this to networkConfiguration.Validate() + // - but that can't happen until Validate() is called after processIPAM() has set + // up the IP addresses, instead of during parseNetworkOptions(). + if err := validateIPv6Subnet(c.AddressIPv6); err != nil { + return err + } + return nil } @@ -759,9 +806,9 @@ func (d *driver) createNetwork(config *networkConfiguration) (err error) { // assigned an IPv6 link-local address. {config.EnableIPv6, setupBridgeIPv6}, - // We ensure that the bridge has the expectedIPv4 and IPv6 addresses in - // the case of a previously existing device. - {bridgeAlreadyExists && !config.InhibitIPv4, setupVerifyAndReconcile}, + // Ensure the bridge has the expected IPv4 addresses in the case of a previously + // existing device. + {bridgeAlreadyExists && !config.InhibitIPv4, setupVerifyAndReconcileIPv4}, // Enable IPv6 Forwarding {enableIPv6Forwarding, setupIPv6Forwarding}, @@ -1059,7 +1106,6 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo, if config.AddressIPv6 != nil { network = config.AddressIPv6 } - ones, _ := network.Mask.Size() if ones > 80 { err = types.ForbiddenErrorf("Cannot self generate an IPv6 address on network %v: At least 48 host bits are needed.", network) diff --git a/libnetwork/drivers/bridge/bridge_linux_test.go b/libnetwork/drivers/bridge/bridge_linux_test.go index 1f93ef1e44..1f9bd51231 100644 --- a/libnetwork/drivers/bridge/bridge_linux_test.go +++ b/libnetwork/drivers/bridge/bridge_linux_test.go @@ -19,6 +19,8 @@ import ( "github.com/docker/docker/libnetwork/portallocator" "github.com/docker/docker/libnetwork/types" "github.com/vishvananda/netlink" + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" ) func TestEndpointMarshalling(t *testing.T) { @@ -964,6 +966,81 @@ func TestValidateConfig(t *testing.T) { } } +func TestValidateFixedCIDRV6(t *testing.T) { + tests := []struct { + doc, input, expectedErr string + }{ + { + doc: "valid", + input: "2001:db8::/32", + }, + { + // fixed-cidr-v6 doesn't have to be specified. + doc: "empty", + }, + { + // Using the LL subnet prefix is ok. + doc: "Link-Local subnet prefix", + input: "fe80::/64", + }, + { + // Using a nonstandard LL prefix that doesn't overlap with the standard LL prefix + // is ok. + doc: "non-overlapping link-local prefix", + input: "fe80:1234::/80", + }, + { + // Overlapping with the standard LL prefix isn't allowed. + doc: "overlapping link-local prefix fe80::/63", + input: "fe80::/63", + expectedErr: "clash with the Link-Local prefix 'fe80::/64'", + }, + { + // Overlapping with the standard LL prefix isn't allowed. + doc: "overlapping link-local subnet fe80::/65", + input: "fe80::/65", + expectedErr: "clash with the Link-Local prefix 'fe80::/64'", + }, + { + // The address has to be valid IPv6 subnet. + doc: "invalid IPv6 subnet", + input: "2000:db8::", + expectedErr: "invalid CIDR address: 2000:db8::", + }, + { + doc: "non-IPv6 subnet", + input: "10.3.4.5/24", + expectedErr: "fixed-cidr-v6 is not an IPv6 subnet", + }, + { + doc: "IPv4-mapped subnet 1", + input: "::ffff:10.2.4.0/24", + expectedErr: "fixed-cidr-v6 is not an IPv6 subnet", + }, + { + doc: "IPv4-mapped subnet 2", + input: "::ffff:a01:203/24", + expectedErr: "fixed-cidr-v6 is not an IPv6 subnet", + }, + { + doc: "invalid subnet", + input: "nonsense", + expectedErr: "invalid CIDR address: nonsense", + }, + } + for _, tc := range tests { + tc := tc + t.Run(tc.doc, func(t *testing.T) { + err := ValidateFixedCIDRV6(tc.input) + if tc.expectedErr == "" { + assert.Check(t, err) + } else { + assert.Check(t, is.Error(err, tc.expectedErr)) + } + }) + } +} + func TestSetDefaultGw(t *testing.T) { defer netnsutils.SetupTestOSContext(t)() @@ -1098,7 +1175,7 @@ func TestCreateWithExistingBridge(t *testing.T) { t.Fatalf("Failed to getNetwork(%s): %v", brName, err) } - addrs4, _, err := nw.bridge.addresses() + addrs4, err := nw.bridge.addresses(netlink.FAMILY_V4) if err != nil { t.Fatalf("Failed to get the bridge network's address: %v", err) } diff --git a/libnetwork/drivers/bridge/errors.go b/libnetwork/drivers/bridge/errors.go index ad72900c12..bf03157f8d 100644 --- a/libnetwork/drivers/bridge/errors.go +++ b/libnetwork/drivers/bridge/errors.go @@ -222,19 +222,6 @@ func (ipv4 *IPv4AddrAddError) Error() string { // InternalError denotes the type of this error func (ipv4 *IPv4AddrAddError) InternalError() {} -// IPv6AddrAddError is returned when IPv6 address could not be added to the bridge. -type IPv6AddrAddError struct { - IP *net.IPNet - Err error -} - -func (ipv6 *IPv6AddrAddError) Error() string { - return fmt.Sprintf("failed to add IPv6 address %s to bridge: %v", ipv6.IP, ipv6.Err) -} - -// InternalError denotes the type of this error -func (ipv6 *IPv6AddrAddError) InternalError() {} - // IPv4AddrNoMatchError is returned when the bridge's IPv4 address does not match configured. type IPv4AddrNoMatchError struct { IP net.IP diff --git a/libnetwork/drivers/bridge/interface_linux.go b/libnetwork/drivers/bridge/interface_linux.go index 6618fd66bd..add9c231b5 100644 --- a/libnetwork/drivers/bridge/interface_linux.go +++ b/libnetwork/drivers/bridge/interface_linux.go @@ -4,8 +4,11 @@ import ( "context" "fmt" "net" + "net/netip" "github.com/containerd/log" + "github.com/docker/docker/errdefs" + "github.com/docker/docker/libnetwork/internal/netiputil" "github.com/vishvananda/netlink" ) @@ -53,39 +56,103 @@ func (i *bridgeInterface) exists() bool { return i.Link != nil } -// addresses returns all IPv4 addresses and all IPv6 addresses for the bridge interface. -func (i *bridgeInterface) addresses() ([]netlink.Addr, []netlink.Addr, error) { +// addresses returns a bridge's addresses, IPv4 (with family=netlink.FAMILY_V4) +// or IPv6 (family=netlink.FAMILY_V6). +func (i *bridgeInterface) addresses(family int) ([]netlink.Addr, error) { if !i.exists() { // A nonexistent interface, by definition, cannot have any addresses. - return nil, nil, nil + return nil, nil } - v4addr, err := i.nlh.AddrList(i.Link, netlink.FAMILY_V4) + addrs, err := i.nlh.AddrList(i.Link, family) if err != nil { - return nil, nil, fmt.Errorf("Failed to retrieve V4 addresses: %v", err) + return nil, fmt.Errorf("Failed to retrieve addresses: %v", err) } - - v6addr, err := i.nlh.AddrList(i.Link, netlink.FAMILY_V6) - if err != nil { - return nil, nil, fmt.Errorf("Failed to retrieve V6 addresses: %v", err) - } - - if len(v4addr) == 0 { - return nil, v6addr, nil - } - return v4addr, v6addr, nil + return addrs, nil } -func (i *bridgeInterface) programIPv6Address() error { - _, nlAddressList, err := i.addresses() +func getRequiredIPv6Addrs(config *networkConfiguration) (requiredAddrs map[netip.Addr]netip.Prefix, addr *net.IPNet, gateway net.IP, err error) { + requiredAddrs = make(map[netip.Addr]netip.Prefix) + + // TODO(robmry) - is config.AddressIPv6 always set at this point? + // The logic here is preserved from the original setupBridgeIPv6(), but + // can probably be simplified. + + // Always give the bridge 'fe80::1' - every interface is required to have an + // address in 'fe80::/64'. Linux may assign an address, but we'll replace it with + // 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool + // assigned address will not be a second address in the LL subnet. + addr = bridgeIPv6 + gateway = bridgeIPv6.IP + ra, ok := netiputil.ToPrefix(bridgeIPv6) + if !ok { + err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix") + return nil, nil, net.IP{}, err + } + requiredAddrs[ra.Addr()] = ra + + // Set up the user-specified bridge address and gateway. + if config.AddressIPv6 != nil { + addr = config.AddressIPv6 + gateway = config.AddressIPv6.IP + ra, ok := netiputil.ToPrefix(config.AddressIPv6) + if !ok { + err = fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix", config.AddressIPv6.String()) + return nil, nil, net.IP{}, err + } + requiredAddrs[ra.Addr()] = ra + } + + return requiredAddrs, addr, gateway, nil +} + +func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) error { + // Get the IPv6 addresses currently assigned to the bridge, if any. + existingAddrs, err := i.addresses(netlink.FAMILY_V6) if err != nil { - return &IPv6AddrAddError{IP: i.bridgeIPv6, Err: fmt.Errorf("failed to retrieve address list: %v", err)} + return errdefs.System(err) } - nlAddr := netlink.Addr{IPNet: i.bridgeIPv6} - if findIPv6Address(nlAddr, nlAddressList) { - return nil + + // Get the list of required IPv6 addresses for this bridge. + var requiredAddrs map[netip.Addr]netip.Prefix + requiredAddrs, i.bridgeIPv6, i.gatewayIPv6, err = getRequiredIPv6Addrs(config) + if err != nil { + return errdefs.System(err) } - if err := i.nlh.AddrAdd(i.Link, &nlAddr); err != nil { - return &IPv6AddrAddError{IP: i.bridgeIPv6, Err: err} + + // Remove addresses that aren't required. + for _, existingAddr := range existingAddrs { + ea, ok := netip.AddrFromSlice(existingAddr.IP) + if !ok { + return errdefs.System(fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6)) + } + // Ignore the prefix length when comparing addresses, it's informational + // (RFC-5942 section 4), and removing/re-adding an address that's still valid + // would disrupt traffic on live-restore. + if _, required := requiredAddrs[ea]; !required { + err := i.nlh.AddrDel(i.Link, &existingAddr) //#nosec G601 -- Memory aliasing is not an issue in practice as the &existingAddr pointer is not retained by the callee after the AddrDel() call returns. + if err != nil { + log.G(context.TODO()).WithFields(log.Fields{"error": err, "address": existingAddr.IPNet}).Warnf("Failed to remove residual IPv6 address from bridge") + } + } + } + // Add or update required addresses. + for _, addrPrefix := range requiredAddrs { + // Using AddrReplace(), rather than AddrAdd(). When the subnet is changed for an + // existing bridge in a way that doesn't affect the bridge's assigned address, + // the old address has not been removed at this point - because that would be + // service-affecting for a running container. + // + // But if, for example, 'fixed-cidr-v6' is changed from '2000:dbe::/64' to + // '2000:dbe::/80', the default bridge will still be assigned address + // '2000:dbe::1'. In the output of 'ip a', the prefix length is displayed - and + // the user is likely to expect to see it updated from '64' to '80'. + // Unfortunately, 'netlink.AddrReplace()' ('RTM_NEWADDR' with 'NLM_F_REPLACE') + // doesn't update the prefix length. This is a cosmetic problem, the prefix + // length of an assigned address is not used to determine whether an address is + // "on-link" (RFC-5942). + if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil { + return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %v", i.bridgeIPv6, err)) + } } return nil } diff --git a/libnetwork/drivers/bridge/interface_linux_test.go b/libnetwork/drivers/bridge/interface_linux_test.go index d7859131d0..734c69b4dd 100644 --- a/libnetwork/drivers/bridge/interface_linux_test.go +++ b/libnetwork/drivers/bridge/interface_linux_test.go @@ -1,12 +1,43 @@ package bridge import ( + "net" + "net/netip" + "strings" "testing" "github.com/docker/docker/internal/testutils/netnsutils" + "github.com/google/go-cmp/cmp" "github.com/vishvananda/netlink" + "gotest.tools/v3/assert" + is "gotest.tools/v3/assert/cmp" ) +func cidrToIPNet(t *testing.T, cidr string) *net.IPNet { + t.Helper() + ip, ipNet, err := net.ParseCIDR(cidr) + assert.Assert(t, is.Nil(err)) + return &net.IPNet{IP: ip, Mask: ipNet.Mask} +} + +func addAddr(t *testing.T, link netlink.Link, addr string) { + t.Helper() + ipNet := cidrToIPNet(t, addr) + err := netlink.AddrAdd(link, &netlink.Addr{IPNet: ipNet}) + assert.Assert(t, is.Nil(err)) +} + +func prepTestBridge(t *testing.T, nc *networkConfiguration) *bridgeInterface { + t.Helper() + nh, err := netlink.NewHandle() + assert.Assert(t, err) + i, err := newInterface(nh, nc) + assert.Assert(t, err) + err = setupDevice(nc, i) + assert.Assert(t, err) + return i +} + func TestInterfaceDefaultName(t *testing.T) { defer netnsutils.SetupTestOSContext(t)() @@ -16,35 +47,142 @@ func TestInterfaceDefaultName(t *testing.T) { } config := &networkConfiguration{} _, err = newInterface(nh, config) - if err != nil { - t.Fatalf("newInterface() failed: %v", err) - } + assert.Check(t, err) + assert.Equal(t, config.BridgeName, DefaultBridgeName) +} - if config.BridgeName != DefaultBridgeName { - t.Fatalf("Expected default interface name %q, got %q", DefaultBridgeName, config.BridgeName) - } +func TestAddressesNoInterface(t *testing.T) { + i := bridgeInterface{} + addrs, err := i.addresses(netlink.FAMILY_V6) + assert.NilError(t, err) + assert.Check(t, is.Len(addrs, 0)) } func TestAddressesEmptyInterface(t *testing.T) { defer netnsutils.SetupTestOSContext(t)() nh, err := netlink.NewHandle() - if err != nil { - t.Fatal(err) - } + assert.NilError(t, err) + inf, err := newInterface(nh, &networkConfiguration{}) - if err != nil { - t.Fatalf("newInterface() failed: %v", err) + assert.NilError(t, err) + + addrsv4, err := inf.addresses(netlink.FAMILY_V4) + assert.NilError(t, err) + assert.Check(t, is.Len(addrsv4, 0)) + + addrsv6, err := inf.addresses(netlink.FAMILY_V6) + assert.NilError(t, err) + assert.Check(t, is.Len(addrsv6, 0)) +} + +func TestAddressesNonEmptyInterface(t *testing.T) { + defer netnsutils.SetupTestOSContext(t)() + + i := prepTestBridge(t, &networkConfiguration{}) + + const expAddrV4, expAddrV6 = "192.168.1.2/24", "fd00:1234::/64" + addAddr(t, i.Link, expAddrV4) + addAddr(t, i.Link, expAddrV6) + + addrs, err := i.addresses(netlink.FAMILY_V4) + assert.NilError(t, err) + assert.Check(t, is.Len(addrs, 1)) + assert.Equal(t, addrs[0].IPNet.String(), expAddrV4) + + addrs, err = i.addresses(netlink.FAMILY_V6) + assert.NilError(t, err) + assert.Check(t, is.Len(addrs, 1)) + assert.Equal(t, addrs[0].IPNet.String(), expAddrV6) +} + +func TestGetRequiredIPv6Addrs(t *testing.T) { + testcases := []struct { + name string + addressIPv6 string + expReqdAddrs []string + }{ + { + name: "Regular address, expect default link local", + addressIPv6: "2000:3000::1/80", + expReqdAddrs: []string{"fe80::1/64", "2000:3000::1/80"}, + }, + { + name: "Standard link local address only", + addressIPv6: "fe80::1/64", + expReqdAddrs: []string{"fe80::1/64"}, + }, + { + name: "Nonstandard link local address", + addressIPv6: "fe80:abcd::1/42", + expReqdAddrs: []string{"fe80:abcd::1/42", "fe80::1/64"}, + }, } - addrsv4, addrsv6, err := inf.addresses() - if err != nil { - t.Fatalf("Failed to get addresses of default interface: %v", err) - } - if len(addrsv4) != 0 { - t.Fatalf("Default interface has unexpected IPv4: %s", addrsv4) - } - if len(addrsv6) != 0 { - t.Fatalf("Default interface has unexpected IPv6: %v", addrsv6) + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + config := &networkConfiguration{ + AddressIPv6: cidrToIPNet(t, tc.addressIPv6), + } + + expResult := map[netip.Addr]netip.Prefix{} + for _, addr := range tc.expReqdAddrs { + expResult[netip.MustParseAddr(strings.Split(addr, "/")[0])] = netip.MustParsePrefix(addr) + } + + reqd, addr, gw, err := getRequiredIPv6Addrs(config) + assert.Check(t, is.Nil(err)) + assert.Check(t, is.DeepEqual(addr, config.AddressIPv6)) + assert.Check(t, is.DeepEqual(gw, config.AddressIPv6.IP)) + assert.Check(t, is.DeepEqual(reqd, expResult, + cmp.Comparer(func(a, b netip.Prefix) bool { return a == b }))) + }) } } + +func TestProgramIPv6Addresses(t *testing.T) { + defer netnsutils.SetupTestOSContext(t)() + + checkAddrs := func(i *bridgeInterface, expAddrs []string) { + t.Helper() + exp := []netlink.Addr{} + for _, a := range expAddrs { + ipNet := cidrToIPNet(t, a) + exp = append(exp, netlink.Addr{IPNet: ipNet}) + } + actual, err := i.addresses(netlink.FAMILY_V6) + assert.NilError(t, err) + assert.DeepEqual(t, exp, actual) + } + + nc := &networkConfiguration{} + i := prepTestBridge(t, nc) + + // The bridge has no addresses, ask for a regular IPv6 network and expect it to + // be added to the bridge, with the default link local address. + nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/64") + err := i.programIPv6Addresses(nc) + assert.NilError(t, err) + checkAddrs(i, []string{"2000:3000::1/64", "fe80::1/64"}) + + // Shrink the subnet of that regular address, the prefix length of the address + // will not be modified - but it's informational-only, the address itself has + // not changed. + nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/80") + err = i.programIPv6Addresses(nc) + assert.NilError(t, err) + checkAddrs(i, []string{"2000:3000::1/64", "fe80::1/64"}) + + // Ask for link-local only, by specifying an address with the Link Local prefix. + // The regular address should be removed. + nc.AddressIPv6 = cidrToIPNet(t, "fe80::1/64") + err = i.programIPv6Addresses(nc) + assert.NilError(t, err) + checkAddrs(i, []string{"fe80::1/64"}) + + // Swap the standard link local address for a nonstandard one. + nc.AddressIPv6 = cidrToIPNet(t, "fe80:5555::1/55") + err = i.programIPv6Addresses(nc) + assert.NilError(t, err) + checkAddrs(i, []string{"fe80:5555::1/55", "fe80::1/64"}) +} diff --git a/libnetwork/drivers/bridge/setup_ipv4_linux.go b/libnetwork/drivers/bridge/setup_ipv4_linux.go index 30096014e7..0940745c23 100644 --- a/libnetwork/drivers/bridge/setup_ipv4_linux.go +++ b/libnetwork/drivers/bridge/setup_ipv4_linux.go @@ -37,7 +37,7 @@ func setupBridgeIPv4(config *networkConfiguration, i *bridgeInterface) error { } if !config.InhibitIPv4 { - addrv4List, _, err := i.addresses() + addrv4List, err := i.addresses(netlink.FAMILY_V4) if err != nil { return fmt.Errorf("failed to retrieve bridge interface addresses: %v", err) } diff --git a/libnetwork/drivers/bridge/setup_ipv6_linux.go b/libnetwork/drivers/bridge/setup_ipv6_linux.go index 6cc12926fe..08ce0f4397 100644 --- a/libnetwork/drivers/bridge/setup_ipv6_linux.go +++ b/libnetwork/drivers/bridge/setup_ipv6_linux.go @@ -32,23 +32,9 @@ func setupBridgeIPv6(config *networkConfiguration, i *bridgeInterface) error { } } - // Store bridge network and default gateway - i.bridgeIPv6 = bridgeIPv6 - i.gatewayIPv6 = i.bridgeIPv6.IP - - if err := i.programIPv6Address(); err != nil { - return err - } - - if config.AddressIPv6 == nil { - return nil - } - - // Store the user specified bridge network and network gateway and program it - i.bridgeIPv6 = config.AddressIPv6 - i.gatewayIPv6 = config.AddressIPv6.IP - - if err := i.programIPv6Address(); err != nil { + // Remove unwanted addresses from the bridge, add required addresses, and assign + // values to "i.bridgeIPv6", "i.gatewayIPv6". + if err := i.programIPv6Addresses(config); err != nil { return err } diff --git a/libnetwork/drivers/bridge/setup_verify_linux.go b/libnetwork/drivers/bridge/setup_verify_linux.go index c05fa7e242..a39d750346 100644 --- a/libnetwork/drivers/bridge/setup_verify_linux.go +++ b/libnetwork/drivers/bridge/setup_verify_linux.go @@ -1,28 +1,25 @@ package bridge import ( - "context" "fmt" "strings" - "github.com/containerd/log" "github.com/docker/docker/libnetwork/ns" - "github.com/docker/docker/libnetwork/types" "github.com/vishvananda/netlink" ) -// setupVerifyAndReconcile checks what IP addresses the given i interface has and ensures that they match the passed -// network config. It also removes any extra unicast IPv6 addresses found. -func setupVerifyAndReconcile(config *networkConfiguration, i *bridgeInterface) error { +// setupVerifyAndReconcileIPv4 checks what IPv4 addresses the given i interface has +// and ensures that they match the passed network config. +func setupVerifyAndReconcileIPv4(config *networkConfiguration, i *bridgeInterface) error { // Fetch a slice of IPv4 addresses and a slice of IPv6 addresses from the bridge. - addrsv4, addrsv6, err := i.addresses() + addrsv4, err := i.addresses(netlink.FAMILY_V4) if err != nil { return fmt.Errorf("Failed to verify ip addresses: %v", err) } addrv4, _ := selectIPv4Address(addrsv4, config.AddressIPv4) - // Verify that the bridge does have an IPv4 address. + // Verify that the bridge has an IPv4 address. if !config.Internal && addrv4.IPNet == nil { return &ErrNoIPAddr{} } @@ -32,34 +29,9 @@ func setupVerifyAndReconcile(config *networkConfiguration, i *bridgeInterface) e return &IPv4AddrNoMatchError{IP: addrv4.IP, CfgIP: config.AddressIPv4.IP} } - // Verify that one of the bridge IPv6 addresses matches the requested - // configuration. - if config.EnableIPv6 && !config.Internal && !findIPv6Address(netlink.Addr{IPNet: bridgeIPv6}, addrsv6) { - return (*IPv6AddrNoMatchError)(bridgeIPv6) - } - - // Release any residual IPv6 address that might be there because of older daemon instances - for _, addrv6 := range addrsv6 { - addrv6 := addrv6 - if addrv6.IP.IsGlobalUnicast() && !types.CompareIPNet(addrv6.IPNet, i.bridgeIPv6) { - if err := i.nlh.AddrDel(i.Link, &addrv6); err != nil { - log.G(context.TODO()).Warnf("Failed to remove residual IPv6 address %s from bridge: %v", addrv6.IPNet, err) - } - } - } - return nil } -func findIPv6Address(addr netlink.Addr, addresses []netlink.Addr) bool { - for _, addrv6 := range addresses { - if addrv6.String() == addr.String() { - return true - } - } - return false -} - func bridgeInterfaceExists(name string) (bool, error) { nlh := ns.NlHandle() link, err := nlh.LinkByName(name) diff --git a/libnetwork/drivers/bridge/setup_verify_linux_test.go b/libnetwork/drivers/bridge/setup_verify_linux_test.go index f6850ba202..045f3a1681 100644 --- a/libnetwork/drivers/bridge/setup_verify_linux_test.go +++ b/libnetwork/drivers/bridge/setup_verify_linux_test.go @@ -38,7 +38,7 @@ func TestSetupVerify(t *testing.T) { t.Fatalf("Failed to assign IPv4 %s to interface: %v", config.AddressIPv4, err) } - if err := setupVerifyAndReconcile(config, inf); err != nil { + if err := setupVerifyAndReconcileIPv4(config, inf); err != nil { t.Fatalf("Address verification failed: %v", err) } } @@ -56,7 +56,7 @@ func TestSetupVerifyBad(t *testing.T) { t.Fatalf("Failed to assign IPv4 %s to interface: %v", ipnet, err) } - if err := setupVerifyAndReconcile(config, inf); err == nil { + if err := setupVerifyAndReconcileIPv4(config, inf); err == nil { t.Fatal("Address verification was expected to fail") } } @@ -69,46 +69,7 @@ func TestSetupVerifyMissing(t *testing.T) { config := &networkConfiguration{} config.AddressIPv4 = &net.IPNet{IP: addrv4, Mask: addrv4.DefaultMask()} - if err := setupVerifyAndReconcile(config, inf); err == nil { - t.Fatal("Address verification was expected to fail") - } -} - -func TestSetupVerifyIPv6(t *testing.T) { - defer netnsutils.SetupTestOSContext(t)() - - addrv4 := net.IPv4(192, 168, 1, 1) - inf := setupVerifyTest(t) - config := &networkConfiguration{} - config.AddressIPv4 = &net.IPNet{IP: addrv4, Mask: addrv4.DefaultMask()} - config.EnableIPv6 = true - - if err := netlink.AddrAdd(inf.Link, &netlink.Addr{IPNet: bridgeIPv6}); err != nil { - t.Fatalf("Failed to assign IPv6 %s to interface: %v", bridgeIPv6, err) - } - if err := netlink.AddrAdd(inf.Link, &netlink.Addr{IPNet: config.AddressIPv4}); err != nil { - t.Fatalf("Failed to assign IPv4 %s to interface: %v", config.AddressIPv4, err) - } - - if err := setupVerifyAndReconcile(config, inf); err != nil { - t.Fatalf("Address verification failed: %v", err) - } -} - -func TestSetupVerifyIPv6Missing(t *testing.T) { - defer netnsutils.SetupTestOSContext(t)() - - addrv4 := net.IPv4(192, 168, 1, 1) - inf := setupVerifyTest(t) - config := &networkConfiguration{} - config.AddressIPv4 = &net.IPNet{IP: addrv4, Mask: addrv4.DefaultMask()} - config.EnableIPv6 = true - - if err := netlink.AddrAdd(inf.Link, &netlink.Addr{IPNet: config.AddressIPv4}); err != nil { - t.Fatalf("Failed to assign IPv4 %s to interface: %v", config.AddressIPv4, err) - } - - if err := setupVerifyAndReconcile(config, inf); err == nil { + if err := setupVerifyAndReconcileIPv4(config, inf); err == nil { t.Fatal("Address verification was expected to fail") } } diff --git a/libnetwork/endpoint.go b/libnetwork/endpoint.go index 79a2689366..16e92b547b 100644 --- a/libnetwork/endpoint.go +++ b/libnetwork/endpoint.go @@ -1121,7 +1121,7 @@ func (ep *Endpoint) releaseAddress() { } } - if ep.iface.addrv6 != nil && ep.iface.addrv6.IP.IsGlobalUnicast() { + if ep.iface.addrv6 != nil { if err := ipam.ReleaseAddress(ep.iface.v6PoolID, ep.iface.addrv6.IP); err != nil { log.G(context.TODO()).Warnf("Failed to release ip address %s on delete of endpoint %s (%s): %v", ep.iface.addrv6.IP, ep.Name(), ep.ID(), err) } diff --git a/libnetwork/ipam/utils.go b/libnetwork/internal/netiputil/netiputil.go similarity index 54% rename from libnetwork/ipam/utils.go rename to libnetwork/internal/netiputil/netiputil.go index 4e7d72c2e0..76cb5922a3 100644 --- a/libnetwork/ipam/utils.go +++ b/libnetwork/internal/netiputil/netiputil.go @@ -1,4 +1,4 @@ -package ipam +package netiputil import ( "net" @@ -7,7 +7,8 @@ import ( "github.com/docker/docker/libnetwork/ipbits" ) -func toIPNet(p netip.Prefix) *net.IPNet { +// ToIPNet converts p into a *net.IPNet, returning nil if p is not valid. +func ToIPNet(p netip.Prefix) *net.IPNet { if !p.IsValid() { return nil } @@ -17,7 +18,9 @@ func toIPNet(p netip.Prefix) *net.IPNet { } } -func toPrefix(n *net.IPNet) (netip.Prefix, bool) { +// ToPrefix converts n into a netip.Prefix. If n is not a valid IPv4 or IPV6 +// address, ToPrefix returns netip.Prefix{}, false. +func ToPrefix(n *net.IPNet) (netip.Prefix, bool) { if ll := len(n.Mask); ll != net.IPv4len && ll != net.IPv6len { return netip.Prefix{}, false } @@ -35,14 +38,16 @@ func toPrefix(n *net.IPNet) (netip.Prefix, bool) { return netip.PrefixFrom(addr.Unmap(), ones), true } -func hostID(addr netip.Addr, bits uint) uint64 { +// HostID masks out the 'bits' most-significant bits of addr. The result is +// undefined if bits > addr.BitLen(). +func HostID(addr netip.Addr, bits uint) uint64 { return ipbits.Field(addr, bits, uint(addr.BitLen())) } -// subnetRange returns the amount to add to network.Addr() in order to yield the +// SubnetRange returns the amount to add to network.Addr() in order to yield the // first and last addresses in subnet, respectively. -func subnetRange(network, subnet netip.Prefix) (start, end uint64) { - start = hostID(subnet.Addr(), uint(network.Bits())) +func SubnetRange(network, subnet netip.Prefix) (start, end uint64) { + start = HostID(subnet.Addr(), uint(network.Bits())) end = start + (1 << uint64(subnet.Addr().BitLen()-subnet.Bits())) - 1 return start, end } diff --git a/libnetwork/ipam/allocator.go b/libnetwork/ipam/allocator.go index 55b51643d1..79376c49bb 100644 --- a/libnetwork/ipam/allocator.go +++ b/libnetwork/ipam/allocator.go @@ -9,6 +9,7 @@ import ( "github.com/containerd/log" "github.com/docker/docker/libnetwork/bitmap" + "github.com/docker/docker/libnetwork/internal/netiputil" "github.com/docker/docker/libnetwork/ipamapi" "github.com/docker/docker/libnetwork/ipbits" "github.com/docker/docker/libnetwork/types" @@ -46,7 +47,7 @@ func newAddrSpace(predefined []*net.IPNet) (*addrSpace, error) { pdf := make([]netip.Prefix, len(predefined)) for i, n := range predefined { var ok bool - pdf[i], ok = toPrefix(n) + pdf[i], ok = netiputil.ToPrefix(n) if !ok { return nil, fmt.Errorf("network at index %d (%v) is not in canonical form", i, n) } @@ -91,7 +92,7 @@ func (a *Allocator) RequestPool(addressSpace, requestedPool, requestedSubPool st if err != nil { return "", nil, nil, err } - return k.String(), toIPNet(k.Subnet), nil, nil + return k.String(), netiputil.ToIPNet(k.Subnet), nil, nil } if k.Subnet, err = netip.ParsePrefix(requestedPool); err != nil { @@ -119,7 +120,7 @@ func (a *Allocator) RequestPool(addressSpace, requestedPool, requestedSubPool st return "", nil, nil, err } - return k.String(), toIPNet(k.Subnet), nil, nil + return k.String(), netiputil.ToIPNet(k.Subnet), nil, nil } // ReleasePool releases the address pool identified by the passed id @@ -336,7 +337,7 @@ func (aSpace *addrSpace) releaseAddress(nw, sub netip.Prefix, address netip.Addr defer log.G(context.TODO()).Debugf("Released address Address:%v Sequence:%s", address, p.addrs) - return p.addrs.Unset(hostID(address, uint(nw.Bits()))) + return p.addrs.Unset(netiputil.HostID(address, uint(nw.Bits()))) } func getAddress(base netip.Prefix, bitmask *bitmap.Bitmap, prefAddress netip.Addr, ipr netip.Prefix, serial bool) (netip.Addr, error) { @@ -353,10 +354,10 @@ func getAddress(base netip.Prefix, bitmask *bitmap.Bitmap, prefAddress netip.Add if ipr == (netip.Prefix{}) && prefAddress == (netip.Addr{}) { ordinal, err = bitmask.SetAny(serial) } else if prefAddress != (netip.Addr{}) { - ordinal = hostID(prefAddress, uint(base.Bits())) + ordinal = netiputil.HostID(prefAddress, uint(base.Bits())) err = bitmask.Set(ordinal) } else { - start, end := subnetRange(base, ipr) + start, end := netiputil.SubnetRange(base, ipr) ordinal, err = bitmask.SetAnyInRange(start, end, serial) }