소스 검색

Detect IPv6 support in containers.

Some configuration in a container depends on whether it has support for
IPv6 (including default entries for '::1' etc in '/etc/hosts').

Before this change, the container's support for IPv6 was determined by
whether it was connected to any IPv6-enabled networks. But that can
change over time; it isn't a property of the container itself.

So, instead, detect IPv6 support by looking for '::1' on the container's
loopback interface. It will not be present if the kernel does not have
IPv6 support, or the user has disabled it in new namespaces by other
means.

Once IPv6 support has been determined for the container, its '/etc/hosts'
is re-generated accordingly.

The daemon no longer disables IPv6 on all interfaces during initialisation.
It now disables IPv6 only for interfaces that have not been assigned an
IPv6 address. (But, even if IPv6 is disabled for the container using the
sysctl 'net.ipv6.conf.all.disable_ipv6=1', interfaces connected to IPv6
networks still get IPv6 addresses that appear in the internal DNS. There's
more to do!)

Signed-off-by: Rob Murray <rob.murray@docker.com>
Rob Murray 1 년 전
부모
커밋
a8f7c5ee48

+ 8 - 0
integration/internal/container/ops.go

@@ -1,6 +1,7 @@
 package container
 package container
 
 
 import (
 import (
+	"maps"
 	"strings"
 	"strings"
 
 
 	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/container"
@@ -46,6 +47,13 @@ func WithNetworkMode(mode string) func(*TestContainerConfig) {
 	}
 	}
 }
 }
 
 
+// WithSysctls sets sysctl options for the container
+func WithSysctls(sysctls map[string]string) func(*TestContainerConfig) {
+	return func(c *TestContainerConfig) {
+		c.HostConfig.Sysctls = maps.Clone(sysctls)
+	}
+}
+
 // WithExposedPorts sets the exposed ports of the container
 // WithExposedPorts sets the exposed ports of the container
 func WithExposedPorts(ports ...string) func(*TestContainerConfig) {
 func WithExposedPorts(ports ...string) func(*TestContainerConfig) {
 	return func(c *TestContainerConfig) {
 	return func(c *TestContainerConfig) {

+ 107 - 0
integration/networking/etchosts_test.go

@@ -0,0 +1,107 @@
+package networking
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	containertypes "github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/integration/internal/container"
+	"github.com/docker/docker/testutil"
+	"github.com/docker/docker/testutil/daemon"
+	"gotest.tools/v3/assert"
+	is "gotest.tools/v3/assert/cmp"
+	"gotest.tools/v3/skip"
+)
+
+// Check that the '/etc/hosts' file in a container is created according to
+// whether the container supports IPv6.
+// Regression test for https://github.com/moby/moby/issues/35954
+func TestEtcHostsIpv6(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType == "windows")
+
+	ctx := setupTest(t)
+	d := daemon.New(t)
+	d.StartWithBusybox(ctx, t,
+		"--ipv6",
+		"--ip6tables",
+		"--experimental",
+		"--fixed-cidr-v6=fdc8:ffe2:d8d7:1234::/64")
+	defer d.Stop(t)
+
+	c := d.NewClientT(t)
+	defer c.Close()
+
+	testcases := []struct {
+		name           string
+		sysctls        map[string]string
+		expIPv6Enabled bool
+		expEtcHosts    string
+	}{
+		{
+			// Create a container with no overrides, on the IPv6-enabled default bridge.
+			// Expect the container to have a working '::1' address, on the assumption
+			// the test host's kernel supports IPv6 - and for its '/etc/hosts' file to
+			// include IPv6 addresses.
+			name:           "IPv6 enabled",
+			expIPv6Enabled: true,
+			expEtcHosts: `127.0.0.1	localhost
+::1	localhost ip6-localhost ip6-loopback
+fe00::0	ip6-localnet
+ff00::0	ip6-mcastprefix
+ff02::1	ip6-allnodes
+ff02::2	ip6-allrouters
+`,
+		},
+		{
+			// Create a container in the same network, with IPv6 disabled. Expect '::1'
+			// not to be pingable, and no IPv6 addresses in its '/etc/hosts'.
+			name:           "IPv6 disabled",
+			sysctls:        map[string]string{"net.ipv6.conf.all.disable_ipv6": "1"},
+			expIPv6Enabled: false,
+			expEtcHosts:    "127.0.0.1\tlocalhost\n",
+		},
+	}
+
+	for _, tc := range testcases {
+		t.Run(tc.name, func(t *testing.T) {
+			ctx := testutil.StartSpan(ctx, t)
+			ctrId := container.Run(ctx, t, c,
+				container.WithName("etchosts_"+sanitizeCtrName(t.Name())),
+				container.WithImage("busybox:latest"),
+				container.WithCmd("top"),
+				container.WithSysctls(tc.sysctls),
+			)
+			defer func() {
+				c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true})
+			}()
+
+			runCmd := func(ctrId string, cmd []string, expExitCode int) string {
+				t.Helper()
+				execCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+				defer cancel()
+				res, err := container.Exec(execCtx, c, ctrId, cmd)
+				assert.Check(t, is.Nil(err))
+				assert.Check(t, is.Equal(res.ExitCode, expExitCode))
+				return res.Stdout()
+			}
+
+			// Check that IPv6 is/isn't enabled, as expected.
+			var expPingExitStatus int
+			if !tc.expIPv6Enabled {
+				expPingExitStatus = 1
+			}
+			runCmd(ctrId, []string{"ping", "-6", "-c1", "-W3", "::1"}, expPingExitStatus)
+
+			// Check the contents of /etc/hosts.
+			stdout := runCmd(ctrId, []string{"cat", "/etc/hosts"}, 0)
+			// Append the container's own addresses/name to the expected hosts file content.
+			inspect := container.Inspect(ctx, t, c, ctrId)
+			exp := tc.expEtcHosts + inspect.NetworkSettings.IPAddress + "\t" + inspect.Config.Hostname + "\n"
+			if tc.expIPv6Enabled {
+				exp += inspect.NetworkSettings.GlobalIPv6Address + "\t" + inspect.Config.Hostname + "\n"
+			}
+			assert.Check(t, is.Equal(stdout, exp))
+		})
+	}
+}

+ 2 - 25
libnetwork/drivers/bridge/port_mapping_linux.go

@@ -6,9 +6,9 @@ import (
 	"errors"
 	"errors"
 	"fmt"
 	"fmt"
 	"net"
 	"net"
-	"sync"
 
 
 	"github.com/containerd/log"
 	"github.com/containerd/log"
+	"github.com/docker/docker/libnetwork/netutils"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/ishidawataru/sctp"
 	"github.com/ishidawataru/sctp"
 )
 )
@@ -55,7 +55,7 @@ func (n *bridgeNetwork) allocatePortsInternal(bindings []types.PortBinding, cont
 		// skip adding implicit v6 addr, when the kernel was booted with `ipv6.disable=1`
 		// skip adding implicit v6 addr, when the kernel was booted with `ipv6.disable=1`
 		// https://github.com/moby/moby/issues/42288
 		// https://github.com/moby/moby/issues/42288
 		isV6Binding := c.HostIP != nil && c.HostIP.To4() == nil
 		isV6Binding := c.HostIP != nil && c.HostIP.To4() == nil
-		if !isV6Binding && !IsV6Listenable() {
+		if !isV6Binding && !netutils.IsV6Listenable() {
 			continue
 			continue
 		}
 		}
 
 
@@ -219,26 +219,3 @@ func (n *bridgeNetwork) releasePort(bnd types.PortBinding) error {
 
 
 	return portmapper.Unmap(host)
 	return portmapper.Unmap(host)
 }
 }
-
-var (
-	v6ListenableCached bool
-	v6ListenableOnce   sync.Once
-)
-
-// IsV6Listenable returns true when `[::1]:0` is listenable.
-// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option.
-func IsV6Listenable() bool {
-	v6ListenableOnce.Do(func() {
-		ln, err := net.Listen("tcp6", "[::1]:0")
-		if err != nil {
-			// When the kernel was booted with `ipv6.disable=1`,
-			// we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol"
-			// https://github.com/moby/moby/issues/42288
-			log.G(context.TODO()).Debugf("port_mapping: v6Listenable=false (%v)", err)
-		} else {
-			v6ListenableCached = true
-			ln.Close()
-		}
-	})
-	return v6ListenableCached
-}

+ 13 - 25
libnetwork/endpoint.go

@@ -478,18 +478,8 @@ func (ep *Endpoint) sbJoin(sb *Sandbox, options ...EndpointOption) (err error) {
 		}
 		}
 	}
 	}
 
 
-	// Do not update hosts file with internal networks endpoint IP
-	if !n.ingress && n.Name() != libnGWNetwork {
-		var addresses []string
-		if ip := ep.getFirstInterfaceIPv4Address(); ip != nil {
-			addresses = append(addresses, ip.String())
-		}
-		if ip := ep.getFirstInterfaceIPv6Address(); ip != nil {
-			addresses = append(addresses, ip.String())
-		}
-		if err = sb.updateHostsFile(addresses); err != nil {
-			return err
-		}
+	if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil {
+		return err
 	}
 	}
 	if err = sb.updateDNS(n.enableIPv6); err != nil {
 	if err = sb.updateDNS(n.enableIPv6); err != nil {
 		return err
 		return err
@@ -860,26 +850,24 @@ func (ep *Endpoint) getSandbox() (*Sandbox, bool) {
 	return ps, ok
 	return ps, ok
 }
 }
 
 
-func (ep *Endpoint) getFirstInterfaceIPv4Address() net.IP {
+// getEtcHostsAddrs returns a list of this endpoint's addresses to add to '/etc/hosts'.
+func (ep *Endpoint) getEtcHostsAddrs() []string {
 	ep.mu.Lock()
 	ep.mu.Lock()
 	defer ep.mu.Unlock()
 	defer ep.mu.Unlock()
 
 
-	if ep.iface.addr != nil {
-		return ep.iface.addr.IP
+	// Do not update hosts file with internal network's endpoint IP
+	if n := ep.network; n == nil || n.ingress || n.Name() == libnGWNetwork {
+		return nil
 	}
 	}
 
 
-	return nil
-}
-
-func (ep *Endpoint) getFirstInterfaceIPv6Address() net.IP {
-	ep.mu.Lock()
-	defer ep.mu.Unlock()
-
+	var addresses []string
+	if ep.iface.addr != nil {
+		addresses = append(addresses, ep.iface.addr.IP.String())
+	}
 	if ep.iface.addrv6 != nil {
 	if ep.iface.addrv6 != nil {
-		return ep.iface.addrv6.IP
+		addresses = append(addresses, ep.iface.addrv6.IP.String())
 	}
 	}
-
-	return nil
+	return addresses
 }
 }
 
 
 // EndpointOptionGeneric function returns an option setter for a Generic option defined
 // EndpointOptionGeneric function returns an option setter for a Generic option defined

+ 39 - 14
libnetwork/etchosts/etchosts.go

@@ -5,6 +5,7 @@ import (
 	"bytes"
 	"bytes"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
+	"net/netip"
 	"os"
 	"os"
 	"regexp"
 	"regexp"
 	"strings"
 	"strings"
@@ -25,8 +26,10 @@ func (r Record) WriteTo(w io.Writer) (int64, error) {
 
 
 var (
 var (
 	// Default hosts config records slice
 	// Default hosts config records slice
-	defaultContent = []Record{
+	defaultContentIPv4 = []Record{
 		{Hosts: "localhost", IP: "127.0.0.1"},
 		{Hosts: "localhost", IP: "127.0.0.1"},
+	}
+	defaultContentIPv6 = []Record{
 		{Hosts: "localhost ip6-localhost ip6-loopback", IP: "::1"},
 		{Hosts: "localhost ip6-localhost ip6-loopback", IP: "::1"},
 		{Hosts: "ip6-localnet", IP: "fe00::0"},
 		{Hosts: "ip6-localnet", IP: "fe00::0"},
 		{Hosts: "ip6-mcastprefix", IP: "ff00::0"},
 		{Hosts: "ip6-mcastprefix", IP: "ff00::0"},
@@ -71,9 +74,34 @@ func Drop(path string) {
 // IP, hostname, and domainname set the main record; leave IP empty for no main record.
 // IP, hostname, and domainname set the main record; leave IP empty for no main record.
 // extraContent is an array of extra host records.
 // extraContent is an array of extra host records.
 func Build(path, IP, hostname, domainname string, extraContent []Record) error {
 func Build(path, IP, hostname, domainname string, extraContent []Record) error {
+	return build(path, IP, hostname, domainname, defaultContentIPv4, defaultContentIPv6, extraContent)
+}
+
+// BuildNoIPv6 is the same as Build, but will not include IPv6 entries.
+func BuildNoIPv6(path, IP, hostname, domainname string, extraContent []Record) error {
+	if isIPv6(IP) {
+		IP = ""
+	}
+
+	var ipv4ExtraContent []Record
+	for _, rec := range extraContent {
+		if !isIPv6(rec.IP) {
+			ipv4ExtraContent = append(ipv4ExtraContent, rec)
+		}
+	}
+
+	return build(path, IP, hostname, domainname, defaultContentIPv4, ipv4ExtraContent)
+}
+
+func isIPv6(s string) bool {
+	addr, err := netip.ParseAddr(s)
+	return err == nil && addr.Is6()
+}
+
+func build(path, IP, hostname, domainname string, contents ...[]Record) error {
 	defer pathLock(path)()
 	defer pathLock(path)()
 
 
-	content := bytes.NewBuffer(nil)
+	buf := bytes.NewBuffer(nil)
 	if IP != "" {
 	if IP != "" {
 		// set main record
 		// set main record
 		var mainRec Record
 		var mainRec Record
@@ -89,24 +117,21 @@ func Build(path, IP, hostname, domainname string, extraContent []Record) error {
 		if hostName, _, ok := strings.Cut(fqdn, "."); ok {
 		if hostName, _, ok := strings.Cut(fqdn, "."); ok {
 			mainRec.Hosts += " " + hostName
 			mainRec.Hosts += " " + hostName
 		}
 		}
-		if _, err := mainRec.WriteTo(content); err != nil {
-			return err
-		}
-	}
-	// Write defaultContent slice to buffer
-	for _, r := range defaultContent {
-		if _, err := r.WriteTo(content); err != nil {
+		if _, err := mainRec.WriteTo(buf); err != nil {
 			return err
 			return err
 		}
 		}
 	}
 	}
-	// Write extra content from function arguments
-	for _, r := range extraContent {
-		if _, err := r.WriteTo(content); err != nil {
-			return err
+
+	// Write content from function arguments
+	for _, content := range contents {
+		for _, c := range content {
+			if _, err := c.WriteTo(buf); err != nil {
+				return err
+			}
 		}
 		}
 	}
 	}
 
 
-	return os.WriteFile(path, content.Bytes(), 0o644)
+	return os.WriteFile(path, buf.Bytes(), 0o644)
 }
 }
 
 
 // Add adds an arbitrary number of Records to an already existing /etc/hosts file
 // Add adds an arbitrary number of Records to an already existing /etc/hosts file

+ 23 - 0
libnetwork/etchosts/etchosts_test.go

@@ -4,9 +4,12 @@ import (
 	"bytes"
 	"bytes"
 	"fmt"
 	"fmt"
 	"os"
 	"os"
+	"path/filepath"
 	"testing"
 	"testing"
 
 
 	"golang.org/x/sync/errgroup"
 	"golang.org/x/sync/errgroup"
+	"gotest.tools/v3/assert"
+	is "gotest.tools/v3/assert/cmp"
 )
 )
 
 
 func TestBuildDefault(t *testing.T) {
 func TestBuildDefault(t *testing.T) {
@@ -35,6 +38,26 @@ func TestBuildDefault(t *testing.T) {
 	}
 	}
 }
 }
 
 
+func TestBuildNoIPv6(t *testing.T) {
+	d := t.TempDir()
+	filename := filepath.Join(d, "hosts")
+
+	err := BuildNoIPv6(filename, "fdbb:c59c:d015::2", "an.example", "", []Record{
+		{
+			Hosts: "another.example",
+			IP:    "fdbb:c59c:d015::3",
+		},
+		{
+			Hosts: "another.example",
+			IP:    "10.11.12.13",
+		},
+	})
+	assert.NilError(t, err)
+	content, err := os.ReadFile(filename)
+	assert.NilError(t, err)
+	assert.Check(t, is.DeepEqual(string(content), "127.0.0.1\tlocalhost\n10.11.12.13\tanother.example\n"))
+}
+
 func TestBuildHostnameDomainname(t *testing.T) {
 func TestBuildHostnameDomainname(t *testing.T) {
 	file, err := os.CreateTemp("", "")
 	file, err := os.CreateTemp("", "")
 	if err != nil {
 	if err != nil {

+ 26 - 0
libnetwork/netutils/utils.go

@@ -3,6 +3,7 @@
 package netutils
 package netutils
 
 
 import (
 import (
+	"context"
 	"crypto/rand"
 	"crypto/rand"
 	"encoding/hex"
 	"encoding/hex"
 	"errors"
 	"errors"
@@ -10,7 +11,9 @@ import (
 	"io"
 	"io"
 	"net"
 	"net"
 	"strings"
 	"strings"
+	"sync"
 
 
+	"github.com/containerd/log"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/docker/docker/libnetwork/types"
 )
 )
 
 
@@ -144,3 +147,26 @@ func ReverseIP(IP string) string {
 
 
 	return strings.Join(reverseIP, ".")
 	return strings.Join(reverseIP, ".")
 }
 }
+
+var (
+	v6ListenableCached bool
+	v6ListenableOnce   sync.Once
+)
+
+// IsV6Listenable returns true when `[::1]:0` is listenable.
+// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option.
+func IsV6Listenable() bool {
+	v6ListenableOnce.Do(func() {
+		ln, err := net.Listen("tcp6", "[::1]:0")
+		if err != nil {
+			// When the kernel was booted with `ipv6.disable=1`,
+			// we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol"
+			// https://github.com/moby/moby/issues/42288
+			log.G(context.TODO()).Debugf("v6Listenable=false (%v)", err)
+		} else {
+			v6ListenableCached = true
+			ln.Close()
+		}
+	})
+	return v6ListenableCached
+}

+ 0 - 4
libnetwork/osl/interface_linux.go

@@ -257,8 +257,6 @@ func (n *Namespace) AddInterface(srcName, dstPrefix string, options ...IfaceOpti
 	n.iFaces = append(n.iFaces, i)
 	n.iFaces = append(n.iFaces, i)
 	n.mu.Unlock()
 	n.mu.Unlock()
 
 
-	n.checkLoV6()
-
 	return nil
 	return nil
 }
 }
 
 
@@ -311,8 +309,6 @@ func (n *Namespace) RemoveInterface(i *Interface) error {
 	}
 	}
 	n.mu.Unlock()
 	n.mu.Unlock()
 
 
-	// TODO(aker): This function will disable IPv6 on lo interface if the removed interface was the last one offering IPv6 connectivity. That's a weird behavior, and shouldn't be hiding this deep down in this function.
-	n.checkLoV6()
 	return nil
 	return nil
 }
 }
 
 

+ 30 - 52
libnetwork/osl/namespace_linux.go

@@ -20,6 +20,7 @@ import (
 	"github.com/docker/docker/libnetwork/osl/kernel"
 	"github.com/docker/docker/libnetwork/osl/kernel"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/vishvananda/netlink"
 	"github.com/vishvananda/netlink"
+	"github.com/vishvananda/netlink/nl"
 	"github.com/vishvananda/netns"
 	"github.com/vishvananda/netns"
 	"golang.org/x/sys/unix"
 	"golang.org/x/sys/unix"
 )
 )
@@ -206,16 +207,6 @@ func NewSandbox(key string, osCreate, isRestore bool) (*Namespace, error) {
 	if err != nil {
 	if err != nil {
 		log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
 		log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
 	}
 	}
-	// In live-restore mode, IPV6 entries are getting cleaned up due to below code
-	// We should retain IPV6 configurations in live-restore mode when Docker Daemon
-	// comes back. It should work as it is on other cases
-	// As starting point, disable IPv6 on all interfaces
-	if !isRestore && !n.isDefault {
-		err = setIPv6(n.path, "all", false)
-		if err != nil {
-			log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
-		}
-	}
 
 
 	if err = n.loopbackUp(); err != nil {
 	if err = n.loopbackUp(); err != nil {
 		n.nlHandle.Close()
 		n.nlHandle.Close()
@@ -260,12 +251,6 @@ func GetSandboxForExternalKey(basePath string, key string) (*Namespace, error) {
 		log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
 		log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
 	}
 	}
 
 
-	// As starting point, disable IPv6 on all interfaces
-	err = setIPv6(n.path, "all", false)
-	if err != nil {
-		log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
-	}
-
 	if err = n.loopbackUp(); err != nil {
 	if err = n.loopbackUp(); err != nil {
 		n.nlHandle.Close()
 		n.nlHandle.Close()
 		return nil, err
 		return nil, err
@@ -325,17 +310,18 @@ func createNamespaceFile(path string) error {
 // or sets the gateway etc. It holds a list of Interfaces, routes etc., and more
 // or sets the gateway etc. It holds a list of Interfaces, routes etc., and more
 // can be added dynamically.
 // can be added dynamically.
 type Namespace struct {
 type Namespace struct {
-	path         string
-	iFaces       []*Interface
-	gw           net.IP
-	gwv6         net.IP
-	staticRoutes []*types.StaticRoute
-	neighbors    []*neigh
-	nextIfIndex  map[string]int
-	isDefault    bool
-	nlHandle     *netlink.Handle
-	loV6Enabled  bool
-	mu           sync.Mutex
+	path                string
+	iFaces              []*Interface
+	gw                  net.IP
+	gwv6                net.IP
+	staticRoutes        []*types.StaticRoute
+	neighbors           []*neigh
+	nextIfIndex         map[string]int
+	isDefault           bool
+	ipv6LoEnabledOnce   sync.Once
+	ipv6LoEnabledCached bool
+	nlHandle            *netlink.Handle
+	mu                  sync.Mutex
 }
 }
 
 
 // Interfaces returns the collection of Interface previously added with the AddInterface
 // Interfaces returns the collection of Interface previously added with the AddInterface
@@ -559,32 +545,24 @@ func (n *Namespace) Restore(interfaces map[Iface][]IfaceOption, routes []*types.
 	return nil
 	return nil
 }
 }
 
 
-// Checks whether IPv6 needs to be enabled/disabled on the loopback interface
-func (n *Namespace) checkLoV6() {
-	var (
-		enable = false
-		action = "disable"
-	)
-
-	n.mu.Lock()
-	for _, iface := range n.iFaces {
-		if iface.AddressIPv6() != nil {
-			enable = true
-			action = "enable"
-			break
+// IPv6LoEnabled checks whether the loopback interface has an IPv6 address ('::1'
+// is assigned by the kernel if IPv6 is enabled).
+func (n *Namespace) IPv6LoEnabled() bool {
+	n.ipv6LoEnabledOnce.Do(func() {
+		// If anything goes wrong, assume no-IPv6.
+		iface, err := n.nlHandle.LinkByName("lo")
+		if err != nil {
+			log.G(context.TODO()).WithError(err).Warn("Unable to find 'lo' to determine IPv6 support")
+			return
 		}
 		}
-	}
-	n.mu.Unlock()
-
-	if n.loV6Enabled == enable {
-		return
-	}
-
-	if err := setIPv6(n.path, "lo", enable); err != nil {
-		log.G(context.TODO()).Warnf("Failed to %s IPv6 on loopback interface on network namespace %q: %v", action, n.path, err)
-	}
-
-	n.loV6Enabled = enable
+		addrs, err := n.nlHandle.AddrList(iface, nl.FAMILY_V6)
+		if err != nil {
+			log.G(context.TODO()).WithError(err).Warn("Unable to get 'lo' addresses to determine IPv6 support")
+			return
+		}
+		n.ipv6LoEnabledCached = len(addrs) > 0
+	})
+	return n.ipv6LoEnabledCached
 }
 }
 
 
 // ApplyOSTweaks applies operating system specific knobs on the sandbox.
 // ApplyOSTweaks applies operating system specific knobs on the sandbox.

+ 56 - 13
libnetwork/sandbox_dns_unix.go

@@ -7,6 +7,7 @@ import (
 	"context"
 	"context"
 	"fmt"
 	"fmt"
 	"net"
 	"net"
+	"net/netip"
 	"os"
 	"os"
 	"path"
 	"path"
 	"path/filepath"
 	"path/filepath"
@@ -14,6 +15,7 @@ import (
 	"strings"
 	"strings"
 
 
 	"github.com/containerd/log"
 	"github.com/containerd/log"
+	"github.com/docker/docker/errdefs"
 	"github.com/docker/docker/libnetwork/etchosts"
 	"github.com/docker/docker/libnetwork/etchosts"
 	"github.com/docker/docker/libnetwork/resolvconf"
 	"github.com/docker/docker/libnetwork/resolvconf"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/docker/docker/libnetwork/types"
@@ -27,6 +29,21 @@ const (
 	resolverIPSandbox = "127.0.0.11"
 	resolverIPSandbox = "127.0.0.11"
 )
 )
 
 
+// finishInitDNS is to be called after the container namespace has been created,
+// before the user process is started. The container's support for IPv6 can be
+// determined at this point.
+func (sb *Sandbox) finishInitDNS() error {
+	if err := sb.buildHostsFile(); err != nil {
+		return errdefs.System(err)
+	}
+	for _, ep := range sb.Endpoints() {
+		if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil {
+			return errdefs.System(err)
+		}
+	}
+	return nil
+}
+
 func (sb *Sandbox) startResolver(restore bool) {
 func (sb *Sandbox) startResolver(restore bool) {
 	sb.resolverOnce.Do(func() {
 	sb.resolverOnce.Do(func() {
 		var err error
 		var err error
@@ -65,11 +82,17 @@ func (sb *Sandbox) startResolver(restore bool) {
 }
 }
 
 
 func (sb *Sandbox) setupResolutionFiles() error {
 func (sb *Sandbox) setupResolutionFiles() error {
-	if err := sb.buildHostsFile(); err != nil {
+	// Create a hosts file that can be mounted during container setup. For most
+	// networking modes (not host networking) it will be re-created before the
+	// container starts, once its support for IPv6 is known.
+	if sb.config.hostsPath == "" {
+		sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts"
+	}
+	dir, _ := filepath.Split(sb.config.hostsPath)
+	if err := createBasePath(dir); err != nil {
 		return err
 		return err
 	}
 	}
-
-	if err := sb.updateParentHosts(); err != nil {
+	if err := sb.buildHostsFile(); err != nil {
 		return err
 		return err
 	}
 	}
 
 
@@ -77,15 +100,6 @@ func (sb *Sandbox) setupResolutionFiles() error {
 }
 }
 
 
 func (sb *Sandbox) buildHostsFile() error {
 func (sb *Sandbox) buildHostsFile() error {
-	if sb.config.hostsPath == "" {
-		sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts"
-	}
-
-	dir, _ := filepath.Split(sb.config.hostsPath)
-	if err := createBasePath(dir); err != nil {
-		return err
-	}
-
 	// This is for the host mode networking
 	// This is for the host mode networking
 	if sb.config.useDefaultSandBox && len(sb.config.extraHosts) == 0 {
 	if sb.config.useDefaultSandBox && len(sb.config.extraHosts) == 0 {
 		// We are working under the assumption that the origin file option had been properly expressed by the upper layer
 		// We are working under the assumption that the origin file option had been properly expressed by the upper layer
@@ -101,7 +115,16 @@ func (sb *Sandbox) buildHostsFile() error {
 		extraContent = append(extraContent, etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP})
 		extraContent = append(extraContent, etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP})
 	}
 	}
 
 
-	return etchosts.Build(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent)
+	// Assume IPv6 support, unless it's definitely disabled.
+	buildf := etchosts.Build
+	if en, ok := sb.ipv6Enabled(); ok && !en {
+		buildf = etchosts.BuildNoIPv6
+	}
+	if err := buildf(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent); err != nil {
+		return err
+	}
+
+	return sb.updateParentHosts()
 }
 }
 
 
 func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error {
 func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error {
@@ -135,6 +158,16 @@ func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error {
 }
 }
 
 
 func (sb *Sandbox) addHostsEntries(recs []etchosts.Record) {
 func (sb *Sandbox) addHostsEntries(recs []etchosts.Record) {
+	// Assume IPv6 support, unless it's definitely disabled.
+	if en, ok := sb.ipv6Enabled(); ok && !en {
+		var filtered []etchosts.Record
+		for _, rec := range recs {
+			if addr, err := netip.ParseAddr(rec.IP); err == nil && !addr.Is6() {
+				filtered = append(filtered, rec)
+			}
+		}
+		recs = filtered
+	}
 	if err := etchosts.Add(sb.config.hostsPath, recs); err != nil {
 	if err := etchosts.Add(sb.config.hostsPath, recs); err != nil {
 		log.G(context.TODO()).Warnf("Failed adding service host entries to the running container: %v", err)
 		log.G(context.TODO()).Warnf("Failed adding service host entries to the running container: %v", err)
 	}
 	}
@@ -157,6 +190,16 @@ func (sb *Sandbox) updateParentHosts() error {
 		if pSb == nil {
 		if pSb == nil {
 			continue
 			continue
 		}
 		}
+		// TODO(robmry) - filter out IPv6 addresses here if !sb.ipv6Enabled() but...
+		// - this is part of the implementation of '--link', which will be removed along
+		//   with the rest of legacy networking.
+		// - IPv6 addresses shouldn't be allocated if IPv6 is not available in a container,
+		//   and that change will come along later.
+		// - I think this may be dead code: it's not possible to start a parent container with
+		//   '--link child' unless the child has already started ("Error response from daemon:
+		//   Cannot link to a non running container"). So, when the child starts and this method
+		//   is called with updates for parents, the parents aren't running and GetSandbox()
+		//   returns nil.
 		if err := etchosts.Update(pSb.config.hostsPath, update.ip, update.name); err != nil {
 		if err := etchosts.Update(pSb.config.hostsPath, update.ip, update.name); err != nil {
 			return err
 			return err
 		}
 		}

+ 26 - 0
libnetwork/sandbox_linux.go

@@ -7,6 +7,7 @@ import (
 	"time"
 	"time"
 
 
 	"github.com/containerd/log"
 	"github.com/containerd/log"
+	"github.com/docker/docker/libnetwork/netutils"
 	"github.com/docker/docker/libnetwork/osl"
 	"github.com/docker/docker/libnetwork/osl"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/docker/docker/libnetwork/types"
 )
 )
@@ -157,14 +158,39 @@ func (sb *Sandbox) SetKey(basePath string) error {
 		}
 		}
 	}
 	}
 
 
+	if err := sb.finishInitDNS(); err != nil {
+		return err
+	}
+
 	for _, ep := range sb.Endpoints() {
 	for _, ep := range sb.Endpoints() {
 		if err = sb.populateNetworkResources(ep); err != nil {
 		if err = sb.populateNetworkResources(ep); err != nil {
 			return err
 			return err
 		}
 		}
 	}
 	}
+
 	return nil
 	return nil
 }
 }
 
 
+// ipv6Enabled reports whether the sandbox supports IPv6. IPv6 support can always
+// be determined for host networking. For other network types it can only be
+// determined once there's a container namespace to probe; until then, ok=false
+// is returned.
+func (sb *Sandbox) ipv6Enabled() (enabled, ok bool) {
+	// For host networking, IPv6 support depends on the host.
+	if sb.config.useDefaultSandBox {
+		return netutils.IsV6Listenable(), true
+	}
+
+	// For other network types, look at whether the container's loopback interface has an IPv6 address.
+	sb.mu.Lock()
+	osSbox := sb.osSbox
+	sb.mu.Unlock()
+
+	if osSbox == nil {
+		return false, false
+	}
+	return osSbox.IPv6LoEnabled(), true
+}
+
 func (sb *Sandbox) releaseOSSbox() error {
 func (sb *Sandbox) releaseOSSbox() error {
 	sb.mu.Lock()
 	sb.mu.Lock()
 	osSbox := sb.osSbox
 	osSbox := sb.osSbox