瀏覽代碼

Merge pull request #47062 from robmry/35954-default_ipv6_enabled

Detect IPv6 support in containers, generate '/etc/hosts' accordingly.
Albin Kerouanton 1 年之前
父節點
當前提交
794f7127ef

+ 0 - 24
daemon/oci_linux.go

@@ -23,7 +23,6 @@ import (
 	"github.com/docker/docker/oci/caps"
 	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/rootless/specconv"
-	"github.com/docker/docker/pkg/stringid"
 	volumemounts "github.com/docker/docker/volume/mounts"
 	"github.com/moby/sys/mount"
 	"github.com/moby/sys/mountinfo"
@@ -61,28 +60,6 @@ func withRlimits(daemon *Daemon, daemonCfg *dconfig.Config, c *container.Contain
 	}
 }
 
-// withLibnetwork sets the libnetwork hook
-func withLibnetwork(daemon *Daemon, daemonCfg *dconfig.Config, c *container.Container) coci.SpecOpts {
-	return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
-		if c.Config.NetworkDisabled {
-			return nil
-		}
-		for _, ns := range s.Linux.Namespaces {
-			if ns.Type == specs.NetworkNamespace && ns.Path == "" {
-				if s.Hooks == nil {
-					s.Hooks = &specs.Hooks{}
-				}
-				shortNetCtlrID := stringid.TruncateID(daemon.netController.ID())
-				s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
-					Path: filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"),
-					Args: []string{"libnetwork-setkey", "-exec-root=" + daemonCfg.GetExecRoot(), c.ID, shortNetCtlrID},
-				})
-			}
-		}
-		return nil
-	}
-}
-
 // withRootless sets the spec to the rootless configuration
 func withRootless(daemon *Daemon, daemonCfg *dconfig.Config) coci.SpecOpts {
 	return func(_ context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
@@ -1070,7 +1047,6 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c
 		WithCapabilities(c),
 		WithSeccomp(daemon, c),
 		withMounts(daemon, daemonCfg, c, mounts),
-		withLibnetwork(daemon, &daemonCfg.Config, c),
 		WithApparmor(c),
 		WithSelinux(c),
 		WithOOMScore(&c.HostConfig.OomScoreAdj),

+ 4 - 0
daemon/start.go

@@ -236,6 +236,10 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
 		}
 	}()
 
+	if err := daemon.initializeCreatedTask(ctx, tsk, container, spec); err != nil {
+		return err
+	}
+
 	if err := tsk.Start(context.TODO()); err != nil { // passing ctx caused integration tests to be stuck in the cleanup phase
 		return setExitCodeFromError(container.SetExitCode, err)
 	}

+ 31 - 0
daemon/start_linux.go

@@ -0,0 +1,31 @@
+package daemon // import "github.com/docker/docker/daemon"
+
+import (
+	"context"
+	"fmt"
+
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+
+	"github.com/docker/docker/container"
+	"github.com/docker/docker/errdefs"
+	"github.com/docker/docker/libcontainerd/types"
+	"github.com/docker/docker/oci"
+)
+
+// initializeCreatedTask performs any initialization that needs to be done to
+// prepare a freshly-created task to be started.
+func (daemon *Daemon) initializeCreatedTask(ctx context.Context, tsk types.Task, container *container.Container, spec *specs.Spec) error {
+	if !container.Config.NetworkDisabled {
+		nspath, ok := oci.NamespacePath(spec, specs.NetworkNamespace)
+		if ok && nspath == "" { // the runtime has been instructed to create a new network namespace for tsk.
+			sb, err := daemon.netController.GetSandbox(container.ID)
+			if err != nil {
+				return errdefs.System(err)
+			}
+			if err := sb.SetKey(fmt.Sprintf("/proc/%d/ns/net", tsk.Pid())); err != nil {
+				return errdefs.System(err)
+			}
+		}
+	}
+	return nil
+}

+ 17 - 0
daemon/start_notlinux.go

@@ -0,0 +1,17 @@
+//go:build !linux
+
+package daemon // import "github.com/docker/docker/daemon"
+
+import (
+	"context"
+
+	"github.com/docker/docker/container"
+	"github.com/docker/docker/libcontainerd/types"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// initializeCreatedTask performs any initialization that needs to be done to
+// prepare a freshly-created task to be started.
+func (daemon *Daemon) initializeCreatedTask(ctx context.Context, tsk types.Task, container *container.Container, spec *specs.Spec) error {
+	return nil
+}

+ 8 - 0
integration/internal/container/ops.go

@@ -1,6 +1,7 @@
 package container
 
 import (
+	"maps"
 	"strings"
 
 	"github.com/docker/docker/api/types/container"
@@ -46,6 +47,13 @@ func WithNetworkMode(mode string) func(*TestContainerConfig) {
 	}
 }
 
+// WithSysctls sets sysctl options for the container
+func WithSysctls(sysctls map[string]string) func(*TestContainerConfig) {
+	return func(c *TestContainerConfig) {
+		c.HostConfig.Sysctls = maps.Clone(sysctls)
+	}
+}
+
 // WithExposedPorts sets the exposed ports of the container
 func WithExposedPorts(ports ...string) func(*TestContainerConfig) {
 	return func(c *TestContainerConfig) {

+ 107 - 0
integration/networking/etchosts_test.go

@@ -0,0 +1,107 @@
+package networking
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	containertypes "github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/integration/internal/container"
+	"github.com/docker/docker/testutil"
+	"github.com/docker/docker/testutil/daemon"
+	"gotest.tools/v3/assert"
+	is "gotest.tools/v3/assert/cmp"
+	"gotest.tools/v3/skip"
+)
+
+// Check that the '/etc/hosts' file in a container is created according to
+// whether the container supports IPv6.
+// Regression test for https://github.com/moby/moby/issues/35954
+func TestEtcHostsIpv6(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType == "windows")
+
+	ctx := setupTest(t)
+	d := daemon.New(t)
+	d.StartWithBusybox(ctx, t,
+		"--ipv6",
+		"--ip6tables",
+		"--experimental",
+		"--fixed-cidr-v6=fdc8:ffe2:d8d7:1234::/64")
+	defer d.Stop(t)
+
+	c := d.NewClientT(t)
+	defer c.Close()
+
+	testcases := []struct {
+		name           string
+		sysctls        map[string]string
+		expIPv6Enabled bool
+		expEtcHosts    string
+	}{
+		{
+			// Create a container with no overrides, on the IPv6-enabled default bridge.
+			// Expect the container to have a working '::1' address, on the assumption
+			// the test host's kernel supports IPv6 - and for its '/etc/hosts' file to
+			// include IPv6 addresses.
+			name:           "IPv6 enabled",
+			expIPv6Enabled: true,
+			expEtcHosts: `127.0.0.1	localhost
+::1	localhost ip6-localhost ip6-loopback
+fe00::0	ip6-localnet
+ff00::0	ip6-mcastprefix
+ff02::1	ip6-allnodes
+ff02::2	ip6-allrouters
+`,
+		},
+		{
+			// Create a container in the same network, with IPv6 disabled. Expect '::1'
+			// not to be pingable, and no IPv6 addresses in its '/etc/hosts'.
+			name:           "IPv6 disabled",
+			sysctls:        map[string]string{"net.ipv6.conf.all.disable_ipv6": "1"},
+			expIPv6Enabled: false,
+			expEtcHosts:    "127.0.0.1\tlocalhost\n",
+		},
+	}
+
+	for _, tc := range testcases {
+		t.Run(tc.name, func(t *testing.T) {
+			ctx := testutil.StartSpan(ctx, t)
+			ctrId := container.Run(ctx, t, c,
+				container.WithName("etchosts_"+sanitizeCtrName(t.Name())),
+				container.WithImage("busybox:latest"),
+				container.WithCmd("top"),
+				container.WithSysctls(tc.sysctls),
+			)
+			defer func() {
+				c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true})
+			}()
+
+			runCmd := func(ctrId string, cmd []string, expExitCode int) string {
+				t.Helper()
+				execCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+				defer cancel()
+				res, err := container.Exec(execCtx, c, ctrId, cmd)
+				assert.Check(t, is.Nil(err))
+				assert.Check(t, is.Equal(res.ExitCode, expExitCode))
+				return res.Stdout()
+			}
+
+			// Check that IPv6 is/isn't enabled, as expected.
+			var expPingExitStatus int
+			if !tc.expIPv6Enabled {
+				expPingExitStatus = 1
+			}
+			runCmd(ctrId, []string{"ping", "-6", "-c1", "-W3", "::1"}, expPingExitStatus)
+
+			// Check the contents of /etc/hosts.
+			stdout := runCmd(ctrId, []string{"cat", "/etc/hosts"}, 0)
+			// Append the container's own addresses/name to the expected hosts file content.
+			inspect := container.Inspect(ctx, t, c, ctrId)
+			exp := tc.expEtcHosts + inspect.NetworkSettings.IPAddress + "\t" + inspect.Config.Hostname + "\n"
+			if tc.expIPv6Enabled {
+				exp += inspect.NetworkSettings.GlobalIPv6Address + "\t" + inspect.Config.Hostname + "\n"
+			}
+			assert.Check(t, is.Equal(stdout, exp))
+		})
+	}
+}

+ 2 - 25
libnetwork/drivers/bridge/port_mapping_linux.go

@@ -6,9 +6,9 @@ import (
 	"errors"
 	"fmt"
 	"net"
-	"sync"
 
 	"github.com/containerd/log"
+	"github.com/docker/docker/libnetwork/netutils"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/ishidawataru/sctp"
 )
@@ -55,7 +55,7 @@ func (n *bridgeNetwork) allocatePortsInternal(bindings []types.PortBinding, cont
 		// skip adding implicit v6 addr, when the kernel was booted with `ipv6.disable=1`
 		// https://github.com/moby/moby/issues/42288
 		isV6Binding := c.HostIP != nil && c.HostIP.To4() == nil
-		if !isV6Binding && !IsV6Listenable() {
+		if !isV6Binding && !netutils.IsV6Listenable() {
 			continue
 		}
 
@@ -219,26 +219,3 @@ func (n *bridgeNetwork) releasePort(bnd types.PortBinding) error {
 
 	return portmapper.Unmap(host)
 }
-
-var (
-	v6ListenableCached bool
-	v6ListenableOnce   sync.Once
-)
-
-// IsV6Listenable returns true when `[::1]:0` is listenable.
-// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option.
-func IsV6Listenable() bool {
-	v6ListenableOnce.Do(func() {
-		ln, err := net.Listen("tcp6", "[::1]:0")
-		if err != nil {
-			// When the kernel was booted with `ipv6.disable=1`,
-			// we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol"
-			// https://github.com/moby/moby/issues/42288
-			log.G(context.TODO()).Debugf("port_mapping: v6Listenable=false (%v)", err)
-		} else {
-			v6ListenableCached = true
-			ln.Close()
-		}
-	})
-	return v6ListenableCached
-}

+ 13 - 25
libnetwork/endpoint.go

@@ -522,18 +522,8 @@ func (ep *Endpoint) sbJoin(sb *Sandbox, options ...EndpointOption) (err error) {
 		}
 	}
 
-	// Do not update hosts file with internal networks endpoint IP
-	if !n.ingress && n.Name() != libnGWNetwork {
-		var addresses []string
-		if ip := ep.getFirstInterfaceIPv4Address(); ip != nil {
-			addresses = append(addresses, ip.String())
-		}
-		if ip := ep.getFirstInterfaceIPv6Address(); ip != nil {
-			addresses = append(addresses, ip.String())
-		}
-		if err = sb.updateHostsFile(addresses); err != nil {
-			return err
-		}
+	if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil {
+		return err
 	}
 	if err = sb.updateDNS(n.enableIPv6); err != nil {
 		return err
@@ -904,26 +894,24 @@ func (ep *Endpoint) getSandbox() (*Sandbox, bool) {
 	return ps, ok
 }
 
-func (ep *Endpoint) getFirstInterfaceIPv4Address() net.IP {
+// Return a list of this endpoint's addresses to add to '/etc/hosts'.
+func (ep *Endpoint) getEtcHostsAddrs() []string {
 	ep.mu.Lock()
 	defer ep.mu.Unlock()
 
-	if ep.iface.addr != nil {
-		return ep.iface.addr.IP
+	// Do not update hosts file with internal network's endpoint IP
+	if n := ep.network; n == nil || n.ingress || n.Name() == libnGWNetwork {
+		return nil
 	}
 
-	return nil
-}
-
-func (ep *Endpoint) getFirstInterfaceIPv6Address() net.IP {
-	ep.mu.Lock()
-	defer ep.mu.Unlock()
-
+	var addresses []string
+	if ep.iface.addr != nil {
+		addresses = append(addresses, ep.iface.addr.IP.String())
+	}
 	if ep.iface.addrv6 != nil {
-		return ep.iface.addrv6.IP
+		addresses = append(addresses, ep.iface.addrv6.IP.String())
 	}
-
-	return nil
+	return addresses
 }
 
 // EndpointOptionGeneric function returns an option setter for a Generic option defined

+ 39 - 14
libnetwork/etchosts/etchosts.go

@@ -5,6 +5,7 @@ import (
 	"bytes"
 	"fmt"
 	"io"
+	"net/netip"
 	"os"
 	"regexp"
 	"strings"
@@ -25,8 +26,10 @@ func (r Record) WriteTo(w io.Writer) (int64, error) {
 
 var (
 	// Default hosts config records slice
-	defaultContent = []Record{
+	defaultContentIPv4 = []Record{
 		{Hosts: "localhost", IP: "127.0.0.1"},
+	}
+	defaultContentIPv6 = []Record{
 		{Hosts: "localhost ip6-localhost ip6-loopback", IP: "::1"},
 		{Hosts: "ip6-localnet", IP: "fe00::0"},
 		{Hosts: "ip6-mcastprefix", IP: "ff00::0"},
@@ -71,9 +74,34 @@ func Drop(path string) {
 // IP, hostname, and domainname set main record leave empty for no master record
 // extraContent is an array of extra host records.
 func Build(path, IP, hostname, domainname string, extraContent []Record) error {
+	return build(path, IP, hostname, domainname, defaultContentIPv4, defaultContentIPv6, extraContent)
+}
+
+// BuildNoIPv6 is the same as Build, but will not include IPv6 entries.
+func BuildNoIPv6(path, IP, hostname, domainname string, extraContent []Record) error {
+	if isIPv6(IP) {
+		IP = ""
+	}
+
+	var ipv4ExtraContent []Record
+	for _, rec := range extraContent {
+		if !isIPv6(rec.IP) {
+			ipv4ExtraContent = append(ipv4ExtraContent, rec)
+		}
+	}
+
+	return build(path, IP, hostname, domainname, defaultContentIPv4, ipv4ExtraContent)
+}
+
+func isIPv6(s string) bool {
+	addr, err := netip.ParseAddr(s)
+	return err == nil && addr.Is6()
+}
+
+func build(path, IP, hostname, domainname string, contents ...[]Record) error {
 	defer pathLock(path)()
 
-	content := bytes.NewBuffer(nil)
+	buf := bytes.NewBuffer(nil)
 	if IP != "" {
 		// set main record
 		var mainRec Record
@@ -89,24 +117,21 @@ func Build(path, IP, hostname, domainname string, extraContent []Record) error {
 		if hostName, _, ok := strings.Cut(fqdn, "."); ok {
 			mainRec.Hosts += " " + hostName
 		}
-		if _, err := mainRec.WriteTo(content); err != nil {
-			return err
-		}
-	}
-	// Write defaultContent slice to buffer
-	for _, r := range defaultContent {
-		if _, err := r.WriteTo(content); err != nil {
+		if _, err := mainRec.WriteTo(buf); err != nil {
 			return err
 		}
 	}
-	// Write extra content from function arguments
-	for _, r := range extraContent {
-		if _, err := r.WriteTo(content); err != nil {
-			return err
+
+	// Write content from function arguments
+	for _, content := range contents {
+		for _, c := range content {
+			if _, err := c.WriteTo(buf); err != nil {
+				return err
+			}
 		}
 	}
 
-	return os.WriteFile(path, content.Bytes(), 0o644)
+	return os.WriteFile(path, buf.Bytes(), 0o644)
 }
 
 // Add adds an arbitrary number of Records to an already existing /etc/hosts file

+ 23 - 0
libnetwork/etchosts/etchosts_test.go

@@ -4,9 +4,12 @@ import (
 	"bytes"
 	"fmt"
 	"os"
+	"path/filepath"
 	"testing"
 
 	"golang.org/x/sync/errgroup"
+	"gotest.tools/v3/assert"
+	is "gotest.tools/v3/assert/cmp"
 )
 
 func TestBuildDefault(t *testing.T) {
@@ -35,6 +38,26 @@ func TestBuildDefault(t *testing.T) {
 	}
 }
 
+func TestBuildNoIPv6(t *testing.T) {
+	d := t.TempDir()
+	filename := filepath.Join(d, "hosts")
+
+	err := BuildNoIPv6(filename, "fdbb:c59c:d015::2", "an.example", "", []Record{
+		{
+			Hosts: "another.example",
+			IP:    "fdbb:c59c:d015::3",
+		},
+		{
+			Hosts: "another.example",
+			IP:    "10.11.12.13",
+		},
+	})
+	assert.NilError(t, err)
+	content, err := os.ReadFile(filename)
+	assert.NilError(t, err)
+	assert.Check(t, is.DeepEqual(string(content), "127.0.0.1\tlocalhost\n10.11.12.13\tanother.example\n"))
+}
+
 func TestBuildHostnameDomainname(t *testing.T) {
 	file, err := os.CreateTemp("", "")
 	if err != nil {

+ 26 - 0
libnetwork/netutils/utils.go

@@ -3,6 +3,7 @@
 package netutils
 
 import (
+	"context"
 	"crypto/rand"
 	"encoding/hex"
 	"errors"
@@ -10,7 +11,9 @@ import (
 	"io"
 	"net"
 	"strings"
+	"sync"
 
+	"github.com/containerd/log"
 	"github.com/docker/docker/libnetwork/types"
 )
 
@@ -144,3 +147,26 @@ func ReverseIP(IP string) string {
 
 	return strings.Join(reverseIP, ".")
 }
+
+var (
+	v6ListenableCached bool
+	v6ListenableOnce   sync.Once
+)
+
+// IsV6Listenable returns true when `[::1]:0` is listenable.
+// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option.
+func IsV6Listenable() bool {
+	v6ListenableOnce.Do(func() {
+		ln, err := net.Listen("tcp6", "[::1]:0")
+		if err != nil {
+			// When the kernel was booted with `ipv6.disable=1`,
+			// we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol"
+			// https://github.com/moby/moby/issues/42288
+			log.G(context.TODO()).Debugf("v6Listenable=false (%v)", err)
+		} else {
+			v6ListenableCached = true
+			ln.Close()
+		}
+	})
+	return v6ListenableCached
+}

+ 0 - 4
libnetwork/osl/interface_linux.go

@@ -257,8 +257,6 @@ func (n *Namespace) AddInterface(srcName, dstPrefix string, options ...IfaceOpti
 	n.iFaces = append(n.iFaces, i)
 	n.mu.Unlock()
 
-	n.checkLoV6()
-
 	return nil
 }
 
@@ -311,8 +309,6 @@ func (n *Namespace) RemoveInterface(i *Interface) error {
 	}
 	n.mu.Unlock()
 
-	// TODO(aker): This function will disable IPv6 on lo interface if the removed interface was the last one offering IPv6 connectivity. That's a weird behavior, and shouldn't be hiding this deep down in this function.
-	n.checkLoV6()
 	return nil
 }
 

+ 35 - 53
libnetwork/osl/namespace_linux.go

@@ -20,6 +20,7 @@ import (
 	"github.com/docker/docker/libnetwork/osl/kernel"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/vishvananda/netlink"
+	"github.com/vishvananda/netlink/nl"
 	"github.com/vishvananda/netns"
 	"golang.org/x/sys/unix"
 )
@@ -206,16 +207,6 @@ func NewSandbox(key string, osCreate, isRestore bool) (*Namespace, error) {
 	if err != nil {
 		log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
 	}
-	// In live-restore mode, IPV6 entries are getting cleaned up due to below code
-	// We should retain IPV6 configurations in live-restore mode when Docker Daemon
-	// comes back. It should work as it is on other cases
-	// As starting point, disable IPv6 on all interfaces
-	if !isRestore && !n.isDefault {
-		err = setIPv6(n.path, "all", false)
-		if err != nil {
-			log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
-		}
-	}
 
 	if err = n.loopbackUp(); err != nil {
 		n.nlHandle.Close()
@@ -226,7 +217,11 @@ func NewSandbox(key string, osCreate, isRestore bool) (*Namespace, error) {
 }
 
 func mountNetworkNamespace(basePath string, lnPath string) error {
-	return syscall.Mount(basePath, lnPath, "bind", syscall.MS_BIND, "")
+	err := syscall.Mount(basePath, lnPath, "bind", syscall.MS_BIND, "")
+	if err != nil {
+		return fmt.Errorf("bind-mount %s -> %s: %w", basePath, lnPath, err)
+	}
+	return nil
 }
 
 // GetSandboxForExternalKey returns sandbox object for the supplied path
@@ -256,12 +251,6 @@ func GetSandboxForExternalKey(basePath string, key string) (*Namespace, error) {
 		log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
 	}
 
-	// As starting point, disable IPv6 on all interfaces
-	err = setIPv6(n.path, "all", false)
-	if err != nil {
-		log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
-	}
-
 	if err = n.loopbackUp(); err != nil {
 		n.nlHandle.Close()
 		return nil, err
@@ -321,17 +310,18 @@ func createNamespaceFile(path string) error {
 // or sets the gateway etc. It holds a list of Interfaces, routes etc., and more
 // can be added dynamically.
 type Namespace struct {
-	path         string
-	iFaces       []*Interface
-	gw           net.IP
-	gwv6         net.IP
-	staticRoutes []*types.StaticRoute
-	neighbors    []*neigh
-	nextIfIndex  map[string]int
-	isDefault    bool
-	nlHandle     *netlink.Handle
-	loV6Enabled  bool
-	mu           sync.Mutex
+	path                string
+	iFaces              []*Interface
+	gw                  net.IP
+	gwv6                net.IP
+	staticRoutes        []*types.StaticRoute
+	neighbors           []*neigh
+	nextIfIndex         map[string]int
+	isDefault           bool
+	ipv6LoEnabledOnce   sync.Once
+	ipv6LoEnabledCached bool
+	nlHandle            *netlink.Handle
+	mu                  sync.Mutex
 }
 
 // Interfaces returns the collection of Interface previously added with the AddInterface
@@ -555,32 +545,24 @@ func (n *Namespace) Restore(interfaces map[Iface][]IfaceOption, routes []*types.
 	return nil
 }
 
-// Checks whether IPv6 needs to be enabled/disabled on the loopback interface
-func (n *Namespace) checkLoV6() {
-	var (
-		enable = false
-		action = "disable"
-	)
-
-	n.mu.Lock()
-	for _, iface := range n.iFaces {
-		if iface.AddressIPv6() != nil {
-			enable = true
-			action = "enable"
-			break
+// IPv6LoEnabled checks whether the loopback interface has an IPv6 address ('::1'
+// is assigned by the kernel if IPv6 is enabled).
+func (n *Namespace) IPv6LoEnabled() bool {
+	n.ipv6LoEnabledOnce.Do(func() {
+		// If anything goes wrong, assume no-IPv6.
+		iface, err := n.nlHandle.LinkByName("lo")
+		if err != nil {
+			log.G(context.TODO()).WithError(err).Warn("Unable to find 'lo' to determine IPv6 support")
+			return
 		}
-	}
-	n.mu.Unlock()
-
-	if n.loV6Enabled == enable {
-		return
-	}
-
-	if err := setIPv6(n.path, "lo", enable); err != nil {
-		log.G(context.TODO()).Warnf("Failed to %s IPv6 on loopback interface on network namespace %q: %v", action, n.path, err)
-	}
-
-	n.loV6Enabled = enable
+		addrs, err := n.nlHandle.AddrList(iface, nl.FAMILY_V6)
+		if err != nil {
+			log.G(context.TODO()).WithError(err).Warn("Unable to get 'lo' addresses to determine IPv6 support")
+			return
+		}
+		n.ipv6LoEnabledCached = len(addrs) > 0
+	})
+	return n.ipv6LoEnabledCached
 }
 
 // ApplyOSTweaks applies operating system specific knobs on the sandbox.

+ 56 - 13
libnetwork/sandbox_dns_unix.go

@@ -7,6 +7,7 @@ import (
 	"context"
 	"fmt"
 	"net"
+	"net/netip"
 	"os"
 	"path"
 	"path/filepath"
@@ -14,6 +15,7 @@ import (
 	"strings"
 
 	"github.com/containerd/log"
+	"github.com/docker/docker/errdefs"
 	"github.com/docker/docker/libnetwork/etchosts"
 	"github.com/docker/docker/libnetwork/resolvconf"
 	"github.com/docker/docker/libnetwork/types"
@@ -27,6 +29,21 @@ const (
 	resolverIPSandbox = "127.0.0.11"
 )
 
+// finishInitDNS is to be called after the container namespace has been created,
+// before the user process is started. The container's support for IPv6 can be
+// determined at this point.
+func (sb *Sandbox) finishInitDNS() error {
+	if err := sb.buildHostsFile(); err != nil {
+		return errdefs.System(err)
+	}
+	for _, ep := range sb.Endpoints() {
+		if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil {
+			return errdefs.System(err)
+		}
+	}
+	return nil
+}
+
 func (sb *Sandbox) startResolver(restore bool) {
 	sb.resolverOnce.Do(func() {
 		var err error
@@ -65,11 +82,17 @@ func (sb *Sandbox) startResolver(restore bool) {
 }
 
 func (sb *Sandbox) setupResolutionFiles() error {
-	if err := sb.buildHostsFile(); err != nil {
+	// Create a hosts file that can be mounted during container setup. For most
+	// networking modes (not host networking) it will be re-created before the
+	// container starts, once its support for IPv6 is known.
+	if sb.config.hostsPath == "" {
+		sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts"
+	}
+	dir, _ := filepath.Split(sb.config.hostsPath)
+	if err := createBasePath(dir); err != nil {
 		return err
 	}
-
-	if err := sb.updateParentHosts(); err != nil {
+	if err := sb.buildHostsFile(); err != nil {
 		return err
 	}
 
@@ -77,15 +100,6 @@ func (sb *Sandbox) setupResolutionFiles() error {
 }
 
 func (sb *Sandbox) buildHostsFile() error {
-	if sb.config.hostsPath == "" {
-		sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts"
-	}
-
-	dir, _ := filepath.Split(sb.config.hostsPath)
-	if err := createBasePath(dir); err != nil {
-		return err
-	}
-
 	// This is for the host mode networking
 	if sb.config.useDefaultSandBox && len(sb.config.extraHosts) == 0 {
 		// We are working under the assumption that the origin file option had been properly expressed by the upper layer
@@ -101,7 +115,16 @@ func (sb *Sandbox) buildHostsFile() error {
 		extraContent = append(extraContent, etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP})
 	}
 
-	return etchosts.Build(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent)
+	// Assume IPv6 support, unless it's definitely disabled.
+	buildf := etchosts.Build
+	if en, ok := sb.ipv6Enabled(); ok && !en {
+		buildf = etchosts.BuildNoIPv6
+	}
+	if err := buildf(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent); err != nil {
+		return err
+	}
+
+	return sb.updateParentHosts()
 }
 
 func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error {
@@ -135,6 +158,16 @@ func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error {
 }
 
 func (sb *Sandbox) addHostsEntries(recs []etchosts.Record) {
+	// Assume IPv6 support, unless it's definitely disabled.
+	if en, ok := sb.ipv6Enabled(); ok && !en {
+		var filtered []etchosts.Record
+		for _, rec := range recs {
+			if addr, err := netip.ParseAddr(rec.IP); err == nil && !addr.Is6() {
+				filtered = append(filtered, rec)
+			}
+		}
+		recs = filtered
+	}
 	if err := etchosts.Add(sb.config.hostsPath, recs); err != nil {
 		log.G(context.TODO()).Warnf("Failed adding service host entries to the running container: %v", err)
 	}
@@ -157,6 +190,16 @@ func (sb *Sandbox) updateParentHosts() error {
 		if pSb == nil {
 			continue
 		}
+		// TODO(robmry) - filter out IPv6 addresses here if !sb.ipv6Enabled() but...
+		// - this is part of the implementation of '--link', which will be removed along
+		//   with the rest of legacy networking.
+		// - IPv6 addresses shouldn't be allocated if IPv6 is not available in a container,
+		//   and that change will come along later.
+		// - I think this may be dead code, it's not possible to start a parent container with
+		//   '--link child' unless the child has already started ("Error response from daemon:
+		//   Cannot link to a non running container"). So, when the child starts and this method
+		//   is called with updates for parents, the parents aren't running and GetSandbox()
+		//   returns nil.
 		if err := etchosts.Update(pSb.config.hostsPath, update.ip, update.name); err != nil {
 			return err
 		}

+ 26 - 0
libnetwork/sandbox_linux.go

@@ -7,6 +7,7 @@ import (
 	"time"
 
 	"github.com/containerd/log"
+	"github.com/docker/docker/libnetwork/netutils"
 	"github.com/docker/docker/libnetwork/osl"
 	"github.com/docker/docker/libnetwork/types"
 )
@@ -157,14 +158,39 @@ func (sb *Sandbox) SetKey(basePath string) error {
 		}
 	}
 
+	if err := sb.finishInitDNS(); err != nil {
+		return err
+	}
+
 	for _, ep := range sb.Endpoints() {
 		if err = sb.populateNetworkResources(ep); err != nil {
 			return err
 		}
 	}
+
 	return nil
 }
 
+// IPv6 support can always be determined for host networking. For other network
+// types it can only be determined once there's a container namespace to probe;
+// until then, ok=false is returned.
+func (sb *Sandbox) ipv6Enabled() (enabled, ok bool) {
+	// For host networking, IPv6 support depends on the host.
+	if sb.config.useDefaultSandBox {
+		return netutils.IsV6Listenable(), true
+	}
+
+	// For other network types, look at whether the container's loopback interface has an IPv6 address.
+	sb.mu.Lock()
+	osSbox := sb.osSbox
+	sb.mu.Unlock()
+
+	if osSbox == nil {
+		return false, false
+	}
+	return osSbox.IPv6LoEnabled(), true
+}
+
 func (sb *Sandbox) releaseOSSbox() error {
 	sb.mu.Lock()
 	osSbox := sb.osSbox

+ 11 - 0
oci/namespaces.go

@@ -14,3 +14,14 @@ func RemoveNamespace(s *specs.Spec, nsType specs.LinuxNamespaceType) {
 		}
 	}
 }
+
+// NamespacePath returns the configured Path of the first namespace in
+// s.Linux.Namespaces of type nsType.
+func NamespacePath(s *specs.Spec, nsType specs.LinuxNamespaceType) (path string, ok bool) {
+	for _, n := range s.Linux.Namespaces {
+		if n.Type == nsType {
+			return n.Path, true
+		}
+	}
+	return "", false
+}