Detect IPv6 support in containers.

Some configuration in a container depends on whether it has support for
IPv6 (including default entries for '::1' etc. in '/etc/hosts').

Before this change, the container's support for IPv6 was determined by
whether it was connected to any IPv6-enabled networks. But that can
change over time; it isn't a property of the container itself.

So, instead, detect IPv6 support by looking for '::1' on the container's
loopback interface. It will not be present if the kernel does not have
IPv6 support, or the user has disabled it in new namespaces by other
means.

Once IPv6 support has been determined for the container, its '/etc/hosts'
is re-generated accordingly.

The daemon no longer disables IPv6 on all interfaces during initialisation.
It now disables IPv6 only for interfaces that have not been assigned an
IPv6 address. (But, even if IPv6 is disabled for the container using the
sysctl 'net.ipv6.conf.all.disable_ipv6=1', interfaces connected to IPv6
networks still get IPv6 addresses that appear in the internal DNS. There's
more to do!)

Signed-off-by: Rob Murray <rob.murray@docker.com>
This commit is contained in:
Rob Murray 2024-01-11 16:44:58 +00:00
parent 0046b16d87
commit a8f7c5ee48
11 changed files with 332 additions and 135 deletions

View file

@ -1,6 +1,7 @@
package container
import (
"maps"
"strings"
"github.com/docker/docker/api/types/container"
@ -46,6 +47,13 @@ func WithNetworkMode(mode string) func(*TestContainerConfig) {
}
}
// WithSysctls sets sysctl options for the container.
// The supplied map is cloned when the returned option is applied, so the
// container's HostConfig does not alias the caller's map.
func WithSysctls(sysctls map[string]string) func(*TestContainerConfig) {
return func(c *TestContainerConfig) {
c.HostConfig.Sysctls = maps.Clone(sysctls)
}
}
// WithExposedPorts sets the exposed ports of the container
func WithExposedPorts(ports ...string) func(*TestContainerConfig) {
return func(c *TestContainerConfig) {

View file

@ -0,0 +1,107 @@
package networking
import (
"context"
"testing"
"time"
containertypes "github.com/docker/docker/api/types/container"
"github.com/docker/docker/integration/internal/container"
"github.com/docker/docker/testutil"
"github.com/docker/docker/testutil/daemon"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
"gotest.tools/v3/skip"
)
// Check that the '/etc/hosts' file in a container is created according to
// whether the container supports IPv6.
// Regression test for https://github.com/moby/moby/issues/35954
//
// Each test case runs a container, pings '::1' inside it to confirm that IPv6
// is (or is not) usable, then compares the container's '/etc/hosts' with the
// expected default entries followed by the container's own address entries.
func TestEtcHostsIpv6(t *testing.T) {
skip.If(t, testEnv.DaemonInfo.OSType == "windows")
ctx := setupTest(t)
// Start a separate daemon with IPv6 enabled and an IPv6 subnet configured
// via '--fixed-cidr-v6'.
d := daemon.New(t)
d.StartWithBusybox(ctx, t,
"--ipv6",
"--ip6tables",
"--experimental",
"--fixed-cidr-v6=fdc8:ffe2:d8d7:1234::/64")
defer d.Stop(t)
c := d.NewClientT(t)
defer c.Close()
testcases := []struct {
name string
sysctls map[string]string
expIPv6Enabled bool
expEtcHosts string
}{
{
// Create a container with no overrides, on the IPv6-enabled default bridge.
// Expect the container to have a working '::1' address, on the assumption
// the test host's kernel supports IPv6 - and for its '/etc/hosts' file to
// include IPv6 addresses.
name: "IPv6 enabled",
expIPv6Enabled: true,
expEtcHosts: `127.0.0.1 localhost
::1 localhost ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
`,
},
{
// Create a container in the same network, with IPv6 disabled. Expect '::1'
// not to be pingable, and no IPv6 addresses in its '/etc/hosts'.
name: "IPv6 disabled",
sysctls: map[string]string{"net.ipv6.conf.all.disable_ipv6": "1"},
expIPv6Enabled: false,
expEtcHosts: "127.0.0.1\tlocalhost\n",
},
}
for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
ctx := testutil.StartSpan(ctx, t)
ctrId := container.Run(ctx, t, c,
container.WithName("etchosts_"+sanitizeCtrName(t.Name())),
container.WithImage("busybox:latest"),
container.WithCmd("top"),
container.WithSysctls(tc.sysctls),
)
// Best-effort cleanup: the result of the forced removal is not checked.
defer func() {
c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true})
}()
// runCmd execs cmd in the container with a 5 second timeout, checks that
// it exits with expExitCode, and returns its stdout.
runCmd := func(ctrId string, cmd []string, expExitCode int) string {
t.Helper()
execCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
res, err := container.Exec(execCtx, c, ctrId, cmd)
assert.Check(t, is.Nil(err))
assert.Check(t, is.Equal(res.ExitCode, expExitCode))
return res.Stdout()
}
// Check that IPv6 is/isn't enabled, as expected.
// The zero value (exit status 0) is expected when IPv6 is enabled.
var expPingExitStatus int
if !tc.expIPv6Enabled {
expPingExitStatus = 1
}
runCmd(ctrId, []string{"ping", "-6", "-c1", "-W3", "::1"}, expPingExitStatus)
// Check the contents of /etc/hosts.
stdout := runCmd(ctrId, []string{"cat", "/etc/hosts"}, 0)
// Append the container's own addresses/name to the expected hosts file content.
// The IPv6 address entry is only expected when IPv6 is enabled in the container.
inspect := container.Inspect(ctx, t, c, ctrId)
exp := tc.expEtcHosts + inspect.NetworkSettings.IPAddress + "\t" + inspect.Config.Hostname + "\n"
if tc.expIPv6Enabled {
exp += inspect.NetworkSettings.GlobalIPv6Address + "\t" + inspect.Config.Hostname + "\n"
}
assert.Check(t, is.Equal(stdout, exp))
})
}
}

View file

@ -6,9 +6,9 @@ import (
"errors"
"fmt"
"net"
"sync"
"github.com/containerd/log"
"github.com/docker/docker/libnetwork/netutils"
"github.com/docker/docker/libnetwork/types"
"github.com/ishidawataru/sctp"
)
@ -55,7 +55,7 @@ func (n *bridgeNetwork) allocatePortsInternal(bindings []types.PortBinding, cont
// skip adding implicit v6 addr, when the kernel was booted with `ipv6.disable=1`
// https://github.com/moby/moby/issues/42288
isV6Binding := c.HostIP != nil && c.HostIP.To4() == nil
if !isV6Binding && !IsV6Listenable() {
if !isV6Binding && !netutils.IsV6Listenable() {
continue
}
@ -219,26 +219,3 @@ func (n *bridgeNetwork) releasePort(bnd types.PortBinding) error {
return portmapper.Unmap(host)
}
var (
	// v6ListenableOnce guards the single probe whose outcome is kept in
	// v6ListenableCached.
	v6ListenableOnce   sync.Once
	v6ListenableCached bool
)

// IsV6Listenable reports whether a TCP listener can be opened on `[::1]:0`.
// The probe runs once; later calls return the remembered result.
// It mostly reports false when the kernel was booted with `ipv6.disable=1`.
func IsV6Listenable() bool {
	v6ListenableOnce.Do(func() {
		ln, err := net.Listen("tcp6", "[::1]:0")
		if err == nil {
			// The listener was only a probe; release it straight away.
			ln.Close()
			v6ListenableCached = true
			return
		}
		// With `ipv6.disable=1` on the kernel command line the error is
		// "listen tcp6 [::1]:0: socket: address family not supported by protocol"
		// https://github.com/moby/moby/issues/42288
		log.G(context.TODO()).Debugf("port_mapping: v6Listenable=false (%v)", err)
	})
	return v6ListenableCached
}

View file

@ -478,18 +478,8 @@ func (ep *Endpoint) sbJoin(sb *Sandbox, options ...EndpointOption) (err error) {
}
}
// Do not update hosts file with internal networks endpoint IP
if !n.ingress && n.Name() != libnGWNetwork {
var addresses []string
if ip := ep.getFirstInterfaceIPv4Address(); ip != nil {
addresses = append(addresses, ip.String())
}
if ip := ep.getFirstInterfaceIPv6Address(); ip != nil {
addresses = append(addresses, ip.String())
}
if err = sb.updateHostsFile(addresses); err != nil {
return err
}
if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil {
return err
}
if err = sb.updateDNS(n.enableIPv6); err != nil {
return err
@ -860,26 +850,24 @@ func (ep *Endpoint) getSandbox() (*Sandbox, bool) {
return ps, ok
}
func (ep *Endpoint) getFirstInterfaceIPv4Address() net.IP {
// Return a list of this endpoint's addresses to add to '/etc/hosts'.
func (ep *Endpoint) getEtcHostsAddrs() []string {
ep.mu.Lock()
defer ep.mu.Unlock()
// Do not update hosts file with internal network's endpoint IP
if n := ep.network; n == nil || n.ingress || n.Name() == libnGWNetwork {
return nil
}
var addresses []string
if ep.iface.addr != nil {
return ep.iface.addr.IP
addresses = append(addresses, ep.iface.addr.IP.String())
}
return nil
}
func (ep *Endpoint) getFirstInterfaceIPv6Address() net.IP {
ep.mu.Lock()
defer ep.mu.Unlock()
if ep.iface.addrv6 != nil {
return ep.iface.addrv6.IP
addresses = append(addresses, ep.iface.addrv6.IP.String())
}
return nil
return addresses
}
// EndpointOptionGeneric function returns an option setter for a Generic option defined

View file

@ -5,6 +5,7 @@ import (
"bytes"
"fmt"
"io"
"net/netip"
"os"
"regexp"
"strings"
@ -25,8 +26,10 @@ func (r Record) WriteTo(w io.Writer) (int64, error) {
var (
// Default hosts config records slice
defaultContent = []Record{
defaultContentIPv4 = []Record{
{Hosts: "localhost", IP: "127.0.0.1"},
}
defaultContentIPv6 = []Record{
{Hosts: "localhost ip6-localhost ip6-loopback", IP: "::1"},
{Hosts: "ip6-localnet", IP: "fe00::0"},
{Hosts: "ip6-mcastprefix", IP: "ff00::0"},
@ -71,9 +74,34 @@ func Drop(path string) {
// IP, hostname, and domainname set main record leave empty for no master record
// extraContent is an array of extra host records.
func Build(path, IP, hostname, domainname string, extraContent []Record) error {
// Pass the default IPv4 records, then the default IPv6 records, then the
// caller's extra records to the shared implementation.
return build(path, IP, hostname, domainname, defaultContentIPv4, defaultContentIPv6, extraContent)
}
// BuildNoIPv6 writes the hosts file like Build does, but without IPv6 entries:
// the main record is dropped when IP is an IPv6 address, the default IPv6
// records are not written, and extraContent records with an IPv6 address are
// skipped.
func BuildNoIPv6(path, IP, hostname, domainname string, extraContent []Record) error {
	mainIP := IP
	if isIPv6(mainIP) {
		// An empty IP makes build leave out the main record.
		mainIP = ""
	}

	var kept []Record
	for _, rec := range extraContent {
		if isIPv6(rec.IP) {
			continue
		}
		kept = append(kept, rec)
	}

	return build(path, mainIP, hostname, domainname, defaultContentIPv4, kept)
}
// isIPv6 reports whether s parses as an IP address for which netip's Is6 is
// true. A string that is not a valid IP address yields false.
func isIPv6(s string) bool {
	if addr, err := netip.ParseAddr(s); err == nil {
		return addr.Is6()
	}
	return false
}
func build(path, IP, hostname, domainname string, contents ...[]Record) error {
defer pathLock(path)()
content := bytes.NewBuffer(nil)
buf := bytes.NewBuffer(nil)
if IP != "" {
// set main record
var mainRec Record
@ -89,24 +117,21 @@ func Build(path, IP, hostname, domainname string, extraContent []Record) error {
if hostName, _, ok := strings.Cut(fqdn, "."); ok {
mainRec.Hosts += " " + hostName
}
if _, err := mainRec.WriteTo(content); err != nil {
return err
}
}
// Write defaultContent slice to buffer
for _, r := range defaultContent {
if _, err := r.WriteTo(content); err != nil {
return err
}
}
// Write extra content from function arguments
for _, r := range extraContent {
if _, err := r.WriteTo(content); err != nil {
if _, err := mainRec.WriteTo(buf); err != nil {
return err
}
}
return os.WriteFile(path, content.Bytes(), 0o644)
// Write content from function arguments
for _, content := range contents {
for _, c := range content {
if _, err := c.WriteTo(buf); err != nil {
return err
}
}
}
return os.WriteFile(path, buf.Bytes(), 0o644)
}
// Add adds an arbitrary number of Records to an already existing /etc/hosts file

View file

@ -4,9 +4,12 @@ import (
"bytes"
"fmt"
"os"
"path/filepath"
"testing"
"golang.org/x/sync/errgroup"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
)
func TestBuildDefault(t *testing.T) {
@ -35,6 +38,26 @@ func TestBuildDefault(t *testing.T) {
}
}
func TestBuildNoIPv6(t *testing.T) {
d := t.TempDir()
filename := filepath.Join(d, "hosts")
err := BuildNoIPv6(filename, "fdbb:c59c:d015::2", "an.example", "", []Record{
{
Hosts: "another.example",
IP: "fdbb:c59c:d015::3",
},
{
Hosts: "another.example",
IP: "10.11.12.13",
},
})
assert.NilError(t, err)
content, err := os.ReadFile(filename)
assert.NilError(t, err)
assert.Check(t, is.DeepEqual(string(content), "127.0.0.1\tlocalhost\n10.11.12.13\tanother.example\n"))
}
func TestBuildHostnameDomainname(t *testing.T) {
file, err := os.CreateTemp("", "")
if err != nil {

View file

@ -3,6 +3,7 @@
package netutils
import (
"context"
"crypto/rand"
"encoding/hex"
"errors"
@ -10,7 +11,9 @@ import (
"io"
"net"
"strings"
"sync"
"github.com/containerd/log"
"github.com/docker/docker/libnetwork/types"
)
@ -144,3 +147,26 @@ func ReverseIP(IP string) string {
return strings.Join(reverseIP, ".")
}
var (
	// v6ListenableOnce guards the single probe whose outcome is kept in
	// v6ListenableCached.
	v6ListenableOnce   sync.Once
	v6ListenableCached bool
)

// IsV6Listenable reports whether a TCP listener can be opened on `[::1]:0`.
// The probe runs once; later calls return the remembered result.
// It mostly reports false when the kernel was booted with `ipv6.disable=1`.
func IsV6Listenable() bool {
	v6ListenableOnce.Do(func() {
		ln, err := net.Listen("tcp6", "[::1]:0")
		if err == nil {
			// The listener was only a probe; release it straight away.
			ln.Close()
			v6ListenableCached = true
			return
		}
		// With `ipv6.disable=1` on the kernel command line the error is
		// "listen tcp6 [::1]:0: socket: address family not supported by protocol"
		// https://github.com/moby/moby/issues/42288
		log.G(context.TODO()).Debugf("v6Listenable=false (%v)", err)
	})
	return v6ListenableCached
}

View file

@ -257,8 +257,6 @@ func (n *Namespace) AddInterface(srcName, dstPrefix string, options ...IfaceOpti
n.iFaces = append(n.iFaces, i)
n.mu.Unlock()
n.checkLoV6()
return nil
}
@ -311,8 +309,6 @@ func (n *Namespace) RemoveInterface(i *Interface) error {
}
n.mu.Unlock()
// TODO(aker): This function will disable IPv6 on lo interface if the removed interface was the last one offering IPv6 connectivity. That's a weird behavior, and shouldn't be hiding this deep down in this function.
n.checkLoV6()
return nil
}

View file

@ -20,6 +20,7 @@ import (
"github.com/docker/docker/libnetwork/osl/kernel"
"github.com/docker/docker/libnetwork/types"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
"golang.org/x/sys/unix"
)
@ -206,16 +207,6 @@ func NewSandbox(key string, osCreate, isRestore bool) (*Namespace, error) {
if err != nil {
log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
}
// In live-restore mode, IPV6 entries are getting cleaned up due to below code
// We should retain IPV6 configurations in live-restore mode when Docker Daemon
// comes back. It should work as it is on other cases
// As starting point, disable IPv6 on all interfaces
if !isRestore && !n.isDefault {
err = setIPv6(n.path, "all", false)
if err != nil {
log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
}
}
if err = n.loopbackUp(); err != nil {
n.nlHandle.Close()
@ -260,12 +251,6 @@ func GetSandboxForExternalKey(basePath string, key string) (*Namespace, error) {
log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
}
// As starting point, disable IPv6 on all interfaces
err = setIPv6(n.path, "all", false)
if err != nil {
log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
}
if err = n.loopbackUp(); err != nil {
n.nlHandle.Close()
return nil, err
@ -325,17 +310,18 @@ func createNamespaceFile(path string) error {
// or sets the gateway etc. It holds a list of Interfaces, routes etc., and more
// can be added dynamically.
type Namespace struct {
path string
iFaces []*Interface
gw net.IP
gwv6 net.IP
staticRoutes []*types.StaticRoute
neighbors []*neigh
nextIfIndex map[string]int
isDefault bool
nlHandle *netlink.Handle
loV6Enabled bool
mu sync.Mutex
path string
iFaces []*Interface
gw net.IP
gwv6 net.IP
staticRoutes []*types.StaticRoute
neighbors []*neigh
nextIfIndex map[string]int
isDefault bool
ipv6LoEnabledOnce sync.Once
ipv6LoEnabledCached bool
nlHandle *netlink.Handle
mu sync.Mutex
}
// Interfaces returns the collection of Interface previously added with the AddInterface
@ -559,32 +545,24 @@ func (n *Namespace) Restore(interfaces map[Iface][]IfaceOption, routes []*types.
return nil
}
// Checks whether IPv6 needs to be enabled/disabled on the loopback interface
func (n *Namespace) checkLoV6() {
var (
enable = false
action = "disable"
)
n.mu.Lock()
for _, iface := range n.iFaces {
if iface.AddressIPv6() != nil {
enable = true
action = "enable"
break
// IPv6LoEnabled checks whether the loopback interface has an IPv6 address ('::1'
// is assigned by the kernel if IPv6 is enabled).
func (n *Namespace) IPv6LoEnabled() bool {
n.ipv6LoEnabledOnce.Do(func() {
// If anything goes wrong, assume no-IPv6.
iface, err := n.nlHandle.LinkByName("lo")
if err != nil {
log.G(context.TODO()).WithError(err).Warn("Unable to find 'lo' to determine IPv6 support")
return
}
}
n.mu.Unlock()
if n.loV6Enabled == enable {
return
}
if err := setIPv6(n.path, "lo", enable); err != nil {
log.G(context.TODO()).Warnf("Failed to %s IPv6 on loopback interface on network namespace %q: %v", action, n.path, err)
}
n.loV6Enabled = enable
addrs, err := n.nlHandle.AddrList(iface, nl.FAMILY_V6)
if err != nil {
log.G(context.TODO()).WithError(err).Warn("Unable to get 'lo' addresses to determine IPv6 support")
return
}
n.ipv6LoEnabledCached = len(addrs) > 0
})
return n.ipv6LoEnabledCached
}
// ApplyOSTweaks applies operating system specific knobs on the sandbox.

View file

@ -7,6 +7,7 @@ import (
"context"
"fmt"
"net"
"net/netip"
"os"
"path"
"path/filepath"
@ -14,6 +15,7 @@ import (
"strings"
"github.com/containerd/log"
"github.com/docker/docker/errdefs"
"github.com/docker/docker/libnetwork/etchosts"
"github.com/docker/docker/libnetwork/resolvconf"
"github.com/docker/docker/libnetwork/types"
@ -27,6 +29,21 @@ const (
resolverIPSandbox = "127.0.0.11"
)
// finishInitDNS is to be called after the container namespace has been created,
// before the user process is started. The container's support for IPv6 can be
// determined at this point, so the hosts file is rebuilt and the addresses of
// every endpoint in the sandbox are added to it again. Failures are returned
// as system errors.
func (sb *Sandbox) finishInitDNS() error {
	err := sb.buildHostsFile()
	if err != nil {
		return errdefs.System(err)
	}

	for _, ep := range sb.Endpoints() {
		addrs := ep.getEtcHostsAddrs()
		if err := sb.updateHostsFile(addrs); err != nil {
			return errdefs.System(err)
		}
	}
	return nil
}
func (sb *Sandbox) startResolver(restore bool) {
sb.resolverOnce.Do(func() {
var err error
@ -65,11 +82,17 @@ func (sb *Sandbox) startResolver(restore bool) {
}
func (sb *Sandbox) setupResolutionFiles() error {
if err := sb.buildHostsFile(); err != nil {
// Create a hosts file that can be mounted during container setup. For most
// networking modes (not host networking) it will be re-created before the
// container start, once its support for IPv6 is known.
if sb.config.hostsPath == "" {
sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts"
}
dir, _ := filepath.Split(sb.config.hostsPath)
if err := createBasePath(dir); err != nil {
return err
}
if err := sb.updateParentHosts(); err != nil {
if err := sb.buildHostsFile(); err != nil {
return err
}
@ -77,15 +100,6 @@ func (sb *Sandbox) setupResolutionFiles() error {
}
func (sb *Sandbox) buildHostsFile() error {
if sb.config.hostsPath == "" {
sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts"
}
dir, _ := filepath.Split(sb.config.hostsPath)
if err := createBasePath(dir); err != nil {
return err
}
// This is for the host mode networking
if sb.config.useDefaultSandBox && len(sb.config.extraHosts) == 0 {
// We are working under the assumption that the origin file option had been properly expressed by the upper layer
@ -101,7 +115,16 @@ func (sb *Sandbox) buildHostsFile() error {
extraContent = append(extraContent, etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP})
}
return etchosts.Build(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent)
// Assume IPv6 support, unless it's definitely disabled.
buildf := etchosts.Build
if en, ok := sb.ipv6Enabled(); ok && !en {
buildf = etchosts.BuildNoIPv6
}
if err := buildf(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent); err != nil {
return err
}
return sb.updateParentHosts()
}
func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error {
@ -135,6 +158,16 @@ func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error {
}
func (sb *Sandbox) addHostsEntries(recs []etchosts.Record) {
// Assume IPv6 support, unless it's definitely disabled.
if en, ok := sb.ipv6Enabled(); ok && !en {
var filtered []etchosts.Record
for _, rec := range recs {
if addr, err := netip.ParseAddr(rec.IP); err == nil && !addr.Is6() {
filtered = append(filtered, rec)
}
}
recs = filtered
}
if err := etchosts.Add(sb.config.hostsPath, recs); err != nil {
log.G(context.TODO()).Warnf("Failed adding service host entries to the running container: %v", err)
}
@ -157,6 +190,16 @@ func (sb *Sandbox) updateParentHosts() error {
if pSb == nil {
continue
}
// TODO(robmry) - filter out IPv6 addresses here if !sb.ipv6Enabled() but...
// - this is part of the implementation of '--link', which will be removed along
// with the rest of legacy networking.
// - IPv6 addresses shouldn't be allocated if IPv6 is not available in a container,
// and that change will come along later.
// - I think this may be dead code, it's not possible to start a parent container with
// '--link child' unless the child has already started ("Error response from daemon:
// Cannot link to a non running container"). So, when the child starts and this method
// is called with updates for parents, the parents aren't running and GetSandbox()
// returns nil.)
if err := etchosts.Update(pSb.config.hostsPath, update.ip, update.name); err != nil {
return err
}

View file

@ -7,6 +7,7 @@ import (
"time"
"github.com/containerd/log"
"github.com/docker/docker/libnetwork/netutils"
"github.com/docker/docker/libnetwork/osl"
"github.com/docker/docker/libnetwork/types"
)
@ -157,14 +158,39 @@ func (sb *Sandbox) SetKey(basePath string) error {
}
}
if err := sb.finishInitDNS(); err != nil {
return err
}
for _, ep := range sb.Endpoints() {
if err = sb.populateNetworkResources(ep); err != nil {
return err
}
}
return nil
}
// ipv6Enabled reports whether the sandbox supports IPv6. The second result,
// ok, is false when that cannot be determined yet.
//
// With host networking the answer depends on the host and is always known.
// For other network types it is only known once there is a container
// namespace to probe.
func (sb *Sandbox) ipv6Enabled() (enabled, ok bool) {
	if sb.config.useDefaultSandBox {
		// Host networking: IPv6 support is that of the host.
		return netutils.IsV6Listenable(), true
	}

	// Take a snapshot of the OS sandbox under the lock; probe it outside.
	sb.mu.Lock()
	osSbox := sb.osSbox
	sb.mu.Unlock()

	if osSbox != nil {
		// Look at whether the container's loopback interface has an IPv6 address.
		return osSbox.IPv6LoEnabled(), true
	}
	return false, false
}
func (sb *Sandbox) releaseOSSbox() error {
sb.mu.Lock()
osSbox := sb.osSbox