Browse Source

Windows DNS resolver forwarding

Make the internal DNS resolver for Windows containers forward requests
to upstream DNS servers when it cannot respond itself, rather than
returning SERVFAIL.

Windows containers are normally configured with the internal resolver
first for service discovery (container name lookup), then external
resolvers from '--dns' or the host's networking configuration.

When a tool like ping gets a SERVFAIL from the internal resolver, it
tries the other nameservers. But, nslookup does not, and with this
change it does not need to.

The internal resolver learns external server addresses from the
container's HNSEndpoint configuration, so it will use the same DNS
servers as processes in the container.

The internal resolver for Windows containers listens on the network's
gateway address, and each container may have a different set of external
DNS servers. So, the resolver uses the source address of the DNS request
to select external resolvers.

On Windows, daemon.json feature option 'windows-dns-proxy' must be set to
'true' to make the internal resolver forward requests. Without it, the
resolver does not forward requests (the old behaviour).

Signed-off-by: Rob Murray <rob.murray@docker.com>
Rob Murray 1 year ago
parent
commit
6c68be24a2

+ 4 - 0
daemon/config/config_linux.go

@@ -196,6 +196,10 @@ func (conf *Config) ValidatePlatformConfig() error {
 		return errors.Wrap(err, "invalid fixed-cidr-v6")
 		return errors.Wrap(err, "invalid fixed-cidr-v6")
 	}
 	}
 
 
+	if _, ok := conf.Features["windows-dns-proxy"]; ok {
+		return errors.New("feature option 'windows-dns-proxy' is only available on Windows")
+	}
+
 	return verifyDefaultCgroupNsMode(conf.CgroupNamespaceMode)
 	return verifyDefaultCgroupNsMode(conf.CgroupNamespaceMode)
 }
 }
 
 

+ 2 - 1
daemon/container_operations.go

@@ -54,7 +54,8 @@ func (daemon *Daemon) buildSandboxOptions(cfg *config.Config, container *contain
 		sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey())
 		sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey())
 	}
 	}
 
 
-	if err := setupPathsAndSandboxOptions(container, cfg, &sboxOptions); err != nil {
+	// Add platform-specific Sandbox options.
+	if err := buildSandboxPlatformOptions(container, cfg, &sboxOptions); err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
 
 

+ 2 - 1
daemon/container_operations_unix.go

@@ -417,7 +417,7 @@ func serviceDiscoveryOnDefaultNetwork() bool {
 	return false
 	return false
 }
 }
 
 
-func setupPathsAndSandboxOptions(container *container.Container, cfg *config.Config, sboxOptions *[]libnetwork.SandboxOption) error {
+func buildSandboxPlatformOptions(container *container.Container, cfg *config.Config, sboxOptions *[]libnetwork.SandboxOption) error {
 	var err error
 	var err error
 	var originResolvConfPath string
 	var originResolvConfPath string
 
 
@@ -481,6 +481,7 @@ func setupPathsAndSandboxOptions(container *container.Container, cfg *config.Con
 		return err
 		return err
 	}
 	}
 	*sboxOptions = append(*sboxOptions, libnetwork.OptionResolvConfPath(container.ResolvConfPath))
 	*sboxOptions = append(*sboxOptions, libnetwork.OptionResolvConfPath(container.ResolvConfPath))
+
 	return nil
 	return nil
 }
 }
 
 

+ 7 - 1
daemon/container_operations_windows.go

@@ -163,7 +163,13 @@ func serviceDiscoveryOnDefaultNetwork() bool {
 	return true
 	return true
 }
 }
 
 
-func setupPathsAndSandboxOptions(container *container.Container, cfg *config.Config, sboxOptions *[]libnetwork.SandboxOption) error {
+func buildSandboxPlatformOptions(container *container.Container, cfg *config.Config, sboxOptions *[]libnetwork.SandboxOption) error {
+	// By default, the Windows internal resolver does not forward requests to
+	// external resolvers - but forwarding can be enabled using feature flag
+	// "windows-dns-proxy":true.
+	if doproxy, exists := cfg.Features["windows-dns-proxy"]; !exists || !doproxy {
+		*sboxOptions = append(*sboxOptions, libnetwork.OptionDNSNoProxy())
+	}
 	return nil
 	return nil
 }
 }
 
 

+ 26 - 0
integration/networking/resolvconf_test.go

@@ -1,8 +1,10 @@
 package networking
 package networking
 
 
 import (
 import (
+	"context"
 	"strings"
 	"strings"
 	"testing"
 	"testing"
+	"time"
 
 
 	containertypes "github.com/docker/docker/api/types/container"
 	containertypes "github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/integration/internal/container"
 	"github.com/docker/docker/integration/internal/container"
@@ -131,3 +133,27 @@ func TestInternalNetworkDNS(t *testing.T) {
 	assert.Check(t, is.Equal(res.ExitCode, 0))
 	assert.Check(t, is.Equal(res.ExitCode, 0))
 	assert.Check(t, is.Contains(res.Stdout(), network.DNSRespAddr))
 	assert.Check(t, is.Contains(res.Stdout(), network.DNSRespAddr))
 }
 }
+
+// TestNslookupWindows runs nslookup for an external name inside a Windows
+// container and checks the result against the current default behaviour of
+// the internal resolver.
+// Regression test for https://github.com/moby/moby/issues/46792
+func TestNslookupWindows(t *testing.T) {
+	skip.If(t, testEnv.DaemonInfo.OSType != "windows")
+
+	ctx := setupTest(t)
+	apiClient := testEnv.APIClient()
+
+	// The attach is limited to five seconds.
+	runCtx, cancelRun := context.WithTimeout(ctx, 5*time.Second)
+	defer cancelRun()
+	result := container.RunAttach(runCtx, t, apiClient,
+		container.WithCmd("nslookup", "docker.com"),
+	)
+	defer apiClient.ContainerRemove(ctx, result.ContainerID, containertypes.RemoveOptions{Force: true})
+
+	assert.Check(t, is.Equal(result.ExitCode, 0))
+	// Forwarding to external servers is off by default, and can only be
+	// switched on in daemon.json using feature flag "windows-dns-proxy".
+	// So, for now, the lookup is expected to fail...
+	assert.Check(t, is.Contains(result.Stderr.String(), "Server failed"))
+	// Once forwarding becomes the default, nslookup should succeed...
+	//assert.Check(t, is.Contains(result.Stdout.String(), "Addresses:"))
+}

+ 9 - 0
libnetwork/endpoint.go

@@ -11,6 +11,7 @@ import (
 	"sync"
 	"sync"
 
 
 	"github.com/containerd/log"
 	"github.com/containerd/log"
+	"github.com/docker/docker/errdefs"
 	"github.com/docker/docker/internal/sliceutil"
 	"github.com/docker/docker/internal/sliceutil"
 	"github.com/docker/docker/libnetwork/datastore"
 	"github.com/docker/docker/libnetwork/datastore"
 	"github.com/docker/docker/libnetwork/ipamapi"
 	"github.com/docker/docker/libnetwork/ipamapi"
@@ -543,6 +544,10 @@ func (ep *Endpoint) sbJoin(sb *Sandbox, options ...EndpointOption) (err error) {
 		return err
 		return err
 	}
 	}
 
 
+	if err = addEpToResolver(context.TODO(), n.Name(), ep.Name(), &sb.config, ep.iface, n.Resolvers()); err != nil {
+		return errdefs.System(err)
+	}
+
 	if err = n.getController().updateToStore(ep); err != nil {
 	if err = n.getController().updateToStore(ep); err != nil {
 		return err
 		return err
 	}
 	}
@@ -745,6 +750,10 @@ func (ep *Endpoint) sbLeave(sb *Sandbox, force bool) error {
 		log.G(context.TODO()).Warnf("Failed to clean up service info on container %s disconnect: %v", ep.name, err)
 		log.G(context.TODO()).Warnf("Failed to clean up service info on container %s disconnect: %v", ep.name, err)
 	}
 	}
 
 
+	if err := deleteEpFromResolver(ep.Name(), ep.iface, n.Resolvers()); err != nil {
+		log.G(context.TODO()).Warnf("Failed to clean up resolver info on container %s disconnect: %v", ep.name, err)
+	}
+
 	if err := sb.clearNetworkResources(ep); err != nil {
 	if err := sb.clearNetworkResources(ep); err != nil {
 		log.G(context.TODO()).Warnf("Failed to clean up network resources on container %s disconnect: %v", ep.name, err)
 		log.G(context.TODO()).Warnf("Failed to clean up network resources on container %s disconnect: %v", ep.name, err)
 	}
 	}

+ 8 - 5
libnetwork/network.go

@@ -192,7 +192,6 @@ type Network struct {
 	dbExists         bool
 	dbExists         bool
 	persist          bool
 	persist          bool
 	drvOnce          *sync.Once
 	drvOnce          *sync.Once
-	resolverOnce     sync.Once //nolint:nolintlint,unused // only used on windows
 	resolver         []*Resolver
 	resolver         []*Resolver
 	internal         bool
 	internal         bool
 	attachable       bool
 	attachable       bool
@@ -204,6 +203,7 @@ type Network struct {
 	configFrom       string
 	configFrom       string
 	loadBalancerIP   net.IP
 	loadBalancerIP   net.IP
 	loadBalancerMode string
 	loadBalancerMode string
+	platformNetwork  //nolint:nolintlint,unused // only populated on windows
 	mu               sync.Mutex
 	mu               sync.Mutex
 }
 }
 
 
@@ -244,6 +244,13 @@ func (n *Network) Type() string {
 	return n.networkType
 	return n.networkType
 }
 }
 
 
+// Resolvers returns the network's slice of internal DNS resolvers. The field
+// is read while holding n.mu; the returned slice is not a copy.
+func (n *Network) Resolvers() []*Resolver {
+	n.mu.Lock()
+	resolvers := n.resolver
+	n.mu.Unlock()
+
+	return resolvers
+}
+
 func (n *Network) Key() []string {
 func (n *Network) Key() []string {
 	n.mu.Lock()
 	n.mu.Lock()
 	defer n.mu.Unlock()
 	defer n.mu.Unlock()
@@ -2097,10 +2104,6 @@ func (n *Network) ResolveService(ctx context.Context, name string) ([]*net.SRV,
 	return srv, ip
 	return srv, ip
 }
 }
 
 
-func (n *Network) ExecFunc(f func()) error {
-	return types.NotImplementedErrorf("ExecFunc not supported by network")
-}
-
 func (n *Network) NdotsSet() bool {
 func (n *Network) NdotsSet() bool {
 	return false
 	return false
 }
 }

+ 21 - 1
libnetwork/network_unix.go

@@ -2,13 +2,33 @@
 
 
 package libnetwork
 package libnetwork
 
 
-import "github.com/docker/docker/libnetwork/ipamapi"
+import (
+	"context"
+
+	"github.com/docker/docker/libnetwork/ipamapi"
+)
+
+// platformNetwork is the platform-specific part of Network. It has no fields
+// in this (unix) build.
+type platformNetwork struct{} //nolint:nolintlint,unused // only populated on windows
 
 
 // Stub implementations for DNS related functions
 // Stub implementations for DNS related functions
 
 
 func (n *Network) startResolver() {
 func (n *Network) startResolver() {
 }
 }
 
 
+// addEpToResolver is the unix stub for per-endpoint resolver configuration.
+// It ignores all of its arguments and always returns nil.
+func addEpToResolver(
+	ctx context.Context,
+	netName, epName string,
+	config *containerConfig,
+	epIface *EndpointInterface,
+	resolvers []*Resolver,
+) error {
+	return nil
+}
+
+// deleteEpFromResolver is the unix stub for removing per-endpoint resolver
+// configuration. It ignores all of its arguments and always returns nil.
+func deleteEpFromResolver(epName string, epIface *EndpointInterface, resolvers []*Resolver) error {
+	return nil
+}
+
 func defaultIpamForNetworkType(networkType string) string {
 func defaultIpamForNetworkType(networkType string) string {
 	return ipamapi.DefaultIPAM
 	return ipamapi.DefaultIPAM
 }
 }

+ 179 - 2
libnetwork/network_windows.go

@@ -4,7 +4,12 @@ package libnetwork
 
 
 import (
 import (
 	"context"
 	"context"
+	"fmt"
+	"net"
+	"net/netip"
 	"runtime"
 	"runtime"
+	"strings"
+	"sync"
 	"time"
 	"time"
 
 
 	"github.com/Microsoft/hcsshim"
 	"github.com/Microsoft/hcsshim"
@@ -12,8 +17,14 @@ import (
 	"github.com/docker/docker/libnetwork/drivers/windows"
 	"github.com/docker/docker/libnetwork/drivers/windows"
 	"github.com/docker/docker/libnetwork/ipamapi"
 	"github.com/docker/docker/libnetwork/ipamapi"
 	"github.com/docker/docker/libnetwork/ipams/windowsipam"
 	"github.com/docker/docker/libnetwork/ipams/windowsipam"
+	"github.com/pkg/errors"
 )
 )
 
 
+// platformNetwork holds the Windows-specific state embedded in Network.
+type platformNetwork struct {
+	// resolverOnce presumably guards one-time resolver start-up in
+	// startResolver - the Do call is not visible here, confirm.
+	resolverOnce sync.Once
+	// dnsCompartment is copied from the HNS network's DNSServerCompartment by
+	// startResolver, and handed to executeInCompartment by ExecFunc.
+	dnsCompartment uint32
+}
+
 func executeInCompartment(compartmentID uint32, x func()) {
 func executeInCompartment(compartmentID uint32, x func()) {
 	runtime.LockOSThread()
 	runtime.LockOSThread()
 
 
@@ -28,6 +39,11 @@ func executeInCompartment(compartmentID uint32, x func()) {
 	x()
 	x()
 }
 }
 
 
+// ExecFunc runs f through executeInCompartment, using the network's
+// dnsCompartment. It always returns nil.
+func (n *Network) ExecFunc(f func()) error {
+	executeInCompartment(n.dnsCompartment, f)
+	return nil
+}
+
 func (n *Network) startResolver() {
 func (n *Network) startResolver() {
 	if n.networkType == "ics" {
 	if n.networkType == "ics" {
 		return
 		return
@@ -48,9 +64,10 @@ func (n *Network) startResolver() {
 		for _, subnet := range hnsresponse.Subnets {
 		for _, subnet := range hnsresponse.Subnets {
 			if subnet.GatewayAddress != "" {
 			if subnet.GatewayAddress != "" {
 				for i := 0; i < 3; i++ {
 				for i := 0; i < 3; i++ {
-					resolver := NewResolver(subnet.GatewayAddress, false, n)
+					resolver := NewResolver(subnet.GatewayAddress, true, n)
 					log.G(context.TODO()).Debugf("Binding a resolver on network %s gateway %s", n.Name(), subnet.GatewayAddress)
 					log.G(context.TODO()).Debugf("Binding a resolver on network %s gateway %s", n.Name(), subnet.GatewayAddress)
-					executeInCompartment(hnsresponse.DNSServerCompartment, resolver.SetupFunc(53))
+					n.dnsCompartment = hnsresponse.DNSServerCompartment
+					n.ExecFunc(resolver.SetupFunc(53))
 
 
 					if err = resolver.Start(); err != nil {
 					if err = resolver.Start(); err != nil {
 						log.G(context.TODO()).Errorf("Resolver Setup/Start failed for container %s, %q", n.Name(), err)
 						log.G(context.TODO()).Errorf("Resolver Setup/Start failed for container %s, %q", n.Name(), err)
@@ -66,6 +83,166 @@ func (n *Network) startResolver() {
 	})
 	})
 }
 }
 
 
+// addEpToResolver configures the internal DNS resolver for an endpoint.
+//
+// Windows resolvers don't consistently fall back to a secondary server if they
+// get a SERVFAIL from our resolver. So, our resolver needs to forward the query
+// upstream.
+//
+// To retrieve the list of DNS Servers to use for requests originating from an
+// endpoint, this method finds the HNSEndpoint represented by the endpoint. If
+// HNSEndpoint's list of DNS servers includes the HNSEndpoint's gateway address,
+// it's the Resolver running at that address. Other DNS servers in the
+// list have either come from config ('--dns') or have been set up by HNS as
+// external resolvers, these are the external servers the Resolver should
+// use for DNS requests from that endpoint.
+//
+// Nothing is configured when config.dnsNoProxy is set. A failure to list the
+// HNS endpoints is logged and nil is returned, so the caller does not fail
+// because of it.
+func addEpToResolver(
+	ctx context.Context,
+	netName, epName string,
+	config *containerConfig,
+	epIface *EndpointInterface,
+	resolvers []*Resolver,
+) error {
+	if config.dnsNoProxy {
+		return nil
+	}
+	hnsEndpoints, err := hcsshim.HNSListEndpointRequest()
+	if err != nil {
+		// Best-effort: leave a trace of the failure, but don't return it.
+		log.G(ctx).WithFields(log.Fields{
+			"network":  netName,
+			"endpoint": epName,
+		}).Warnf("Failed to list HNS endpoints, not configuring external DNS servers: %v", err)
+		return nil
+	}
+	return addEpToResolverImpl(ctx, netName, epName, epIface, resolvers, hnsEndpoints)
+}
+
+// addEpToResolverImpl does the work of addEpToResolver against a supplied list
+// of HNS endpoints.
+//
+// It finds the HNSEndpoint whose address matches epIface, then the resolver
+// listening on that HNSEndpoint's gateway address. Every other server in the
+// HNSEndpoint's DNSServerList is registered with that resolver as an external
+// server for queries from the HNSEndpoint's IPv4 and IPv6 addresses.
+//
+// It returns nil without configuring anything when no HNSEndpoint matches,
+// when the HNSEndpoint has EnableInternalDNS unset, when no resolver listens
+// on the gateway address, or when the gateway is not in DNSServerList. An
+// error is returned only if the resolver rejects the servers.
+func addEpToResolverImpl(
+	ctx context.Context,
+	netName, epName string,
+	epIface *EndpointInterface,
+	resolvers []*Resolver,
+	hnsEndpoints []hcsshim.HNSEndpoint,
+) error {
+	// Find the HNSEndpoint represented by ep, matching on endpoint address.
+	hnsEp := findHNSEp(epIface.addr, epIface.addrv6, hnsEndpoints)
+	if hnsEp == nil || !hnsEp.EnableInternalDNS {
+		return nil
+	}
+
+	// Find the resolver for that HNSEndpoint, matching on gateway address.
+	resolver := findResolver(resolvers, hnsEp.GatewayAddress, hnsEp.GatewayAddressV6)
+	if resolver == nil {
+		log.G(ctx).WithFields(log.Fields{
+			"network":  netName,
+			"endpoint": epName,
+		}).Debug("No internal DNS resolver to configure")
+		return nil
+	}
+
+	// Get the list of DNS servers HNS has set up for this Endpoint.
+	var dnsList []extDNSEntry
+	dnsServers := strings.Split(hnsEp.DNSServerList, ",")
+
+	// Create an extDNSEntry for each DNS server, apart from 'resolver' itself.
+	var foundSelf bool
+	hnsGw4, _ := netip.ParseAddr(hnsEp.GatewayAddress)
+	hnsGw6, _ := netip.ParseAddr(hnsEp.GatewayAddressV6)
+	for _, dnsServer := range dnsServers {
+		// Splitting an empty list, or one with a stray comma, yields empty
+		// strings. Drop them, the forwarder stops at the first entry with an
+		// empty address so any servers after it would be ignored.
+		dnsServer = strings.TrimSpace(dnsServer)
+		if dnsServer == "" {
+			continue
+		}
+		dnsAddr, _ := netip.ParseAddr(dnsServer)
+		if dnsAddr.IsValid() && (dnsAddr == hnsGw4 || dnsAddr == hnsGw6) {
+			foundSelf = true
+			continue
+		}
+		dnsList = append(dnsList, extDNSEntry{IPStr: dnsServer})
+	}
+	if !foundSelf {
+		log.G(ctx).WithFields(log.Fields{
+			"network":  netName,
+			"endpoint": epName,
+		}).Debug("Endpoint is not configured to use internal DNS resolver")
+		return nil
+	}
+
+	// If the internal resolver is configured as one of this endpoint's DNS servers,
+	// tell it which ext servers to use for requests from this endpoint's addresses.
+	log.G(ctx).Infof("External DNS servers for '%s': %v", epName, dnsList)
+	if srcAddr, ok := netip.AddrFromSlice(hnsEp.IPAddress); ok {
+		// Unmap, so a 16-byte representation of an IPv4 address is stored as
+		// a plain IPv4 netip.Addr.
+		if err := resolver.SetExtServersForSrc(srcAddr.Unmap(), dnsList); err != nil {
+			return errors.Wrapf(err, "failed to set external DNS servers for %s address %s",
+				epName, hnsEp.IPAddress)
+		}
+	}
+	if srcAddr, ok := netip.AddrFromSlice(hnsEp.IPv6Address); ok {
+		if err := resolver.SetExtServersForSrc(srcAddr, dnsList); err != nil {
+			return errors.Wrapf(err, "failed to set external DNS servers for %s address %s",
+				epName, hnsEp.IPv6Address)
+		}
+	}
+	return nil
+}
+
+// deleteEpFromResolver removes the external DNS servers registered with the
+// internal resolver for an endpoint's addresses.
+//
+// A failure to list the HNS endpoints is logged and nil is returned, so the
+// caller does not fail because of it. Otherwise the result of
+// deleteEpFromResolverImpl is returned.
+func deleteEpFromResolver(epName string, epIface *EndpointInterface, resolvers []*Resolver) error {
+	hnsEndpoints, err := hcsshim.HNSListEndpointRequest()
+	if err != nil {
+		// Best-effort: leave a trace of the failure, but don't return it.
+		log.G(context.TODO()).WithFields(log.Fields{
+			"endpoint": epName,
+		}).Warnf("Failed to list HNS endpoints, not deleting external DNS servers: %v", err)
+		return nil
+	}
+	return deleteEpFromResolverImpl(epName, epIface, resolvers, hnsEndpoints)
+}
+
+// deleteEpFromResolverImpl does the work of deleteEpFromResolver against a
+// supplied list of HNS endpoints.
+//
+// It finds the HNSEndpoint whose address matches epIface, then the resolver
+// listening on that HNSEndpoint's gateway address, and deletes the external
+// DNS servers registered for the HNSEndpoint's IPv4 and IPv6 addresses.
+//
+// An error is returned if no HNSEndpoint matches, or if the resolver fails to
+// delete the servers. If no resolver listens on the gateway address there is
+// nothing to delete, and nil is returned.
+func deleteEpFromResolverImpl(
+	epName string,
+	epIface *EndpointInterface,
+	resolvers []*Resolver,
+	hnsEndpoints []hcsshim.HNSEndpoint,
+) error {
+	// Find the HNSEndpoint represented by ep, matching on endpoint address.
+	hnsEp := findHNSEp(epIface.addr, epIface.addrv6, hnsEndpoints)
+	if hnsEp == nil {
+		return fmt.Errorf("no HNS endpoint for %s", epName)
+	}
+
+	// Find the resolver for that HNSEndpoint, matching on gateway address.
+	resolver := findResolver(resolvers, hnsEp.GatewayAddress, hnsEp.GatewayAddressV6)
+	if resolver == nil {
+		return nil
+	}
+
+	// Delete external DNS servers for the endpoint's IP addresses.
+	if srcAddr, ok := netip.AddrFromSlice(hnsEp.IPAddress); ok {
+		// Unmap, to match the key form used when the servers were added.
+		if err := resolver.SetExtServersForSrc(srcAddr.Unmap(), nil); err != nil {
+			// Report the IPv4 address that failed, not the IPv6 address.
+			return errors.Wrapf(err, "failed to delete external DNS servers for %s address %s",
+				epName, hnsEp.IPAddress)
+		}
+	}
+	if srcAddr, ok := netip.AddrFromSlice(hnsEp.IPv6Address); ok {
+		if err := resolver.SetExtServersForSrc(srcAddr, nil); err != nil {
+			return errors.Wrapf(err, "failed to delete external DNS servers for %s address %s",
+				epName, hnsEp.IPv6Address)
+		}
+	}
+
+	return nil
+}
+
+// findHNSEp returns a copy of the first HNS endpoint whose IPv4 address equals
+// ip4's address, or whose IPv6 address equals ip6's address. It returns nil
+// when there is no match.
+//
+// Either of ip4 and ip6 may be nil, for an endpoint without an address of
+// that family; a nil address never matches.
+func findHNSEp(ip4, ip6 *net.IPNet, hnsEndpoints []hcsshim.HNSEndpoint) *hcsshim.HNSEndpoint {
+	for _, hnsEp := range hnsEndpoints {
+		// Check ip4/ip6 for nil before dereferencing them - an endpoint with
+		// only one address family must not panic when an HNS endpoint has
+		// an address of the other family.
+		if ip4 != nil && hnsEp.IPAddress != nil && hnsEp.IPAddress.Equal(ip4.IP) {
+			return &hnsEp
+		}
+		if ip6 != nil && hnsEp.IPv6Address != nil && hnsEp.IPv6Address.Equal(ip6.IP) {
+			return &hnsEp
+		}
+	}
+	return nil
+}
+
+// findResolver returns the first resolver whose listen address is valid and
+// equal to gw4 or gw6, or nil when none matches. A gateway string that does
+// not parse is left as the zero netip.Addr, which a valid listen address
+// never equals.
+func findResolver(resolvers []*Resolver, gw4, gw6 string) *Resolver {
+	v4, _ := netip.ParseAddr(gw4)
+	v6, _ := netip.ParseAddr(gw6)
+	for i := range resolvers {
+		listenAddr := resolvers[i].NameServer()
+		if !listenAddr.IsValid() {
+			continue
+		}
+		if listenAddr == v4 || listenAddr == v6 {
+			return resolvers[i]
+		}
+	}
+	return nil
+}
+
 func defaultIpamForNetworkType(networkType string) string {
 func defaultIpamForNetworkType(networkType string) string {
 	if windows.IsBuiltinLocalDriver(networkType) {
 	if windows.IsBuiltinLocalDriver(networkType) {
 		return windowsipam.DefaultIPAM
 		return windowsipam.DefaultIPAM

+ 201 - 0
libnetwork/network_windows_test.go

@@ -0,0 +1,201 @@
+package libnetwork
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"net/netip"
+	"testing"
+
+	"github.com/Microsoft/hcsshim"
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	"gotest.tools/v3/assert"
+	is "gotest.tools/v3/assert/cmp"
+)
+
+// TestAddEpToResolver checks that addEpToResolverImpl registers the expected
+// external DNS servers, keyed by endpoint address, on the resolver listening
+// at the endpoint's gateway address (and on no other resolver), and that
+// deleteEpFromResolverImpl removes them again.
+func TestAddEpToResolver(t *testing.T) {
+	const (
+		ep1v4      = "192.0.2.11"
+		ep2v4      = "192.0.2.12"
+		epFiveDNS  = "192.0.2.13"
+		epNoIntDNS = "192.0.2.14"
+		ep1v6      = "2001:db8:aaaa::2"
+		gw1v4      = "192.0.2.1"
+		gw2v4      = "192.0.2.2"
+		gw1v6      = "2001:db8:aaaa::1"
+		dns1v4     = "198.51.100.1"
+		dns2v4     = "198.51.100.2"
+		dns3v4     = "198.51.100.3"
+	)
+	// Fake HNS endpoints, keyed by endpoint address.
+	hnsEndpoints := map[string]hcsshim.HNSEndpoint{
+		ep1v4: {
+			IPAddress:         net.ParseIP(ep1v4),
+			GatewayAddress:    gw1v4,
+			DNSServerList:     gw1v4 + "," + dns1v4,
+			EnableInternalDNS: true,
+		},
+		ep2v4: {
+			IPAddress:         net.ParseIP(ep2v4),
+			GatewayAddress:    gw1v4,
+			DNSServerList:     gw1v4 + "," + dns2v4,
+			EnableInternalDNS: true,
+		},
+		epFiveDNS: {
+			IPAddress:         net.ParseIP(epFiveDNS),
+			GatewayAddress:    gw1v4,
+			DNSServerList:     gw1v4 + "," + dns1v4 + "," + dns2v4 + "," + dns3v4 + ",198.51.100.4",
+			EnableInternalDNS: true,
+		},
+		epNoIntDNS: {
+			IPAddress:      net.ParseIP(epNoIntDNS),
+			GatewayAddress: gw1v4,
+			DNSServerList:  gw1v4 + "," + dns1v4,
+			//EnableInternalDNS: false,
+		},
+		ep1v6: {
+			IPv6Address:       net.ParseIP(ep1v6),
+			GatewayAddressV6:  gw1v6,
+			DNSServerList:     gw1v6 + "," + dns1v4,
+			EnableInternalDNS: true,
+		},
+	}
+
+	// makeIPNet builds an IPNet that keeps the host address, rather than the
+	// network address ParseCIDR puts in its returned IPNet.
+	makeIPNet := func(addr, netmask string) *net.IPNet {
+		t.Helper()
+		ip, ipnet, err := net.ParseCIDR(addr + "/" + netmask)
+		assert.NilError(t, err)
+		return &net.IPNet{IP: ip, Mask: ipnet.Mask}
+	}
+
+	// resolverLAs lists the listen-addresses of the resolvers to create;
+	// expResolverIdx is the index of the one expected to hold expIPToExtDNS.
+	// A test case without expIPToExtDNS expects nothing to be registered.
+	testcases := []struct {
+		name           string
+		epToAdd        *EndpointInterface
+		hnsEndpoints   []hcsshim.HNSEndpoint
+		resolverLAs    []string
+		expIPToExtDNS  map[netip.Addr][maxExtDNS]extDNSEntry
+		expResolverIdx int
+	}{
+		{
+			name: "ipv4",
+			epToAdd: &EndpointInterface{
+				addr: makeIPNet(ep1v4, "32"),
+			},
+			hnsEndpoints: []hcsshim.HNSEndpoint{
+				hnsEndpoints[ep1v4],
+			},
+			resolverLAs: []string{gw1v4},
+			expIPToExtDNS: map[netip.Addr][maxExtDNS]extDNSEntry{
+				netip.MustParseAddr(ep1v4): {{IPStr: dns1v4}},
+			},
+		},
+		{
+			name: "limit of three dns servers",
+			epToAdd: &EndpointInterface{
+				addr: makeIPNet(epFiveDNS, "32"),
+			},
+			hnsEndpoints: []hcsshim.HNSEndpoint{
+				hnsEndpoints[epFiveDNS],
+			},
+			resolverLAs: []string{gw1v4},
+			// Expect the internal resolver to keep the first three ext-servers.
+			expIPToExtDNS: map[netip.Addr][maxExtDNS]extDNSEntry{
+				netip.MustParseAddr(epFiveDNS): {
+					{IPStr: dns1v4},
+					{IPStr: dns2v4},
+					{IPStr: dns3v4},
+				},
+			},
+		},
+		{
+			name: "disabled internal resolver",
+			epToAdd: &EndpointInterface{
+				addr: makeIPNet(epNoIntDNS, "32"),
+			},
+			hnsEndpoints: []hcsshim.HNSEndpoint{
+				hnsEndpoints[epNoIntDNS],
+				hnsEndpoints[ep2v4],
+			},
+			resolverLAs: []string{gw1v4},
+		},
+		{
+			name: "missing internal resolver",
+			epToAdd: &EndpointInterface{
+				addr: makeIPNet(ep1v4, "32"),
+			},
+			hnsEndpoints: []hcsshim.HNSEndpoint{
+				hnsEndpoints[ep1v4],
+			},
+			// The only resolver is for the gateway on a different network.
+			resolverLAs: []string{gw2v4},
+		},
+		{
+			name: "multiple resolvers and endpoints",
+			epToAdd: &EndpointInterface{
+				addr: makeIPNet(ep2v4, "32"),
+			},
+			hnsEndpoints: []hcsshim.HNSEndpoint{
+				hnsEndpoints[ep1v4],
+				hnsEndpoints[ep2v4],
+			},
+			// Put the internal resolver for this network second in the list.
+			expResolverIdx: 1,
+			resolverLAs:    []string{gw2v4, gw1v4},
+			expIPToExtDNS: map[netip.Addr][maxExtDNS]extDNSEntry{
+				netip.MustParseAddr(ep2v4): {{IPStr: dns2v4}},
+			},
+		},
+		{
+			name: "ipv6",
+			epToAdd: &EndpointInterface{
+				addrv6: makeIPNet(ep1v6, "80"),
+			},
+			hnsEndpoints: []hcsshim.HNSEndpoint{
+				hnsEndpoints[ep1v6],
+			},
+			resolverLAs: []string{gw1v6},
+			expIPToExtDNS: map[netip.Addr][maxExtDNS]extDNSEntry{
+				netip.MustParseAddr(ep1v6): {{IPStr: dns1v4}},
+			},
+		},
+	}
+
+	// Comparison options for resolver maps: a nil map equals an empty one,
+	// netip.Addr is compared with ==, and unexported extDNSEntry fields are
+	// ignored.
+	eMapCmpOpts := []cmp.Option{
+		cmpopts.EquateEmpty(),
+		cmpopts.EquateComparable(netip.Addr{}),
+		cmpopts.IgnoreUnexported(extDNSEntry{}),
+	}
+	emptyEMap := map[netip.Addr][maxExtDNS]extDNSEntry{}
+
+	for _, tc := range testcases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Set up resolvers with the required listen-addresses.
+			var resolvers []*Resolver
+			for _, la := range tc.resolverLAs {
+				resolvers = append(resolvers, NewResolver(la, true, nil))
+			}
+
+			// Add the endpoint and check expected results.
+			err := addEpToResolverImpl(context.TODO(),
+				"netname", "epname", tc.epToAdd, resolvers, tc.hnsEndpoints)
+			assert.Check(t, err)
+			for i, resolver := range resolvers {
+				if i == tc.expResolverIdx {
+					assert.Check(t, is.DeepEqual(resolver.ipToExtDNS.eMap, tc.expIPToExtDNS,
+						eMapCmpOpts...), fmt.Sprintf("resolveridx=%d", i))
+				} else {
+					assert.Check(t, is.DeepEqual(resolver.ipToExtDNS.eMap, emptyEMap,
+						eMapCmpOpts...), fmt.Sprintf("resolveridx=%d", i))
+				}
+			}
+
+			// Delete the endpoint, check nothing got left behind.
+			err = deleteEpFromResolverImpl("epname", tc.epToAdd, resolvers, tc.hnsEndpoints)
+			assert.Check(t, err)
+			for i, resolver := range resolvers {
+				assert.Check(t, is.DeepEqual(resolver.ipToExtDNS.eMap, emptyEMap,
+					eMapCmpOpts...), fmt.Sprintf("resolveridx=%d", i))
+			}
+		})
+	}
+}

+ 74 - 22
libnetwork/resolver.go

@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"fmt"
 	"math/rand"
 	"math/rand"
 	"net"
 	"net"
+	"net/netip"
 	"strconv"
 	"strconv"
 	"strings"
 	"strings"
 	"sync"
 	"sync"
@@ -13,6 +14,7 @@ import (
 	"time"
 	"time"
 
 
 	"github.com/containerd/log"
 	"github.com/containerd/log"
+	"github.com/docker/docker/libnetwork/internal/netiputil"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/miekg/dns"
 	"github.com/miekg/dns"
 	"go.opentelemetry.io/otel"
 	"go.opentelemetry.io/otel"
@@ -65,17 +67,25 @@ type extDNSEntry struct {
 	HostLoopback bool
 	HostLoopback bool
 }
 }
 
 
+// String returns the server's address, wrapped as "host(<address>)" when the
+// entry has HostLoopback set.
+func (e extDNSEntry) String() string {
+	if !e.HostLoopback {
+		return e.IPStr
+	}
+	return "host(" + e.IPStr + ")"
+}
+
 // Resolver is the embedded DNS server in Docker. It operates by listening on
 // Resolver is the embedded DNS server in Docker. It operates by listening on
 // the container's loopback interface for DNS queries.
 // the container's loopback interface for DNS queries.
 type Resolver struct {
 type Resolver struct {
 	backend       DNSBackend
 	backend       DNSBackend
-	extDNSList    [maxExtDNS]extDNSEntry
+	extDNSList    [maxExtDNS]extDNSEntry // Ext servers to use when there's no entry in ipToExtDNS.
+	ipToExtDNS    addrToExtDNSMap        // DNS query source IP -> ext servers.
 	server        *dns.Server
 	server        *dns.Server
 	conn          *net.UDPConn
 	conn          *net.UDPConn
 	tcpServer     *dns.Server
 	tcpServer     *dns.Server
 	tcpListen     *net.TCPListener
 	tcpListen     *net.TCPListener
 	err           error
 	err           error
-	listenAddress string
+	listenAddress netip.Addr
 	proxyDNS      atomic.Bool
 	proxyDNS      atomic.Bool
 	startCh       chan struct{}
 	startCh       chan struct{}
 	logger        *log.Entry
 	logger        *log.Entry
@@ -87,18 +97,45 @@ type Resolver struct {
 // NewResolver creates a new instance of the Resolver
 // NewResolver creates a new instance of the Resolver
 func NewResolver(address string, proxyDNS bool, backend DNSBackend) *Resolver {
 func NewResolver(address string, proxyDNS bool, backend DNSBackend) *Resolver {
 	r := &Resolver{
 	r := &Resolver{
-		backend:       backend,
-		listenAddress: address,
-		err:           fmt.Errorf("setup not done yet"),
-		startCh:       make(chan struct{}, 1),
-		fwdSem:        semaphore.NewWeighted(maxConcurrent),
-		logInverval:   rate.Sometimes{Interval: logInterval},
+		backend:     backend,
+		err:         fmt.Errorf("setup not done yet"),
+		startCh:     make(chan struct{}, 1),
+		fwdSem:      semaphore.NewWeighted(maxConcurrent),
+		logInverval: rate.Sometimes{Interval: logInterval},
 	}
 	}
+	r.listenAddress, _ = netip.ParseAddr(address)
 	r.proxyDNS.Store(proxyDNS)
 	r.proxyDNS.Store(proxyDNS)
 
 
 	return r
 	return r
 }
 }
 
 
+// addrToExtDNSMap maps a DNS client's source address to the external DNS
+// servers to use for its queries. Its get and set methods hold mu while
+// accessing eMap.
+type addrToExtDNSMap struct {
+	mu sync.Mutex
+	// eMap is created on first insertion by set; it is nil until then.
+	eMap map[netip.Addr][maxExtDNS]extDNSEntry
+}
+
+// get looks up the external DNS servers recorded for addr, holding am.mu for
+// the map read. The boolean reports whether addr has an entry.
+func (am *addrToExtDNSMap) get(addr netip.Addr) ([maxExtDNS]extDNSEntry, bool) {
+	am.mu.Lock()
+	servers, found := am.eMap[addr]
+	am.mu.Unlock()
+	return servers, found
+}
+
+// set records entries as the external DNS servers for addr, keeping at most
+// the first maxExtDNS of them. A nil or empty entries removes addr's record
+// instead. The map is updated while holding am.mu.
+func (am *addrToExtDNSMap) set(addr netip.Addr, entries []extDNSEntry) {
+	am.mu.Lock()
+	defer am.mu.Unlock()
+
+	if len(entries) == 0 {
+		// Deleting from a nil map is a no-op, so no allocation is needed.
+		delete(am.eMap, addr)
+		return
+	}
+	if am.eMap == nil {
+		am.eMap = map[netip.Addr][maxExtDNS]extDNSEntry{}
+	}
+	// copy stops at the shorter of the two, truncating to maxExtDNS entries.
+	var servers [maxExtDNS]extDNSEntry
+	copy(servers[:], entries)
+	am.eMap[addr] = servers
+}
+
 func (r *Resolver) log(ctx context.Context) *log.Entry {
 func (r *Resolver) log(ctx context.Context) *log.Entry {
 	if r.logger == nil {
 	if r.logger == nil {
 		return log.G(ctx)
 		return log.G(ctx)
@@ -108,25 +145,23 @@ func (r *Resolver) log(ctx context.Context) *log.Entry {
 
 
 // SetupFunc returns the setup function that should be run in the container's
 // SetupFunc returns the setup function that should be run in the container's
 // network namespace.
 // network namespace.
-func (r *Resolver) SetupFunc(port int) func() {
+func (r *Resolver) SetupFunc(port uint16) func() {
 	return func() {
 	return func() {
 		var err error
 		var err error
 
 
 		// DNS operates primarily on UDP
 		// DNS operates primarily on UDP
-		r.conn, err = net.ListenUDP("udp", &net.UDPAddr{
-			IP:   net.ParseIP(r.listenAddress),
-			Port: port,
-		})
+		r.conn, err = net.ListenUDP("udp", net.UDPAddrFromAddrPort(
+			netip.AddrPortFrom(r.listenAddress, port)),
+		)
 		if err != nil {
 		if err != nil {
 			r.err = fmt.Errorf("error in opening name server socket %v", err)
 			r.err = fmt.Errorf("error in opening name server socket %v", err)
 			return
 			return
 		}
 		}
 
 
 		// Listen on a TCP as well
 		// Listen on a TCP as well
-		r.tcpListen, err = net.ListenTCP("tcp", &net.TCPAddr{
-			IP:   net.ParseIP(r.listenAddress),
-			Port: port,
-		})
+		r.tcpListen, err = net.ListenTCP("tcp", net.TCPAddrFromAddrPort(
+			netip.AddrPortFrom(r.listenAddress, port)),
+		)
 		if err != nil {
 		if err != nil {
 			r.err = fmt.Errorf("error in opening name TCP server socket %v", err)
 			r.err = fmt.Errorf("error in opening name TCP server socket %v", err)
 			return
 			return
@@ -186,7 +221,8 @@ func (r *Resolver) Stop() {
 }
 }
 
 
 // SetExtServers configures the external nameservers the resolver should use
 // SetExtServers configures the external nameservers the resolver should use
-// when forwarding queries.
+// when forwarding queries, unless SetExtServersForSrc has configured servers
+// for the DNS client making the request.
 func (r *Resolver) SetExtServers(extDNS []extDNSEntry) {
 func (r *Resolver) SetExtServers(extDNS []extDNSEntry) {
 	l := len(extDNS)
 	l := len(extDNS)
 	if l > maxExtDNS {
 	if l > maxExtDNS {
@@ -203,8 +239,17 @@ func (r *Resolver) SetForwardingPolicy(policy bool) {
 	r.proxyDNS.Store(policy)
 	r.proxyDNS.Store(policy)
 }
 }
 
 
+// SetExtServersForSrc configures the external nameservers the resolver should
+// use when forwarding queries from srcAddr. If set, these servers will be used
+// in preference to servers set by SetExtServers. Supplying a nil or empty extDNS
+// deletes nameservers for srcAddr.
+//
+// The servers are stored in r.ipToExtDNS. The returned error is currently
+// always nil.
+func (r *Resolver) SetExtServersForSrc(srcAddr netip.Addr, extDNS []extDNSEntry) error {
+	r.ipToExtDNS.set(srcAddr, extDNS)
+	return nil
+}
+
 // NameServer returns the IP of the DNS resolver for the containers.
 // NameServer returns the IP of the DNS resolver for the containers.
-func (r *Resolver) NameServer() string {
+func (r *Resolver) NameServer() netip.Addr {
 	return r.listenAddress
 	return r.listenAddress
 }
 }
 
 
@@ -439,7 +484,7 @@ func (r *Resolver) serveDNS(w dns.ResponseWriter, query *dns.Msg) {
 			!strings.Contains(strings.TrimSuffix(queryName, "."), ".") {
 			!strings.Contains(strings.TrimSuffix(queryName, "."), ".") {
 			resp = createRespMsg(query)
 			resp = createRespMsg(query)
 		} else {
 		} else {
-			resp = r.forwardExtDNS(ctx, w.LocalAddr().Network(), query)
+			resp = r.forwardExtDNS(ctx, w.LocalAddr().Network(), w.RemoteAddr(), query)
 		}
 		}
 	}
 	}
 
 
@@ -481,11 +526,11 @@ func (r *Resolver) dialExtDNS(proto string, server extDNSEntry) (net.Conn, error
 	return extConn, nil
 	return extConn, nil
 }
 }
 
 
-func (r *Resolver) forwardExtDNS(ctx context.Context, proto string, query *dns.Msg) *dns.Msg {
+func (r *Resolver) forwardExtDNS(ctx context.Context, proto string, remoteAddr net.Addr, query *dns.Msg) *dns.Msg {
 	ctx, span := otel.Tracer("").Start(ctx, "resolver.forwardExtDNS")
 	ctx, span := otel.Tracer("").Start(ctx, "resolver.forwardExtDNS")
 	defer span.End()
 	defer span.End()
 
 
-	for _, extDNS := range r.extDNSList {
+	for _, extDNS := range r.extDNS(netiputil.AddrPortFromNet(remoteAddr)) {
 		if extDNS.IPStr == "" {
 		if extDNS.IPStr == "" {
 			break
 			break
 		}
 		}
@@ -548,6 +593,13 @@ func (r *Resolver) forwardExtDNS(ctx context.Context, proto string, query *dns.M
 	return nil
 	return nil
 }
 }
 
 
+// extDNS selects the external servers for a query: the list registered in
+// r.ipToExtDNS for remoteAddr's IP address when there is one, otherwise the
+// resolver-wide r.extDNSList.
+func (r *Resolver) extDNS(remoteAddr netip.AddrPort) []extDNSEntry {
+	perSrc, found := r.ipToExtDNS.get(remoteAddr.Addr())
+	if !found {
+		return r.extDNSList[:]
+	}
+	return perSrc[:]
+}
+
 func (r *Resolver) exchange(ctx context.Context, proto string, extDNS extDNSEntry, query *dns.Msg) *dns.Msg {
 func (r *Resolver) exchange(ctx context.Context, proto string, extDNS extDNSEntry, query *dns.Msg) *dns.Msg {
 	ctx, span := otel.Tracer("").Start(ctx, "resolver.exchange", trace.WithAttributes(
 	ctx, span := otel.Tracer("").Start(ctx, "resolver.exchange", trace.WithAttributes(
 		attribute.String("libnet.resolver.upstream.proto", proto),
 		attribute.String("libnet.resolver.upstream.proto", proto),

+ 1 - 0
libnetwork/sandbox.go

@@ -92,6 +92,7 @@ type resolvConfPathConfig struct {
 }
 }
 
 
 type containerConfig struct {
 type containerConfig struct {
+	containerConfigOS //nolint:nolintlint,unused // only populated on windows
 	hostsPathConfig
 	hostsPathConfig
 	resolvConfPathConfig
 	resolvConfPathConfig
 	generic           map[string]interface{}
 	generic           map[string]interface{}

+ 4 - 3
libnetwork/sandbox_dns_unix.go

@@ -4,6 +4,7 @@ package libnetwork
 
 
 import (
 import (
 	"context"
 	"context"
+	"fmt"
 	"io/fs"
 	"io/fs"
 	"net/netip"
 	"net/netip"
 	"os"
 	"os"
@@ -343,9 +344,9 @@ func (sb *Sandbox) rebuildDNS() error {
 		}
 		}
 	}
 	}
 
 
-	intNS, err := netip.ParseAddr(sb.resolver.NameServer())
-	if err != nil {
-		return err
+	intNS := sb.resolver.NameServer()
+	if !intNS.IsValid() {
+		return fmt.Errorf("no listen-address for internal resolver")
 	}
 	}
 
 
 	// Work out whether ndots has been set from host config or overrides.
 	// Work out whether ndots has been set from host config or overrides.

+ 3 - 0
libnetwork/sandbox_linux.go

@@ -12,6 +12,9 @@ import (
 	"github.com/docker/docker/libnetwork/types"
 	"github.com/docker/docker/libnetwork/types"
 )
 )
 
 
+// containerConfigOS holds Linux-specific container configuration flags. There
+// are none at present, so the struct is empty.
+type containerConfigOS struct{} //nolint:nolintlint,unused // only populated on windows
+
 func releaseOSSboxResources(ns *osl.Namespace, ep *Endpoint) {
 func releaseOSSboxResources(ns *osl.Namespace, ep *Endpoint) {
 	for _, i := range ns.Interfaces() {
 	for _, i := range ns.Interfaces() {
 		// Only remove the interfaces owned by this endpoint from the sandbox.
 		// Only remove the interfaces owned by this endpoint from the sandbox.

+ 7 - 0
libnetwork/sandbox_options_windows.go

@@ -0,0 +1,7 @@
+package libnetwork
+
+// OptionDNSNoProxy returns a SandboxOption that sets dnsNoProxy in the
+// Sandbox's config.
+func OptionDNSNoProxy() SandboxOption {
+	return func(sb *Sandbox) {
+		sb.config.dnsNoProxy = true
+	}
+}

+ 5 - 2
libnetwork/sandbox_unsupported.go → libnetwork/sandbox_windows.go

@@ -1,9 +1,12 @@
-//go:build !linux
-
 package libnetwork
 package libnetwork
 
 
 import "github.com/docker/docker/libnetwork/osl"
 import "github.com/docker/docker/libnetwork/osl"
 
 
+// containerConfigOS holds Windows-specific container configuration flags.
+type containerConfigOS struct {
+	// dnsNoProxy is set by OptionDNSNoProxy. When true, addEpToResolver
+	// returns without configuring external DNS servers for an endpoint.
+	dnsNoProxy bool
+}
+
 func releaseOSSboxResources(*osl.Namespace, *Endpoint) {}
 func releaseOSSboxResources(*osl.Namespace, *Endpoint) {}
 
 
 func (sb *Sandbox) updateGateway(*Endpoint) error {
 func (sb *Sandbox) updateGateway(*Endpoint) error {