Browse Source

libnet/d/bridge: Don't set container's gateway when network is internal

So far, internal networks were only isolated from the host by iptables
DROP rules. As a consequence, outbound connections from containers would
timeout instead of being "rejected" through an immediate ICMP dest/port
unreachable, a TCP RST or a failing `connect` syscall.

This was visible when internal containers were trying to resolve a
domain that don't match any container on the same network (be it a truly
"external" domain, or a container that don't exist/is dead). In that
case, the embedded resolver would try to forward DNS queries for the
different values of resolv.conf `search` option, making DNS resolution
slow to return an error, and the slowness being exacerbated by some libc
implementations.

This change makes `connect` syscall to return ENETUNREACH, and thus
solves the broader issue of failing fast when external connections are
attempted.

Signed-off-by: Albin Kerouanton <albinker@gmail.com>
Albin Kerouanton 1 year ago
parent
commit
cbc2a71c27

+ 2 - 1
integration-cli/docker_cli_network_unix_test.go

@@ -31,6 +31,7 @@ import (
 	"github.com/vishvananda/netlink"
 	"golang.org/x/sys/unix"
 	"gotest.tools/v3/assert"
+	is "gotest.tools/v3/assert/cmp"
 	"gotest.tools/v3/icmd"
 )
 
@@ -1611,7 +1612,7 @@ func (s *DockerCLINetworkSuite) TestDockerNetworkInternalMode(c *testing.T) {
 	assert.Assert(c, waitRun("second") == nil)
 	out, _, err := dockerCmdWithError("exec", "first", "ping", "-W", "4", "-c", "1", "8.8.8.8")
 	assert.ErrorContains(c, err, "")
-	assert.Assert(c, strings.Contains(out, "100% packet loss"))
+	assert.Assert(c, is.Contains(out, "Network is unreachable"))
 	_, _, err = dockerCmdWithError("exec", "second", "ping", "-c", "1", "first")
 	assert.NilError(c, err)
 }

+ 9 - 7
libnetwork/drivers/bridge/bridge_linux.go

@@ -1223,14 +1223,16 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
 		return err
 	}
 
-	err = jinfo.SetGateway(network.bridge.gatewayIPv4)
-	if err != nil {
-		return err
-	}
+	if !network.config.Internal {
+		err = jinfo.SetGateway(network.bridge.gatewayIPv4)
+		if err != nil {
+			return err
+		}
 
-	err = jinfo.SetGatewayIPv6(network.bridge.gatewayIPv6)
-	if err != nil {
-		return err
+		err = jinfo.SetGatewayIPv6(network.bridge.gatewayIPv6)
+		if err != nil {
+			return err
+		}
 	}
 
 	return nil

+ 12 - 2
libnetwork/drivers/bridge/setup_ipv4_linux.go

@@ -28,6 +28,14 @@ func selectIPv4Address(addresses []netlink.Addr, selector *net.IPNet) (netlink.A
 }
 
 func setupBridgeIPv4(config *networkConfiguration, i *bridgeInterface) error {
+	// TODO(aker): the bridge driver panics if its bridgeIPv4 field isn't set. Once bridge subnet and bridge IP address
+	//             are decoupled, we should assign it only when it's really needed.
+	i.bridgeIPv4 = config.AddressIPv4
+
+	if config.Internal {
+		return nil
+	}
+
 	if !config.InhibitIPv4 {
 		addrv4List, _, err := i.addresses()
 		if err != nil {
@@ -49,8 +57,7 @@ func setupBridgeIPv4(config *networkConfiguration, i *bridgeInterface) error {
 		}
 	}
 
-	// Store bridge network and default gateway
-	i.bridgeIPv4 = config.AddressIPv4
+	// Store the default gateway
 	i.gatewayIPv4 = config.AddressIPv4.IP
 
 	return nil
@@ -60,6 +67,9 @@ func setupGatewayIPv4(config *networkConfiguration, i *bridgeInterface) error {
 	if !i.bridgeIPv4.Contains(config.DefaultGatewayIPv4) {
 		return &ErrInvalidGateway{}
 	}
+	if config.Internal {
+		return types.InvalidParameterErrorf("no gateway can be set on an internal bridge network")
+	}
 
 	// Store requested default gateway
 	i.gatewayIPv4 = config.DefaultGatewayIPv4

+ 5 - 3
libnetwork/drivers/bridge/setup_verify_linux.go

@@ -11,6 +11,8 @@ import (
 	"github.com/vishvananda/netlink"
 )
 
+// setupVerifyAndReconcile checks what IP addresses the given i interface has and ensures that they match the passed
+// network config. It also removes any extra unicast IPv6 addresses found.
 func setupVerifyAndReconcile(config *networkConfiguration, i *bridgeInterface) error {
 	// Fetch a slice of IPv4 addresses and a slice of IPv6 addresses from the bridge.
 	addrsv4, addrsv6, err := i.addresses()
@@ -21,18 +23,18 @@ func setupVerifyAndReconcile(config *networkConfiguration, i *bridgeInterface) e
 	addrv4, _ := selectIPv4Address(addrsv4, config.AddressIPv4)
 
 	// Verify that the bridge does have an IPv4 address.
-	if addrv4.IPNet == nil {
+	if !config.Internal && addrv4.IPNet == nil {
 		return &ErrNoIPAddr{}
 	}
 
 	// Verify that the bridge IPv4 address matches the requested configuration.
-	if config.AddressIPv4 != nil && !addrv4.IP.Equal(config.AddressIPv4.IP) {
+	if config.AddressIPv4 != nil && addrv4.IPNet != nil && !addrv4.IP.Equal(config.AddressIPv4.IP) {
 		return &IPv4AddrNoMatchError{IP: addrv4.IP, CfgIP: config.AddressIPv4.IP}
 	}
 
 	// Verify that one of the bridge IPv6 addresses matches the requested
 	// configuration.
-	if config.EnableIPv6 && !findIPv6Address(netlink.Addr{IPNet: bridgeIPv6}, addrsv6) {
+	if config.EnableIPv6 && !config.Internal && !findIPv6Address(netlink.Addr{IPNet: bridgeIPv6}, addrsv6) {
 		return (*IPv6AddrNoMatchError)(bridgeIPv6)
 	}
 

+ 3 - 0
libnetwork/sandbox_dns_unix.go

@@ -30,6 +30,9 @@ const (
 func (sb *Sandbox) startResolver(restore bool) {
 	sb.resolverOnce.Do(func() {
 		var err error
+		// The embedded resolver is always started with proxyDNS set as true, even when the sandbox is only attached to
+		// an internal network. This way, it's the driver responsibility to make sure `connect` syscall fails fast when
+		// no external connectivity is available (eg. by not setting a default gateway).
 		sb.resolver = NewResolver(resolverIPSandbox, true, sb)
 		defer func() {
 			if err != nil {