Quellcode durchsuchen

Merge pull request #44742 from akerouanton/fix-44688

Clear conntrack entries for published UDP ports
Bjorn Neergaard vor 2 Jahren
Ursprung
Commit
e9275392c1

+ 7 - 10
libnetwork/drivers/bridge/bridge.go

@@ -1328,10 +1328,9 @@ func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string
 		}
 	}()
 
-	// Clean the connection tracker state of the host for the
-	// specific endpoint. This is needed because some flows may be
-	// bound to the local proxy and won't bre redirect to the new endpoints.
-	clearEndpointConnections(d.nlh, endpoint)
+	// Clean the connection tracker state of the host for the specific endpoint. This is needed because some flows may
+	// be bound to the local proxy, or to the host (for UDP packets), and won't be redirected to the new endpoints.
+	clearConntrackEntries(d.nlh, endpoint)
 
 	if err = d.storeUpdate(endpoint); err != nil {
 		return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)
@@ -1366,12 +1365,10 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
 
 	endpoint.portMapping = nil
 
-	// Clean the connection tracker state of the host for the specific endpoint
-	// The host kernel keeps track of the connections (TCP and UDP), so if a new endpoint gets the same IP of
-	// this one (that is going down), is possible that some of the packets would not be routed correctly inside
-	// the new endpoint
-	// Deeper details: https://github.com/docker/docker/issues/8795
-	clearEndpointConnections(d.nlh, endpoint)
+	// Clean the connection tracker state of the host for the specific endpoint. This is a precautionary measure to
+	// prevent a new endpoint that gets the same IP address from receiving unexpected packets due to bad conntrack
+	// state leading to bad NATing.
+	clearConntrackEntries(d.nlh, endpoint)
 
 	if err = d.storeUpdate(endpoint); err != nil {
 		return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)

+ 23 - 1
libnetwork/drivers/bridge/setup_ip_tables.go

@@ -9,6 +9,7 @@ import (
 	"net"
 
 	"github.com/docker/docker/libnetwork/iptables"
+	"github.com/docker/docker/libnetwork/types"
 	"github.com/sirupsen/logrus"
 	"github.com/vishvananda/netlink"
 )
@@ -418,14 +419,35 @@ func setupInternalNetworkRules(bridgeIface string, addr *net.IPNet, icc, insert
 	return setIcc(version, bridgeIface, icc, insert)
 }
 
-func clearEndpointConnections(nlh *netlink.Handle, ep *bridgeEndpoint) {
+// clearConntrackEntries flushes conntrack entries matching endpoint IP address
+// or matching one of the exposed UDP ports.
+// In the first case, this could happen if packets were received by the host
+// between userland proxy startup and iptables setup.
+// In the latter case, this could happen if packets were received when there
+// was nowhere to route them, as netfilter creates entries in such a case.
+// This is required because iptables NAT rules are evaluated by netfilter only
+// when creating a new conntrack entry. When Docker later adds NAT rules,
+// netfilter ignores them for any packet matching a pre-existing conntrack entry.
+// As such, we need to flush all those conntrack entries to make sure NAT rules
+// are correctly applied to all packets.
+// See: #8795, #44688 & #44742.
+func clearConntrackEntries(nlh *netlink.Handle, ep *bridgeEndpoint) {
 	var ipv4List []net.IP
 	var ipv6List []net.IP
+	var udpPorts []uint16
+
 	if ep.addr != nil {
 		ipv4List = append(ipv4List, ep.addr.IP)
 	}
 	if ep.addrv6 != nil {
 		ipv6List = append(ipv6List, ep.addrv6.IP)
 	}
+	for _, pb := range ep.portMapping {
+		if pb.Proto == types.UDP {
+			udpPorts = append(udpPorts, pb.HostPort)
+		}
+	}
+
 	iptables.DeleteConntrackEntries(nlh, ipv4List, ipv6List)
+	iptables.DeleteConntrackEntriesByPort(nlh, types.UDP, udpPorts)
 }

+ 37 - 0
libnetwork/iptables/conntrack.go

@@ -8,6 +8,7 @@ import (
 	"net"
 	"syscall"
 
+	"github.com/docker/docker/libnetwork/types"
 	"github.com/sirupsen/logrus"
 	"github.com/vishvananda/netlink"
 )
@@ -53,6 +54,42 @@ func DeleteConntrackEntries(nlh *netlink.Handle, ipv4List []net.IP, ipv6List []n
 	return totalIPv4FlowPurged, totalIPv6FlowPurged, nil
 }
 
+func DeleteConntrackEntriesByPort(nlh *netlink.Handle, proto types.Protocol, ports []uint16) error {
+	if !IsConntrackProgrammable(nlh) {
+		return ErrConntrackNotConfigurable
+	}
+
+	var totalIPv4FlowPurged uint
+	var totalIPv6FlowPurged uint
+
+	for _, port := range ports {
+		filter := &netlink.ConntrackFilter{}
+		if err := filter.AddProtocol(uint8(proto)); err != nil {
+			logrus.Warnf("Failed to delete conntrack state for %s port %d: %v", proto.String(), port, err)
+			continue
+		}
+		if err := filter.AddPort(netlink.ConntrackOrigDstPort, port); err != nil {
+			logrus.Warnf("Failed to delete conntrack state for %s port %d: %v", proto.String(), port, err)
+			continue
+		}
+
+		v4FlowPurged, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET, filter)
+		if err != nil {
+			logrus.Warnf("Failed to delete conntrack state for IPv4 %s port %d: %v", proto.String(), port, err)
+		}
+		totalIPv4FlowPurged += v4FlowPurged
+
+		v6FlowPurged, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET6, filter)
+		if err != nil {
+			logrus.Warnf("Failed to delete conntrack state for IPv6 %s port %d: %v", proto.String(), port, err)
+		}
+		totalIPv6FlowPurged += v6FlowPurged
+	}
+
+	logrus.Debugf("DeleteConntrackEntriesByPort for %s ports purged ipv4:%d, ipv6:%d", proto.String(), totalIPv4FlowPurged, totalIPv6FlowPurged)
+	return nil
+}
+
 func purgeConntrackState(nlh *netlink.Handle, family netlink.InetFamily, ipAddress net.IP) (uint, error) {
 	filter := &netlink.ConntrackFilter{}
 	// NOTE: doing the flush using the ipAddress is safe because today there cannot be multiple networks with the same subnet