Merge pull request #44752 from corhere/backport-23.0/fix-44688

[23.0 backport] Clear conntrack entries for published UDP ports
commit 7b692e1041
Cory Snider, 2023-01-05 15:29:27 -05:00, committed via GitHub
3 changed files with 67 additions and 11 deletions


@@ -1352,10 +1352,9 @@ func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
 		}
 	}()
 
-	// Clean the connection tracker state of the host for the
-	// specific endpoint. This is needed because some flows may be
-	// bound to the local proxy and won't be redirect to the new endpoints.
-	clearEndpointConnections(d.nlh, endpoint)
+	// Clean the connection tracker state of the host for the specific endpoint. This is needed because some flows may
+	// be bound to the local proxy, or to the host (for UDP packets), and won't be redirected to the new endpoints.
+	clearConntrackEntries(d.nlh, endpoint)
 
 	if err = d.storeUpdate(endpoint); err != nil {
 		return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)
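
Not part of the diff: the host-bound UDP condition referenced in the new comment (#44688) is easy to trigger. A minimal reproducer sketch, assuming a host at the placeholder address 192.0.2.10 whose UDP port 5353 is not yet published — a single datagram is enough for netfilter to record the flow, and a DNAT rule installed afterwards is ignored for packets matching that pre-existing entry:

// Hypothetical reproducer, not part of the change: send one datagram to a
// host port before the port is published. The receiving kernel creates a
// conntrack entry for the flow; once NAT rules are installed later, packets
// matching that entry keep bypassing them until the entry is flushed.
package main

import (
	"log"
	"net"
)

func main() {
	// Placeholder address and port, not taken from the diff.
	conn, err := net.Dial("udp", "192.0.2.10:5353")
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	if _, err := conn.Write([]byte("probe")); err != nil {
		log.Fatal(err)
	}
}
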
@@ -1392,12 +1391,10 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
 	endpoint.portMapping = nil
 
-	// Clean the connection tracker state of the host for the specific endpoint
-	// The host kernel keeps track of the connections (TCP and UDP), so if a new endpoint gets the same IP of
-	// this one (that is going down), is possible that some of the packets would not be routed correctly inside
-	// the new endpoint
-	// Deeper details: https://github.com/docker/docker/issues/8795
-	clearEndpointConnections(d.nlh, endpoint)
+	// Clean the connection tracker state of the host for the specific endpoint. This is a precautionary measure:
+	// if a new endpoint is later assigned the same IP address, stale conntrack state could otherwise lead to bad
+	// NATing and deliver unexpected packets to that endpoint.
+	clearConntrackEntries(d.nlh, endpoint)
 
 	if err = d.storeUpdate(endpoint); err != nil {
 		return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)
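
The address-based half of the flush (iptables.DeleteConntrackEntries) is called above, but its filter construction lies outside this diff. A rough sketch of such a flush with vishvananda/netlink, assuming its ConntrackNatAnyIP filter type (match entries NATed to or from the address) and a placeholder endpoint address:

// Sketch only: flush IPv4 conntrack entries related to one endpoint address.
package main

import (
	"log"
	"net"
	"syscall"

	"github.com/vishvananda/netlink"
)

func main() {
	nlh, err := netlink.NewHandle()
	if err != nil {
		log.Fatal(err)
	}
	defer nlh.Delete() // release the netlink sockets held by the handle

	filter := &netlink.ConntrackFilter{}
	// 172.17.0.2 is a placeholder endpoint address, not from the diff.
	if err := filter.AddIP(netlink.ConntrackNatAnyIP, net.ParseIP("172.17.0.2")); err != nil {
		log.Fatal(err)
	}

	purged, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET, filter)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("purged %d IPv4 flows", purged)
}
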


@@ -9,6 +9,7 @@ import (
 	"net"
 
 	"github.com/docker/docker/libnetwork/iptables"
 	"github.com/docker/docker/libnetwork/types"
+	"github.com/sirupsen/logrus"
 	"github.com/vishvananda/netlink"
 )
@@ -419,14 +420,35 @@ func setupInternalNetworkRules(bridgeIface string, addr *net.IPNet, icc, insert bool) error {
 	return setIcc(version, bridgeIface, icc, insert)
 }
 
-func clearEndpointConnections(nlh *netlink.Handle, ep *bridgeEndpoint) {
+// clearConntrackEntries flushes conntrack entries matching the endpoint's IP
+// addresses or one of its exposed UDP ports.
+// In the first case, such entries could exist if packets were received by the
+// host between userland proxy startup and iptables setup.
+// In the second case, they could exist if packets were received while there
+// was nowhere to route them, as netfilter creates conntrack entries in that
+// case too.
+// This flush is required because iptables NAT rules are evaluated by netfilter
+// only when a new conntrack entry is created. When Docker later adds NAT
+// rules, netfilter ignores them for any packet matching a pre-existing
+// conntrack entry. As such, those entries need to be flushed to make sure the
+// NAT rules are applied to all packets.
+// See: #8795, #44688 & #44742.
+func clearConntrackEntries(nlh *netlink.Handle, ep *bridgeEndpoint) {
 	var ipv4List []net.IP
 	var ipv6List []net.IP
+	var udpPorts []uint16
 	if ep.addr != nil {
 		ipv4List = append(ipv4List, ep.addr.IP)
 	}
 	if ep.addrv6 != nil {
 		ipv6List = append(ipv6List, ep.addrv6.IP)
 	}
+	for _, pb := range ep.portMapping {
+		if pb.Proto == types.UDP {
+			udpPorts = append(udpPorts, pb.HostPort)
+		}
+	}
 	iptables.DeleteConntrackEntries(nlh, ipv4List, ipv6List)
+	iptables.DeleteConntrackEntriesByPort(nlh, types.UDP, udpPorts)
 }
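
A hypothetical call site for the two helpers used by clearConntrackEntries; flushEndpointState and its arguments are illustrative, not taken from the diff:

package example

import (
	"net"

	"github.com/docker/docker/libnetwork/iptables"
	"github.com/docker/docker/libnetwork/types"
	"github.com/vishvananda/netlink"
)

// flushEndpointState is a hypothetical helper mirroring clearConntrackEntries:
// it flushes conntrack state for one endpoint address and for a set of
// published UDP host ports.
func flushEndpointState(ip net.IP, udpPorts []uint16) error {
	nlh, err := netlink.NewHandle()
	if err != nil {
		return err
	}
	defer nlh.Delete()

	// DeleteConntrackEntries also reports how many IPv4/IPv6 flows it purged;
	// the counts are ignored here.
	if _, _, err := iptables.DeleteConntrackEntries(nlh, []net.IP{ip}, nil); err != nil {
		return err
	}
	return iptables.DeleteConntrackEntriesByPort(nlh, types.UDP, udpPorts)
}
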


@@ -8,6 +8,7 @@ import (
 	"net"
 	"syscall"
 
 	"github.com/docker/docker/libnetwork/types"
+	"github.com/sirupsen/logrus"
 	"github.com/vishvananda/netlink"
 )
@@ -53,6 +54,42 @@ func DeleteConntrackEntries(nlh *netlink.Handle, ipv4List []net.IP, ipv6List []net.IP) (uint, uint, error) {
 	return totalIPv4FlowPurged, totalIPv6FlowPurged, nil
 }
 
+// DeleteConntrackEntriesByPort flushes, for the given protocol, conntrack
+// entries whose original destination matches one of the given ports, in both
+// the IPv4 and IPv6 conntrack tables. Filter errors are logged and skipped so
+// that one bad port does not prevent the remaining ports from being flushed.
+func DeleteConntrackEntriesByPort(nlh *netlink.Handle, proto types.Protocol, ports []uint16) error {
+	if !IsConntrackProgrammable(nlh) {
+		return ErrConntrackNotConfigurable
+	}
+
+	var totalIPv4FlowPurged uint
+	var totalIPv6FlowPurged uint
+
+	for _, port := range ports {
+		filter := &netlink.ConntrackFilter{}
+		if err := filter.AddProtocol(uint8(proto)); err != nil {
+			logrus.Warnf("Failed to delete conntrack state for %s port %d: %v", proto.String(), port, err)
+			continue
+		}
+		if err := filter.AddPort(netlink.ConntrackOrigDstPort, port); err != nil {
+			logrus.Warnf("Failed to delete conntrack state for %s port %d: %v", proto.String(), port, err)
+			continue
+		}
+
+		v4FlowPurged, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET, filter)
+		if err != nil {
+			logrus.Warnf("Failed to delete conntrack state for IPv4 %s port %d: %v", proto.String(), port, err)
+		}
+		totalIPv4FlowPurged += v4FlowPurged
+
+		v6FlowPurged, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET6, filter)
+		if err != nil {
+			logrus.Warnf("Failed to delete conntrack state for IPv6 %s port %d: %v", proto.String(), port, err)
+		}
+		totalIPv6FlowPurged += v6FlowPurged
+	}
+
+	logrus.Debugf("DeleteConntrackEntriesByPort for %s ports purged ipv4:%d, ipv6:%d", proto.String(), totalIPv4FlowPurged, totalIPv6FlowPurged)
+	return nil
+}
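
One way to sanity-check the purge, assuming vishvananda/netlink's ConntrackTableList and the Forward tuple of its ConntrackFlow type; countUDPFlows is a hypothetical verification helper, not part of the change:

package example

import (
	"syscall"

	"github.com/vishvananda/netlink"
)

// countUDPFlows counts IPv4 conntrack entries whose original destination is
// the given UDP port; it should return zero for a port that was just passed
// to DeleteConntrackEntriesByPort.
func countUDPFlows(nlh *netlink.Handle, port uint16) (int, error) {
	flows, err := nlh.ConntrackTableList(netlink.ConntrackTable, syscall.AF_INET)
	if err != nil {
		return 0, err
	}

	n := 0
	for _, f := range flows {
		// Forward holds the flow's original direction; protocol 17 is UDP.
		if f.Forward.Protocol == syscall.IPPROTO_UDP && f.Forward.DstPort == port {
			n++
		}
	}
	return n, nil
}
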
 func purgeConntrackState(nlh *netlink.Handle, family netlink.InetFamily, ipAddress net.IP) (uint, error) {
 	filter := &netlink.ConntrackFilter{}
 	// NOTE: doing the flush using the ipAddress is safe because today there cannot be multiple networks with the same subnet