Merge pull request #44752 from corhere/backport-23.0/fix-44688
[23.0 backport] Clear conntrack entries for published UDP ports
This commit is contained in:
commit
7b692e1041
3 changed files with 67 additions and 11 deletions
|
@ -1352,10 +1352,9 @@ func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string
|
|||
}
|
||||
}()
|
||||
|
||||
// Clean the connection tracker state of the host for the
|
||||
// specific endpoint. This is needed because some flows may be
|
||||
// bound to the local proxy and won't bre redirect to the new endpoints.
|
||||
clearEndpointConnections(d.nlh, endpoint)
|
||||
// Clean the connection tracker state of the host for the specific endpoint. This is needed because some flows may
|
||||
// be bound to the local proxy, or to the host (for UDP packets), and won't be redirected to the new endpoints.
|
||||
clearConntrackEntries(d.nlh, endpoint)
|
||||
|
||||
if err = d.storeUpdate(endpoint); err != nil {
|
||||
return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)
|
||||
|
@ -1392,12 +1391,10 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
|
|||
|
||||
endpoint.portMapping = nil
|
||||
|
||||
// Clean the connection tracker state of the host for the specific endpoint
|
||||
// The host kernel keeps track of the connections (TCP and UDP), so if a new endpoint gets the same IP of
|
||||
// this one (that is going down), is possible that some of the packets would not be routed correctly inside
|
||||
// the new endpoint
|
||||
// Deeper details: https://github.com/docker/docker/issues/8795
|
||||
clearEndpointConnections(d.nlh, endpoint)
|
||||
// Clean the connection tracker state of the host for the specific endpoint. This is a precautionary measure to
|
||||
// avoid new endpoints getting the same IP address to receive unexpected packets due to bad conntrack state leading
|
||||
// to bad NATing.
|
||||
clearConntrackEntries(d.nlh, endpoint)
|
||||
|
||||
if err = d.storeUpdate(endpoint); err != nil {
|
||||
return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"net"
|
||||
|
||||
"github.com/docker/docker/libnetwork/iptables"
|
||||
"github.com/docker/docker/libnetwork/types"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
@ -419,14 +420,35 @@ func setupInternalNetworkRules(bridgeIface string, addr *net.IPNet, icc, insert
|
|||
return setIcc(version, bridgeIface, icc, insert)
|
||||
}
|
||||
|
||||
func clearEndpointConnections(nlh *netlink.Handle, ep *bridgeEndpoint) {
|
||||
// clearConntrackEntries flushes conntrack entries matching endpoint IP address
|
||||
// or matching one of the exposed UDP port.
|
||||
// In the first case, this could happen if packets were received by the host
|
||||
// between userland proxy startup and iptables setup.
|
||||
// In the latter case, this could happen if packets were received whereas there
|
||||
// were nowhere to route them, as netfilter creates entries in such case.
|
||||
// This is required because iptables NAT rules are evaluated by netfilter only
|
||||
// when creating a new conntrack entry. When Docker latter adds NAT rules,
|
||||
// netfilter ignore them for any packet matching a pre-existing conntrack entry.
|
||||
// As such, we need to flush all those conntrack entries to make sure NAT rules
|
||||
// are correctly applied to all packets.
|
||||
// See: #8795, #44688 & #44742.
|
||||
func clearConntrackEntries(nlh *netlink.Handle, ep *bridgeEndpoint) {
|
||||
var ipv4List []net.IP
|
||||
var ipv6List []net.IP
|
||||
var udpPorts []uint16
|
||||
|
||||
if ep.addr != nil {
|
||||
ipv4List = append(ipv4List, ep.addr.IP)
|
||||
}
|
||||
if ep.addrv6 != nil {
|
||||
ipv6List = append(ipv6List, ep.addrv6.IP)
|
||||
}
|
||||
for _, pb := range ep.portMapping {
|
||||
if pb.Proto == types.UDP {
|
||||
udpPorts = append(udpPorts, pb.HostPort)
|
||||
}
|
||||
}
|
||||
|
||||
iptables.DeleteConntrackEntries(nlh, ipv4List, ipv6List)
|
||||
iptables.DeleteConntrackEntriesByPort(nlh, types.UDP, udpPorts)
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"net"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/docker/libnetwork/types"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
@ -53,6 +54,42 @@ func DeleteConntrackEntries(nlh *netlink.Handle, ipv4List []net.IP, ipv6List []n
|
|||
return totalIPv4FlowPurged, totalIPv6FlowPurged, nil
|
||||
}
|
||||
|
||||
func DeleteConntrackEntriesByPort(nlh *netlink.Handle, proto types.Protocol, ports []uint16) error {
|
||||
if !IsConntrackProgrammable(nlh) {
|
||||
return ErrConntrackNotConfigurable
|
||||
}
|
||||
|
||||
var totalIPv4FlowPurged uint
|
||||
var totalIPv6FlowPurged uint
|
||||
|
||||
for _, port := range ports {
|
||||
filter := &netlink.ConntrackFilter{}
|
||||
if err := filter.AddProtocol(uint8(proto)); err != nil {
|
||||
logrus.Warnf("Failed to delete conntrack state for %s port %d: %v", proto.String(), port, err)
|
||||
continue
|
||||
}
|
||||
if err := filter.AddPort(netlink.ConntrackOrigDstPort, port); err != nil {
|
||||
logrus.Warnf("Failed to delete conntrack state for %s port %d: %v", proto.String(), port, err)
|
||||
continue
|
||||
}
|
||||
|
||||
v4FlowPurged, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET, filter)
|
||||
if err != nil {
|
||||
logrus.Warnf("Failed to delete conntrack state for IPv4 %s port %d: %v", proto.String(), port, err)
|
||||
}
|
||||
totalIPv4FlowPurged += v4FlowPurged
|
||||
|
||||
v6FlowPurged, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET6, filter)
|
||||
if err != nil {
|
||||
logrus.Warnf("Failed to delete conntrack state for IPv6 %s port %d: %v", proto.String(), port, err)
|
||||
}
|
||||
totalIPv6FlowPurged += v6FlowPurged
|
||||
}
|
||||
|
||||
logrus.Debugf("DeleteConntrackEntriesByPort for %s ports purged ipv4:%d, ipv6:%d", proto.String(), totalIPv4FlowPurged, totalIPv6FlowPurged)
|
||||
return nil
|
||||
}
|
||||
|
||||
func purgeConntrackState(nlh *netlink.Handle, family netlink.InetFamily, ipAddress net.IP) (uint, error) {
|
||||
filter := &netlink.ConntrackFilter{}
|
||||
// NOTE: doing the flush using the ipAddress is safe because today there cannot be multiple networks with the same subnet
|
||||
|
|
Loading…
Reference in a new issue