moby/libnetwork/iptables/conntrack.go
Albin Kerouanton b37d34307d
Clear conntrack entries for published UDP ports
Conntrack entries are created for UDP flows even if there's nowhere to
route these packets (i.e. no listening socket and no NAT rules to
apply). Moreover, iptables NAT rules are evaluated by netfilter only
when creating a new conntrack entry.

When Docker adds NAT rules, netfilter will ignore them for any packet
matching a pre-existing conntrack entry. In such a case, when
dockerd runs with userland proxy enabled, packets get routed to it and
the main symptom will be a bad source IP address (as shown by #44688).

If the publishing container is run through Docker Swarm or in
"standalone" Docker but with no userland proxy, affected packets will
be dropped (e.g. routed to nowhere).

As such, Docker needs to flush all conntrack entries for published UDP
ports to make sure NAT rules are correctly applied to all packets.

- Fixes #44688
- Fixes #8795
- Fixes #16720
- Fixes #7540
- Fixes moby/libnetwork#2423
- and probably more.

As a precautionary measure, those conntrack entries are also flushed
when revoking external connectivity to prevent those entries from being
reused when a new sandbox is created (although the kernel should already
prevent such a case).

Signed-off-by: Albin Kerouanton <albinker@gmail.com>
2023-01-05 12:53:22 +01:00

101 lines
3.5 KiB
Go

//go:build linux
// +build linux
package iptables
import (
"errors"
"net"
"syscall"
"github.com/docker/docker/libnetwork/types"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
)
// ErrConntrackNotConfigurable is the error returned by the conntrack deletion
// helpers in this file when the supplied netlink handle is not usable for
// conntrack operations (see IsConntrackProgrammable).
var ErrConntrackNotConfigurable = errors.New("conntrack is not available")
// IsConntrackProgrammable reports whether the given netlink handle supports
// the NETLINK_NETFILTER family, which the conntrack helpers in this file
// require before attempting any deletion.
func IsConntrackProgrammable(nlh *netlink.Handle) bool {
	supported := nlh.SupportsNetlinkFamily(syscall.NETLINK_NETFILTER)
	return supported
}
// DeleteConntrackEntries removes the conntrack flows that match any of the
// given IP addresses.
//
// ipv4List is purged using the AF_INET family and ipv6List using AF_INET6.
// A failure for an individual address is logged as a warning and that address
// is skipped; it does not abort the operation nor is it reported to the caller.
//
// It returns the number of flows deleted for IPv4 and for IPv6 (in that
// order). ErrConntrackNotConfigurable is returned, with both counts set to
// zero, when nlh does not pass IsConntrackProgrammable.
func DeleteConntrackEntries(nlh *netlink.Handle, ipv4List []net.IP, ipv6List []net.IP) (uint, uint, error) {
	if !IsConntrackProgrammable(nlh) {
		return 0, 0, ErrConntrackNotConfigurable
	}

	// purgeAll deletes the flows of a single address family for every address
	// in addrs and returns the total number of flows removed.
	purgeAll := func(family netlink.InetFamily, addrs []net.IP) uint {
		var total uint
		for _, addr := range addrs {
			n, err := purgeConntrackState(nlh, family, addr)
			if err != nil {
				logrus.Warnf("Failed to delete conntrack state for %s: %v", addr, err)
				continue
			}
			total += n
		}
		return total
	}

	// IPv4 addresses are processed before IPv6 ones.
	v4Purged := purgeAll(syscall.AF_INET, ipv4List)
	v6Purged := purgeAll(syscall.AF_INET6, ipv6List)

	logrus.Debugf("DeleteConntrackEntries purged ipv4:%d, ipv6:%d", v4Purged, v6Purged)
	return v4Purged, v6Purged, nil
}
// DeleteConntrackEntriesByPort removes, for both IPv4 and IPv6, the conntrack
// flows of protocol proto whose original destination port is one of ports.
//
// The operation is best-effort: a failure to build the filter for a port, or
// to delete flows for one address family, is logged as a warning and the
// remaining ports/families are still processed. The only error returned is
// ErrConntrackNotConfigurable, when nlh does not pass IsConntrackProgrammable.
func DeleteConntrackEntriesByPort(nlh *netlink.Handle, proto types.Protocol, ports []uint16) error {
	if !IsConntrackProgrammable(nlh) {
		return ErrConntrackNotConfigurable
	}

	protoName := proto.String()

	// newFilter builds a fresh filter matching proto and the given original
	// destination port. A new filter is created for every port.
	newFilter := func(port uint16) (*netlink.ConntrackFilter, error) {
		f := &netlink.ConntrackFilter{}
		if err := f.AddProtocol(uint8(proto)); err != nil {
			return nil, err
		}
		if err := f.AddPort(netlink.ConntrackOrigDstPort, port); err != nil {
			return nil, err
		}
		return f, nil
	}

	var v4Total, v6Total uint
	for _, port := range ports {
		filter, err := newFilter(port)
		if err != nil {
			logrus.Warnf("Failed to delete conntrack state for %s port %d: %v", protoName, port, err)
			continue
		}

		// The count returned by each call is accumulated even when the call
		// also reports an error.
		v4Count, v4Err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET, filter)
		if v4Err != nil {
			logrus.Warnf("Failed to delete conntrack state for IPv4 %s port %d: %v", protoName, port, v4Err)
		}
		v4Total += v4Count

		v6Count, v6Err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET6, filter)
		if v6Err != nil {
			logrus.Warnf("Failed to delete conntrack state for IPv6 %s port %d: %v", protoName, port, v6Err)
		}
		v6Total += v6Count
	}

	logrus.Debugf("DeleteConntrackEntriesByPort for %s ports purged ipv4:%d, ipv6:%d", protoName, v4Total, v6Total)
	return nil
}
// purgeConntrackState deletes, from the conntrack table of the given address
// family, the flows matched by a ConntrackNatAnyIP filter on ipAddress.
// It returns the number of flows reported deleted by the netlink handle, or an
// error if the filter cannot be built or the deletion fails.
func purgeConntrackState(nlh *netlink.Handle, family netlink.InetFamily, ipAddress net.IP) (purged uint, err error) {
	ipFilter := new(netlink.ConntrackFilter)

	// NOTE: flushing by IP address is considered safe because, as of today,
	// several networks cannot share the same subnet; flows belonging to other
	// containers therefore cannot be flushed by mistake.
	if err = ipFilter.AddIP(netlink.ConntrackNatAnyIP, ipAddress); err != nil {
		return 0, err
	}

	purged, err = nlh.ConntrackDeleteFilter(netlink.ConntrackTable, family, ipFilter)
	return purged, err
}