libnetwork/overlay: remove host mode

Linux kernels prior to v3.16 did not support netns for vxlan
interfaces. As such, moby/libnetwork#821 introduced a "host mode" to the
overlay driver. The related kernel fix has been available to rhel7 users
since v7.2.

This mode could be forced through the use of the env var
_OVERLAY_HOST_MODE. However, this env var has never been documented and
is not referenced in any blog post, so there's little chance many people
rely on it. Moreover, this host mode is deemed an implementation
detail by maintainers. As such, we can consider it dead and we can
remove it without a prior deprecation warning.

Signed-off-by: Albin Kerouanton <albinker@gmail.com>
This commit is contained in:
Albin Kerouanton 2023-02-15 21:45:13 +01:00
parent 1d46597c8b
commit 8ed900263e
No known key found for this signature in database
GPG key ID: 630B8E1DCBDB1864
3 changed files with 16 additions and 295 deletions

View file

@ -1,153 +0,0 @@
//go:build linux
// +build linux
package overlay
import (
"fmt"
"sync"
"github.com/docker/docker/libnetwork/iptables"
"github.com/sirupsen/logrus"
)
// globalChain is the name of the iptables chain that setFilters hooks into
// the OUTPUT and FORWARD chains and that holds the per-bridge jump rules.
const globalChain = "DOCKER-OVERLAY"
// filterOnce ensures setupGlobalChain runs at most once (see setNetworkChain).
var filterOnce sync.Once
// filterChan is a one-slot buffered channel used by filterWait as a binary
// semaphore: a send takes the slot, a receive frees it.
var filterChan = make(chan struct{}, 1)
// filterWait takes the single slot of filterChan, blocking while the slot is
// occupied, and returns the function that frees the slot again. Callers use
// it as `defer filterWait()()`.
func filterWait() func() {
	filterChan <- struct{}{}
	release := func() {
		<-filterChan
	}
	return release
}
// chainExists reports whether listing the IPv4 iptables chain cname succeeds.
// Any error from the listing command is treated as "the chain does not exist".
func chainExists(cname string) bool {
	// TODO IPv6 support
	_, err := iptables.GetIptable(iptables.IPv4).Raw("-L", cname)
	return err == nil
}
// setupGlobalChain makes sure the global overlay chain exists and contains a
// trailing "-j RETURN" rule. Failures are logged and not returned.
func setupGlobalChain() {
	// TODO IPv6 support
	ipt := iptables.GetIptable(iptables.IPv4)

	// The chain may have been left behind by an ungraceful shutdown, so it is
	// only created when it is missing.
	if present := chainExists(globalChain); !present {
		err := ipt.RawCombinedOutput("-N", globalChain)
		if err != nil {
			logrus.Errorf("could not create global overlay chain: %v", err)
			return
		}
	}

	// Nothing more to do when the default RETURN rule is already installed.
	if ipt.Exists(iptables.Filter, globalChain, "-j", "RETURN") {
		return
	}
	if err := ipt.RawCombinedOutput("-A", globalChain, "-j", "RETURN"); err != nil {
		logrus.Errorf("could not install default return chain in the overlay global chain: %v", err)
	}
}
// setNetworkChain creates or removes the per-network iptables chain cname.
//
// With remove == false the chain is created if missing and a default
// "-j DROP" rule is appended unless already present. With remove == true an
// existing chain is flushed and deleted; a missing chain is a no-op.
// The global overlay chain is initialized on the first call.
func setNetworkChain(cname string, remove bool) error {
	// TODO IPv6 support
	ipt := iptables.GetIptable(iptables.IPv4)

	// Initialize the onetime global overlay chain
	filterOnce.Do(setupGlobalChain)

	present := chainExists(cname)

	if remove {
		if !present {
			return nil
		}
		// Rules must be flushed before the chain itself can be deleted.
		if err := ipt.RawCombinedOutput("-F", cname); err != nil {
			return fmt.Errorf("failed to flush overlay network chain %s rules: %v", cname, err)
		}
		if err := ipt.RawCombinedOutput("-X", cname); err != nil {
			return fmt.Errorf("failed network chain operation %q for chain %s: %v", "-X", cname, err)
		}
		return nil
	}

	if !present {
		if err := ipt.RawCombinedOutput("-N", cname); err != nil {
			return fmt.Errorf("failed network chain operation %q for chain %s: %v", "-N", cname, err)
		}
	}

	if ipt.Exists(iptables.Filter, cname, "-j", "DROP") {
		return nil
	}
	if err := ipt.RawCombinedOutput("-A", cname, "-j", "DROP"); err != nil {
		return fmt.Errorf("failed adding default drop rule to overlay network chain %s: %v", cname, err)
	}
	return nil
}
// addNetworkChain creates the per-network chain cname while holding the
// filter lock obtained from filterWait.
func addNetworkChain(cname string) error {
	release := filterWait()
	defer release()

	return setNetworkChain(cname, false)
}
// removeNetworkChain flushes and deletes the per-network chain cname while
// holding the filter lock obtained from filterWait.
func removeNetworkChain(cname string) error {
	release := filterWait()
	defer release()

	return setNetworkChain(cname, true)
}
// setFilters installs (remove == false) or deletes (remove == true) the
// iptables rules that tie bridge brName to the per-network chain cname:
//
//   - a jump from the global overlay chain to cname for traffic leaving via
//     brName ("-o brName -j cname"), and
//   - an ACCEPT rule in cname for traffic entering via brName.
//
// When installing, the jump to the global overlay chain is first moved to the
// top of both the OUTPUT and FORWARD chains. Rules that are already in the
// requested state are left untouched. The first failing iptables command
// aborts the operation and is returned as an error.
func setFilters(cname, brName string, remove bool) error {
	// action is only used in error messages so that they name the operation
	// that actually failed.
	opt, action := "-I", "add"
	if remove {
		opt, action = "-D", "remove"
	}

	// TODO IPv6 support
	iptable := iptables.GetIptable(iptables.IPv4)

	// Every time we set filters for a new subnet make sure to move the global
	// overlay hook to the top of both the OUTPUT and FORWARD chains.
	if !remove {
		for _, chain := range []string{"OUTPUT", "FORWARD"} {
			if iptable.Exists(iptables.Filter, chain, "-j", globalChain) {
				if err := iptable.RawCombinedOutput("-D", chain, "-j", globalChain); err != nil {
					return fmt.Errorf("failed to delete overlay hook in chain %s while moving the hook: %v", chain, err)
				}
			}
			if err := iptable.RawCombinedOutput("-I", chain, "-j", globalChain); err != nil {
				return fmt.Errorf("failed to insert overlay hook in chain %s: %v", chain, err)
			}
		}
	}

	// Insert/Delete the rule to jump to per-bridge chain. Only act when the
	// rule is not already in the requested state (insert a missing rule,
	// delete an existing one).
	exists := iptable.Exists(iptables.Filter, globalChain, "-o", brName, "-j", cname)
	if remove == exists {
		if err := iptable.RawCombinedOutput(opt, globalChain, "-o", brName, "-j", cname); err != nil {
			return fmt.Errorf("failed to %s per-bridge filter rule for bridge %s, network chain %s: %v", action, brName, cname, err)
		}
	}

	// Same idempotency check for the ACCEPT rule inside the network chain.
	exists = iptable.Exists(iptables.Filter, cname, "-i", brName, "-j", "ACCEPT")
	if remove != exists {
		return nil
	}
	if err := iptable.RawCombinedOutput(opt, cname, "-i", brName, "-j", "ACCEPT"); err != nil {
		return fmt.Errorf("failed to %s overlay filter rule for network chain %s, bridge %s: %v", action, cname, brName, err)
	}
	return nil
}
// addFilters installs the filter rules for bridge brName and network chain
// cname while holding the filter lock obtained from filterWait.
func addFilters(cname, brName string) error {
	release := filterWait()
	defer release()

	return setFilters(cname, brName, false)
}
// removeFilters deletes the filter rules for bridge brName and network chain
// cname while holding the filter lock obtained from filterWait.
func removeFilters(cname, brName string) error {
	release := filterWait()
	defer release()

	return setFilters(cname, brName, true)
}

View file

@ -16,10 +16,8 @@ import (
"github.com/docker/docker/libnetwork/driverapi"
"github.com/docker/docker/libnetwork/netlabel"
"github.com/docker/docker/libnetwork/netutils"
"github.com/docker/docker/libnetwork/ns"
"github.com/docker/docker/libnetwork/osl"
"github.com/docker/docker/libnetwork/resolvconf"
"github.com/docker/docker/libnetwork/types"
"github.com/hashicorp/go-multierror"
"github.com/sirupsen/logrus"
@ -29,7 +27,6 @@ import (
)
var (
hostMode bool
networkOnce sync.Once
networkMu sync.Mutex
vniTbl = make(map[uint32]string)
@ -233,7 +230,7 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
func (n *network) joinSandbox(s *subnet, incJoinCount bool) error {
// If there is a race between two go routines here only one will win
// the other will wait.
networkOnce.Do(networkOnceInit)
networkOnce.Do(populateVNITbl)
n.Lock()
// If initialization was successful then tell the peerDB to initialize the
@ -307,12 +304,6 @@ func (n *network) destroySandbox() {
}
for _, s := range n.subnets {
if hostMode {
if err := removeFilters(n.id[:12], s.brName); err != nil {
logrus.Warnf("Could not remove overlay filters: %v", err)
}
}
if s.vxlanName != "" {
err := deleteInterface(s.vxlanName)
if err != nil {
@ -321,12 +312,6 @@ func (n *network) destroySandbox() {
}
}
if hostMode {
if err := removeNetworkChain(n.id[:12]); err != nil {
logrus.Warnf("could not remove network chain: %v", err)
}
}
n.sbox.Destroy()
n.sbox = nil
}
@ -378,45 +363,6 @@ func populateVNITbl() {
})
}
// networkOnceInit populates the VNI table and then decides whether hostMode
// must be enabled.
//
// hostMode is switched on when the _OVERLAY_HOST_MODE environment variable is
// non-empty, or when a throwaway vxlan link named "testvxlan" cannot be
// assigned to a network namespace. Any failure while setting up the probe is
// logged and leaves hostMode unchanged.
func networkOnceInit() {
	populateVNITbl()

	// A non-empty environment variable forces host mode without probing.
	if forced := os.Getenv("_OVERLAY_HOST_MODE"); forced != "" {
		hostMode = true
		return
	}

	// Create the probe interface; it is removed again when this function returns.
	if err := createVxlan("testvxlan", 1, 0); err != nil {
		logrus.Errorf("Failed to create testvxlan interface: %v", err)
		return
	}
	defer deleteInterface("testvxlan")

	path := "/proc/self/ns/net"
	selfNs, err := netns.GetFromPath(path)
	if err != nil {
		logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err)
		return
	}
	defer selfNs.Close()

	handle := ns.NlHandle()
	probe, err := handle.LinkByName("testvxlan")
	if err != nil {
		logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err)
		return
	}

	// If we are not able to move the vxlan interface to a namespace
	// then fallback to host mode
	if moveErr := handle.LinkSetNsFd(probe, int(selfNs)); moveErr != nil {
		hostMode = true
	}
}
func (n *network) generateVxlanName(s *subnet) string {
id := n.id
if len(n.id) > 5 {
@ -439,59 +385,26 @@ func (n *network) getBridgeNamePrefix(s *subnet) string {
return fmt.Sprintf("ov-%06x", s.vni)
}
// checkOverlap validates the overlay subnet nw against the nameservers read
// from the file at resolvconf.Path() and against the host route table.
// A failure to read that file is ignored and the nameserver check then runs
// with an empty list. The first failing check is returned as an error.
func checkOverlap(nw *net.IPNet) error {
	var nsCIDRs []string
	content, readErr := os.ReadFile(resolvconf.Path())
	if readErr == nil {
		nsCIDRs = resolvconf.GetNameserversAsCIDR(content)
	}

	err := netutils.CheckNameserverOverlaps(nsCIDRs, nw)
	if err != nil {
		return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nsCIDRs, err)
	}

	err = netutils.CheckRouteOverlaps(nw)
	if err != nil {
		return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err)
	}

	return nil
}
func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {
if hostMode {
// Try to delete stale bridge interface if it exists
if err := deleteInterface(brName); err != nil {
deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s)
}
// Try to delete the vxlan interface by vni if already present
deleteVxlanByVNI("", s.vni)
// Try to find this subnet's vni is being used in some
// other namespace by looking at vniTbl that we just
// populated in the once init. If a hit is found then
// it must a stale namespace from previous
// life. Destroy it completely and reclaim resourced.
networkMu.Lock()
path, ok := vniTbl[s.vni]
networkMu.Unlock()
if err := checkOverlap(s.subnetIP); err != nil {
return err
if ok {
deleteVxlanByVNI(path, s.vni)
if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
logrus.Errorf("unmount of %s failed: %v", path, err)
}
}
os.Remove(path)
if !hostMode {
// Try to find this subnet's vni is being used in some
// other namespace by looking at vniTbl that we just
// populated in the once init. If a hit is found then
// it must a stale namespace from previous
// life. Destroy it completely and reclaim resourced.
networkMu.Lock()
path, ok := vniTbl[s.vni]
delete(vniTbl, s.vni)
networkMu.Unlock()
if ok {
deleteVxlanByVNI(path, s.vni)
if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
logrus.Errorf("unmount of %s failed: %v", path, err)
}
os.Remove(path)
networkMu.Lock()
delete(vniTbl, s.vni)
networkMu.Unlock()
}
}
// create a bridge and vxlan device for this subnet and move it to the sandbox
@ -530,10 +443,6 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error
return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
}
if hostMode {
return addFilters(n.id[:12], brName)
}
if err := setDefaultVLAN(sbox); err != nil {
// not a fatal error
logrus.WithError(err).Error("set bridge default vlan failed")
@ -657,18 +566,12 @@ func (n *network) cleanupStaleSandboxes() {
func (n *network) initSandbox() error {
n.initEpoch++
if hostMode {
if err := addNetworkChain(n.id[:12]); err != nil {
return err
}
}
// If there are any stale sandboxes related to this network
// from previous daemon life clean it up here
n.cleanupStaleSandboxes()
key := osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
sbox, err := osl.NewSandbox(key, !hostMode, false)
sbox, err := osl.NewSandbox(key, true, false)
if err != nil {
return fmt.Errorf("could not get network sandbox: %v", err)
}

View file

@ -5,7 +5,6 @@ package overlay
import (
"fmt"
"strings"
"syscall"
"github.com/docker/docker/libnetwork/drivers/overlay/overlayutils"
@ -74,34 +73,6 @@ func createVxlan(name string, vni uint32, mtu int) error {
return nil
}
// deleteInterfaceBySubnet deletes every bridge link whose name starts with
// brPrefix and that carries an IPv4 address overlapping s.subnetIP.
//
// An error is returned only when the link list cannot be obtained. Failures
// to read a bridge's addresses or to delete a bridge are logged and the scan
// continues with the next link.
func deleteInterfaceBySubnet(brPrefix string, s *subnet) error {
	nlh := ns.NlHandle()
	links, err := nlh.LinkList()
	if err != nil {
		return fmt.Errorf("failed to list interfaces while deleting bridge interface by subnet: %v", err)
	}

	for _, l := range links {
		name := l.Attrs().Name
		// Only bridges carrying the expected name prefix are candidates.
		if _, ok := l.(*netlink.Bridge); !ok || !strings.HasPrefix(name, brPrefix) {
			continue
		}

		addrList, err := nlh.AddrList(l, netlink.FAMILY_V4)
		if err != nil {
			logrus.Errorf("error getting AddressList for bridge %s: %v", name, err)
			continue
		}

		for _, addr := range addrList {
			if !netutils.NetworkOverlaps(addr.IPNet, s.subnetIP) {
				continue
			}
			if err := nlh.LinkDel(l); err != nil {
				logrus.Errorf("error deleting bridge (%s) with subnet %v: %v", name, addr.IPNet, err)
			}
			// One deletion attempt per bridge is enough: trying again for
			// another overlapping address on the same link would only log a
			// redundant error.
			break
		}
	}
	return nil
}
func deleteInterface(name string) error {
link, err := ns.NlHandle().LinkByName(name)
if err != nil {