libnetwork/overlay: remove host mode
Linux kernels prior to v3.16 did not support netns for vxlan interfaces. As such, moby/libnetwork#821 introduced a "host mode" to the overlay driver. The related kernel fix has been available to RHEL 7 users since v7.2. This mode could be forced through the env var _OVERLAY_HOST_MODE. However, this env var has never been documented and is not referenced in any blog post, so there's little chance many people rely on it. Moreover, this host mode is deemed an implementation detail by maintainers. As such, we can consider it dead and remove it without a prior deprecation warning. Signed-off-by: Albin Kerouanton <albinker@gmail.com>
This commit is contained in:
parent
1d46597c8b
commit
8ed900263e
3 changed files with 16 additions and 295 deletions
|
@ -1,153 +0,0 @@
|
|||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package overlay
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/docker/docker/libnetwork/iptables"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const globalChain = "DOCKER-OVERLAY"
|
||||
|
||||
var filterOnce sync.Once
|
||||
|
||||
var filterChan = make(chan struct{}, 1)
|
||||
|
||||
func filterWait() func() {
|
||||
filterChan <- struct{}{}
|
||||
return func() { <-filterChan }
|
||||
}
|
||||
|
||||
func chainExists(cname string) bool {
|
||||
// TODO IPv6 support
|
||||
iptable := iptables.GetIptable(iptables.IPv4)
|
||||
if _, err := iptable.Raw("-L", cname); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func setupGlobalChain() {
|
||||
// TODO IPv6 support
|
||||
iptable := iptables.GetIptable(iptables.IPv4)
|
||||
// Because of an ungraceful shutdown, chain could already be present
|
||||
if !chainExists(globalChain) {
|
||||
if err := iptable.RawCombinedOutput("-N", globalChain); err != nil {
|
||||
logrus.Errorf("could not create global overlay chain: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if !iptable.Exists(iptables.Filter, globalChain, "-j", "RETURN") {
|
||||
if err := iptable.RawCombinedOutput("-A", globalChain, "-j", "RETURN"); err != nil {
|
||||
logrus.Errorf("could not install default return chain in the overlay global chain: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func setNetworkChain(cname string, remove bool) error {
|
||||
// TODO IPv6 support
|
||||
iptable := iptables.GetIptable(iptables.IPv4)
|
||||
// Initialize the onetime global overlay chain
|
||||
filterOnce.Do(setupGlobalChain)
|
||||
|
||||
exists := chainExists(cname)
|
||||
|
||||
opt := "-N"
|
||||
// In case of remove, make sure to flush the rules in the chain
|
||||
if remove && exists {
|
||||
if err := iptable.RawCombinedOutput("-F", cname); err != nil {
|
||||
return fmt.Errorf("failed to flush overlay network chain %s rules: %v", cname, err)
|
||||
}
|
||||
opt = "-X"
|
||||
}
|
||||
|
||||
if (!remove && !exists) || (remove && exists) {
|
||||
if err := iptable.RawCombinedOutput(opt, cname); err != nil {
|
||||
return fmt.Errorf("failed network chain operation %q for chain %s: %v", opt, cname, err)
|
||||
}
|
||||
}
|
||||
|
||||
if !remove {
|
||||
if !iptable.Exists(iptables.Filter, cname, "-j", "DROP") {
|
||||
if err := iptable.RawCombinedOutput("-A", cname, "-j", "DROP"); err != nil {
|
||||
return fmt.Errorf("failed adding default drop rule to overlay network chain %s: %v", cname, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func addNetworkChain(cname string) error {
|
||||
defer filterWait()()
|
||||
|
||||
return setNetworkChain(cname, false)
|
||||
}
|
||||
|
||||
func removeNetworkChain(cname string) error {
|
||||
defer filterWait()()
|
||||
|
||||
return setNetworkChain(cname, true)
|
||||
}
|
||||
|
||||
func setFilters(cname, brName string, remove bool) error {
|
||||
opt := "-I"
|
||||
if remove {
|
||||
opt = "-D"
|
||||
}
|
||||
// TODO IPv6 support
|
||||
iptable := iptables.GetIptable(iptables.IPv4)
|
||||
|
||||
// Every time we set filters for a new subnet, make sure to move the global overlay hook to the top of both the OUTPUT and FORWARD chains
|
||||
if !remove {
|
||||
for _, chain := range []string{"OUTPUT", "FORWARD"} {
|
||||
exists := iptable.Exists(iptables.Filter, chain, "-j", globalChain)
|
||||
if exists {
|
||||
if err := iptable.RawCombinedOutput("-D", chain, "-j", globalChain); err != nil {
|
||||
return fmt.Errorf("failed to delete overlay hook in chain %s while moving the hook: %v", chain, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := iptable.RawCombinedOutput("-I", chain, "-j", globalChain); err != nil {
|
||||
return fmt.Errorf("failed to insert overlay hook in chain %s: %v", chain, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Insert/Delete the rule to jump to per-bridge chain
|
||||
exists := iptable.Exists(iptables.Filter, globalChain, "-o", brName, "-j", cname)
|
||||
if (!remove && !exists) || (remove && exists) {
|
||||
if err := iptable.RawCombinedOutput(opt, globalChain, "-o", brName, "-j", cname); err != nil {
|
||||
return fmt.Errorf("failed to add per-bridge filter rule for bridge %s, network chain %s: %v", brName, cname, err)
|
||||
}
|
||||
}
|
||||
|
||||
exists = iptable.Exists(iptables.Filter, cname, "-i", brName, "-j", "ACCEPT")
|
||||
if (!remove && exists) || (remove && !exists) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := iptable.RawCombinedOutput(opt, cname, "-i", brName, "-j", "ACCEPT"); err != nil {
|
||||
return fmt.Errorf("failed to add overlay filter rile for network chain %s, bridge %s: %v", cname, brName, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func addFilters(cname, brName string) error {
|
||||
defer filterWait()()
|
||||
|
||||
return setFilters(cname, brName, false)
|
||||
}
|
||||
|
||||
func removeFilters(cname, brName string) error {
|
||||
defer filterWait()()
|
||||
|
||||
return setFilters(cname, brName, true)
|
||||
}
|
|
@ -16,10 +16,8 @@ import (
|
|||
|
||||
"github.com/docker/docker/libnetwork/driverapi"
|
||||
"github.com/docker/docker/libnetwork/netlabel"
|
||||
"github.com/docker/docker/libnetwork/netutils"
|
||||
"github.com/docker/docker/libnetwork/ns"
|
||||
"github.com/docker/docker/libnetwork/osl"
|
||||
"github.com/docker/docker/libnetwork/resolvconf"
|
||||
"github.com/docker/docker/libnetwork/types"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
@ -29,7 +27,6 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
hostMode bool
|
||||
networkOnce sync.Once
|
||||
networkMu sync.Mutex
|
||||
vniTbl = make(map[uint32]string)
|
||||
|
@ -233,7 +230,7 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
|
|||
func (n *network) joinSandbox(s *subnet, incJoinCount bool) error {
|
||||
// If there is a race between two go routines here only one will win
|
||||
// the other will wait.
|
||||
networkOnce.Do(networkOnceInit)
|
||||
networkOnce.Do(populateVNITbl)
|
||||
|
||||
n.Lock()
|
||||
// If initialization was successful then tell the peerDB to initialize the
|
||||
|
@ -307,12 +304,6 @@ func (n *network) destroySandbox() {
|
|||
}
|
||||
|
||||
for _, s := range n.subnets {
|
||||
if hostMode {
|
||||
if err := removeFilters(n.id[:12], s.brName); err != nil {
|
||||
logrus.Warnf("Could not remove overlay filters: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if s.vxlanName != "" {
|
||||
err := deleteInterface(s.vxlanName)
|
||||
if err != nil {
|
||||
|
@ -321,12 +312,6 @@ func (n *network) destroySandbox() {
|
|||
}
|
||||
}
|
||||
|
||||
if hostMode {
|
||||
if err := removeNetworkChain(n.id[:12]); err != nil {
|
||||
logrus.Warnf("could not remove network chain: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
n.sbox.Destroy()
|
||||
n.sbox = nil
|
||||
}
|
||||
|
@ -378,45 +363,6 @@ func populateVNITbl() {
|
|||
})
|
||||
}
|
||||
|
||||
func networkOnceInit() {
|
||||
populateVNITbl()
|
||||
|
||||
if os.Getenv("_OVERLAY_HOST_MODE") != "" {
|
||||
hostMode = true
|
||||
return
|
||||
}
|
||||
|
||||
err := createVxlan("testvxlan", 1, 0)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to create testvxlan interface: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
defer deleteInterface("testvxlan")
|
||||
|
||||
path := "/proc/self/ns/net"
|
||||
hNs, err := netns.GetFromPath(path)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to get network namespace from path %s while setting host mode: %v", path, err)
|
||||
return
|
||||
}
|
||||
defer hNs.Close()
|
||||
|
||||
nlh := ns.NlHandle()
|
||||
|
||||
iface, err := nlh.LinkByName("testvxlan")
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to get link testvxlan while setting host mode: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// If we are not able to move the vxlan interface to a namespace
|
||||
// then fallback to host mode
|
||||
if err := nlh.LinkSetNsFd(iface, int(hNs)); err != nil {
|
||||
hostMode = true
|
||||
}
|
||||
}
|
||||
|
||||
func (n *network) generateVxlanName(s *subnet) string {
|
||||
id := n.id
|
||||
if len(n.id) > 5 {
|
||||
|
@ -439,59 +385,26 @@ func (n *network) getBridgeNamePrefix(s *subnet) string {
|
|||
return fmt.Sprintf("ov-%06x", s.vni)
|
||||
}
|
||||
|
||||
func checkOverlap(nw *net.IPNet) error {
|
||||
var nameservers []string
|
||||
|
||||
if rc, err := os.ReadFile(resolvconf.Path()); err == nil {
|
||||
nameservers = resolvconf.GetNameserversAsCIDR(rc)
|
||||
}
|
||||
|
||||
if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil {
|
||||
return fmt.Errorf("overlay subnet %s failed check with nameserver: %v: %v", nw.String(), nameservers, err)
|
||||
}
|
||||
|
||||
if err := netutils.CheckRouteOverlaps(nw); err != nil {
|
||||
return fmt.Errorf("overlay subnet %s failed check with host route table: %v", nw.String(), err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {
|
||||
if hostMode {
|
||||
// Try to delete stale bridge interface if it exists
|
||||
if err := deleteInterface(brName); err != nil {
|
||||
deleteInterfaceBySubnet(n.getBridgeNamePrefix(s), s)
|
||||
}
|
||||
// Try to delete the vxlan interface by vni if already present
|
||||
deleteVxlanByVNI("", s.vni)
|
||||
// Try to find this subnet's vni is being used in some
|
||||
// other namespace by looking at vniTbl that we just
|
||||
// populated in the once init. If a hit is found then
|
||||
// it must be a stale namespace from previous
|
||||
// life. Destroy it completely and reclaim resources.
|
||||
networkMu.Lock()
|
||||
path, ok := vniTbl[s.vni]
|
||||
networkMu.Unlock()
|
||||
|
||||
if err := checkOverlap(s.subnetIP); err != nil {
|
||||
return err
|
||||
if ok {
|
||||
deleteVxlanByVNI(path, s.vni)
|
||||
if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
|
||||
logrus.Errorf("unmount of %s failed: %v", path, err)
|
||||
}
|
||||
}
|
||||
os.Remove(path)
|
||||
|
||||
if !hostMode {
|
||||
// Try to find this subnet's vni is being used in some
|
||||
// other namespace by looking at vniTbl that we just
|
||||
// populated in the once init. If a hit is found then
|
||||
// it must be a stale namespace from previous
|
||||
// life. Destroy it completely and reclaim resources.
|
||||
networkMu.Lock()
|
||||
path, ok := vniTbl[s.vni]
|
||||
delete(vniTbl, s.vni)
|
||||
networkMu.Unlock()
|
||||
|
||||
if ok {
|
||||
deleteVxlanByVNI(path, s.vni)
|
||||
if err := unix.Unmount(path, unix.MNT_FORCE); err != nil {
|
||||
logrus.Errorf("unmount of %s failed: %v", path, err)
|
||||
}
|
||||
os.Remove(path)
|
||||
|
||||
networkMu.Lock()
|
||||
delete(vniTbl, s.vni)
|
||||
networkMu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// create a bridge and vxlan device for this subnet and move it to the sandbox
|
||||
|
@ -530,10 +443,6 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error
|
|||
return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
|
||||
}
|
||||
|
||||
if hostMode {
|
||||
return addFilters(n.id[:12], brName)
|
||||
}
|
||||
|
||||
if err := setDefaultVLAN(sbox); err != nil {
|
||||
// not a fatal error
|
||||
logrus.WithError(err).Error("set bridge default vlan failed")
|
||||
|
@ -657,18 +566,12 @@ func (n *network) cleanupStaleSandboxes() {
|
|||
func (n *network) initSandbox() error {
|
||||
n.initEpoch++
|
||||
|
||||
if hostMode {
|
||||
if err := addNetworkChain(n.id[:12]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// If there are any stale sandboxes related to this network
|
||||
// from previous daemon life clean it up here
|
||||
n.cleanupStaleSandboxes()
|
||||
|
||||
key := osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch) + n.id)
|
||||
sbox, err := osl.NewSandbox(key, !hostMode, false)
|
||||
sbox, err := osl.NewSandbox(key, true, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not get network sandbox: %v", err)
|
||||
}
|
||||
|
|
|
@ -5,7 +5,6 @@ package overlay
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/docker/libnetwork/drivers/overlay/overlayutils"
|
||||
|
@ -74,34 +73,6 @@ func createVxlan(name string, vni uint32, mtu int) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func deleteInterfaceBySubnet(brPrefix string, s *subnet) error {
|
||||
nlh := ns.NlHandle()
|
||||
links, err := nlh.LinkList()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to list interfaces while deleting bridge interface by subnet: %v", err)
|
||||
}
|
||||
|
||||
for _, l := range links {
|
||||
name := l.Attrs().Name
|
||||
if _, ok := l.(*netlink.Bridge); ok && strings.HasPrefix(name, brPrefix) {
|
||||
addrList, err := nlh.AddrList(l, netlink.FAMILY_V4)
|
||||
if err != nil {
|
||||
logrus.Errorf("error getting AddressList for bridge %s", name)
|
||||
continue
|
||||
}
|
||||
for _, addr := range addrList {
|
||||
if netutils.NetworkOverlaps(addr.IPNet, s.subnetIP) {
|
||||
err = nlh.LinkDel(l)
|
||||
if err != nil {
|
||||
logrus.Errorf("error deleting bridge (%s) with subnet %v: %v", name, addr.IPNet, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func deleteInterface(name string) error {
|
||||
link, err := ns.NlHandle().LinkByName(name)
|
||||
if err != nil {
|
||||
|
|
Loading…
Add table
Reference in a new issue