Browse Source

Merge pull request #46790 from corhere/libn/overlay-ipv6-vtep

libnetwork/drivers/overlay: support IPv6 transport
Sebastiaan van Stijn 1 năm trước
mục cha
commit
2f65748927

+ 28 - 10
libnetwork/drivers/overlay/encryption.go

@@ -124,8 +124,8 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, isLocal, add bool) erro
 		return types.ForbiddenErrorf("encryption key is not present")
 	}
 
-	lIP := net.ParseIP(d.bindAddress)
-	aIP := net.ParseIP(d.advertiseAddress)
+	lIP := d.bindAddress
+	aIP := d.advertiseAddress
 	nodes := map[string]net.IP{}
 
 	switch {
@@ -225,7 +225,19 @@ func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
 	return nil
 }
 
-func programMangle(vni uint32, add bool) error {
+func (d *driver) transportIPTable() (*iptables.IPTable, error) {
+	v6, err := d.isIPv6Transport()
+	if err != nil {
+		return nil, err
+	}
+	version := iptables.IPv4
+	if v6 {
+		version = iptables.IPv6
+	}
+	return iptables.GetIptable(version), nil
+}
+
+func (d *driver) programMangle(vni uint32, add bool) error {
 	var (
 		m      = strconv.FormatUint(mark, 10)
 		chain  = "OUTPUT"
@@ -234,8 +246,11 @@ func programMangle(vni uint32, add bool) error {
 		action = "install"
 	)
 
-	// TODO IPv6 support
-	iptable := iptables.GetIptable(iptables.IPv4)
+	iptable, err := d.transportIPTable()
+	if err != nil {
+		// Fail closed if unsure. Better safe than cleartext.
+		return err
+	}
 
 	if !add {
 		a = iptables.Delete
@@ -249,7 +264,7 @@ func programMangle(vni uint32, add bool) error {
 	return nil
 }
 
-func programInput(vni uint32, add bool) error {
+func (d *driver) programInput(vni uint32, add bool) error {
 	var (
 		plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni)
 		chain      = "INPUT"
@@ -261,8 +276,11 @@ func programInput(vni uint32, add bool) error {
 		return append(args, "-j", jump)
 	}
 
-	// TODO IPv6 support
-	iptable := iptables.GetIptable(iptables.IPv4)
+	iptable, err := d.transportIPTable()
+	if err != nil {
+		// Fail closed if unsure. Better safe than cleartext.
+		return err
+	}
 
 	if !add {
 		msg = "remove"
@@ -495,8 +513,8 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
 		newIdx = -1
 		priIdx = -1
 		delIdx = -1
-		lIP    = net.ParseIP(d.bindAddress)
-		aIP    = net.ParseIP(d.advertiseAddress)
+		lIP    = d.bindAddress
+		aIP    = d.advertiseAddress
 	)
 
 	d.Lock()

+ 4 - 4
libnetwork/drivers/overlay/joinleave.go

@@ -107,7 +107,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
 		}
 	}
 
-	d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true)
+	d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, d.advertiseAddress, false, false, true)
 
 	if err = d.checkEncryption(nid, nil, true, true); err != nil {
 		log.G(context.TODO()).Warn(err)
@@ -116,7 +116,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
 	buf, err := proto.Marshal(&PeerRecord{
 		EndpointIP:       ep.addr.String(),
 		EndpointMAC:      ep.mac.String(),
-		TunnelEndpointIP: d.advertiseAddress,
+		TunnelEndpointIP: d.advertiseAddress.String(),
 	})
 	if err != nil {
 		return err
@@ -162,7 +162,7 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri
 
 	// Ignore local peers. We already know about them and they
 	// should not be added to vxlan fdb.
-	if peer.TunnelEndpointIP == d.advertiseAddress {
+	if net.ParseIP(peer.TunnelEndpointIP).Equal(d.advertiseAddress) {
 		return
 	}
 
@@ -209,7 +209,7 @@ func (d *driver) Leave(nid, eid string) error {
 		return types.InternalMaskableErrorf("could not find endpoint with id %s", eid)
 	}
 
-	d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true)
+	d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, d.advertiseAddress, true)
 
 	n.leaveSandbox()
 

+ 11 - 8
libnetwork/drivers/overlay/ov_network.go

@@ -155,8 +155,8 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d
 	// Make sure no rule is on the way from any stale secure network
 	if !n.secure {
 		for _, vni := range vnis {
-			programMangle(vni, false)
-			programInput(vni, false)
+			d.programMangle(vni, false)
+			d.programInput(vni, false)
 		}
 	}
 
@@ -215,14 +215,14 @@ func (d *driver) DeleteNetwork(nid string) error {
 
 	if n.secure {
 		for _, s := range n.subnets {
-			if err := programMangle(s.vni, false); err != nil {
+			if err := d.programMangle(s.vni, false); err != nil {
 				log.G(context.TODO()).WithFields(log.Fields{
 					"error":      err,
 					"network_id": n.id,
 					"subnet":     s.subnetIP,
 				}).Warn("Failed to clean up iptables rules during overlay network deletion")
 			}
-			if err := programInput(s.vni, false); err != nil {
+			if err := d.programInput(s.vni, false); err != nil {
 				log.G(context.TODO()).WithFields(log.Fields{
 					"error":      err,
 					"network_id": n.id,
@@ -430,8 +430,11 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error
 		return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
 	}
 
-	err := createVxlan(vxlanName, s.vni, n.maxMTU())
+	v6transport, err := n.driver.isIPv6Transport()
 	if err != nil {
+		log.G(context.TODO()).WithError(err).Errorf("Assuming IPv4 transport; overlay network %s will not pass traffic if the Swarm data plane is IPv6.", n.id)
+	}
+	if err := createVxlan(vxlanName, s.vni, n.maxMTU(), v6transport); err != nil {
 		return err
 	}
 
@@ -522,12 +525,12 @@ func (n *network) initSubnetSandbox(s *subnet) error {
 	// Program iptables rules for mandatory encryption of the secure
 	// network, or clean up leftover rules for a stale secure network which
 	// was previously assigned the same VNI.
-	if err := programMangle(s.vni, n.secure); err != nil {
+	if err := n.driver.programMangle(s.vni, n.secure); err != nil {
 		return err
 	}
-	if err := programInput(s.vni, n.secure); err != nil {
+	if err := n.driver.programInput(s.vni, n.secure); err != nil {
 		if n.secure {
-			return multierror.Append(err, programMangle(s.vni, false))
+			return multierror.Append(err, n.driver.programMangle(s.vni, false))
 		}
 	}
 

+ 15 - 1
libnetwork/drivers/overlay/ov_utils.go

@@ -5,6 +5,7 @@ package overlay
 import (
 	"context"
 	"fmt"
+	"net"
 	"syscall"
 
 	"github.com/containerd/log"
@@ -56,7 +57,7 @@ func createVethPair() (string, string, error) {
 	return name1, name2, nil
 }
 
-func createVxlan(name string, vni uint32, mtu int) error {
+func createVxlan(name string, vni uint32, mtu int, vtepIPv6 bool) error {
 	vxlan := &netlink.Vxlan{
 		LinkAttrs: netlink.LinkAttrs{Name: name, MTU: mtu},
 		VxlanId:   int(vni),
@@ -67,6 +68,19 @@ func createVxlan(name string, vni uint32, mtu int) error {
 		L2miss:    true,
 	}
 
+	// The kernel restricts the destination VTEP (virtual tunnel endpoint) in
+	// VXLAN forwarding database entries to a single address family, defaulting
+	// to IPv4 unless either an IPv6 group or default remote destination address
+	// is configured when the VXLAN link is created.
+	//
+	// Set up the VXLAN link for IPv6 destination addresses by setting the VXLAN
+	// group address to the IPv6 unspecified address, like iproute2.
+	// https://github.com/iproute2/iproute2/commit/97d564b90ccb1e4a3c756d9caae161f55b2b63a2
+	// https://patchwork.ozlabs.org/project/netdev/patch/20180917171325.GA2660@localhost.localdomain/
+	if vtepIPv6 {
+		vxlan.Group = net.IPv6unspecified
+	}
+
 	if err := ns.NlHandle().LinkAdd(vxlan); err != nil {
 		return fmt.Errorf("error creating vxlan interface: %v", err)
 	}

+ 35 - 17
libnetwork/drivers/overlay/overlay.go

@@ -7,6 +7,7 @@ package overlay
 import (
 	"context"
 	"fmt"
+	"net"
 	"sync"
 
 	"github.com/containerd/log"
@@ -27,16 +28,16 @@ const (
 var _ discoverapi.Discover = (*driver)(nil)
 
 type driver struct {
-	bindAddress      string
-	advertiseAddress string
-	config           map[string]interface{}
-	peerDb           peerNetworkMap
-	secMap           *encrMap
-	networks         networkTable
-	initOS           sync.Once
-	localJoinOnce    sync.Once
-	keys             []*key
-	peerOpMu         sync.Mutex
+	bindAddress, advertiseAddress net.IP
+
+	config        map[string]interface{}
+	peerDb        peerNetworkMap
+	secMap        *encrMap
+	networks      networkTable
+	initOS        sync.Once
+	localJoinOnce sync.Once
+	keys          []*key
+	peerOpMu      sync.Mutex
 	sync.Mutex
 }
 
@@ -71,11 +72,27 @@ func (d *driver) IsBuiltIn() bool {
 	return true
 }
 
-func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) {
-	if self {
+// isIPv6Transport reports whether the outer Layer-3 transport for VXLAN datagrams is IPv6.
+func (d *driver) isIPv6Transport() (bool, error) {
+	// Infer whether remote peers' virtual tunnel endpoints will be IPv4 or IPv6
+	// from the address family of our own advertise address. This is a
+	// reasonable inference to make as Linux VXLAN links do not support
+	// mixed-address-family remote peers.
+	if d.advertiseAddress == nil {
+		return false, fmt.Errorf("overlay: cannot determine address family of transport: the local data-plane address is not currently known")
+	}
+	return d.advertiseAddress.To4() == nil, nil
+}
+
+func (d *driver) nodeJoin(data discoverapi.NodeDiscoveryData) error {
+	if data.Self {
+		advAddr, bindAddr := net.ParseIP(data.Address), net.ParseIP(data.BindAddress)
+		if advAddr == nil {
+			return fmt.Errorf("invalid discovery data")
+		}
 		d.Lock()
-		d.advertiseAddress = advertiseAddress
-		d.bindAddress = bindAddress
+		d.advertiseAddress = advAddr
+		d.bindAddress = bindAddr
 		d.Unlock()
 
 		// If containers are already running on this network update the
@@ -84,6 +101,7 @@ func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) {
 			d.peerDBUpdateSelf()
 		})
 	}
+	return nil
 }
 
 // DiscoverNew is a notification for a new discovery event, such as a new node joining a cluster
@@ -91,10 +109,10 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
 	switch dType {
 	case discoverapi.NodeDiscovery:
 		nodeData, ok := data.(discoverapi.NodeDiscoveryData)
-		if !ok || nodeData.Address == "" {
-			return fmt.Errorf("invalid discovery data")
+		if !ok {
+			return fmt.Errorf("invalid discovery data type: %T", data)
 		}
-		d.nodeJoin(nodeData.Address, nodeData.BindAddress, nodeData.Self)
+		return d.nodeJoin(nodeData)
 	case discoverapi.EncryptionKeysConfig:
 		encrData, ok := data.(discoverapi.DriverEncryptionConfig)
 		if !ok {

+ 1 - 1
libnetwork/drivers/overlay/peerdb.go

@@ -429,7 +429,7 @@ func (d *driver) peerFlushOp(nid string) error {
 func (d *driver) peerDBUpdateSelf() {
 	d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
 		if pEntry.isLocal {
-			pEntry.vtep = net.ParseIP(d.advertiseAddress)
+			pEntry.vtep = d.advertiseAddress
 		}
 		return false
 	})