Merge pull request #46790 from corhere/libn/overlay-ipv6-vtep

libnetwork/drivers/overlay: support IPv6 transport
This commit is contained in:
Sebastiaan van Stijn 2023-11-23 18:23:27 +01:00 committed by GitHub
commit 2f65748927
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 94 additions and 41 deletions

View file

@ -124,8 +124,8 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, isLocal, add bool) erro
return types.ForbiddenErrorf("encryption key is not present") return types.ForbiddenErrorf("encryption key is not present")
} }
lIP := net.ParseIP(d.bindAddress) lIP := d.bindAddress
aIP := net.ParseIP(d.advertiseAddress) aIP := d.advertiseAddress
nodes := map[string]net.IP{} nodes := map[string]net.IP{}
switch { switch {
@ -225,7 +225,19 @@ func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
return nil return nil
} }
func programMangle(vni uint32, add bool) error { func (d *driver) transportIPTable() (*iptables.IPTable, error) {
v6, err := d.isIPv6Transport()
if err != nil {
return nil, err
}
version := iptables.IPv4
if v6 {
version = iptables.IPv6
}
return iptables.GetIptable(version), nil
}
func (d *driver) programMangle(vni uint32, add bool) error {
var ( var (
m = strconv.FormatUint(mark, 10) m = strconv.FormatUint(mark, 10)
chain = "OUTPUT" chain = "OUTPUT"
@ -234,8 +246,11 @@ func programMangle(vni uint32, add bool) error {
action = "install" action = "install"
) )
// TODO IPv6 support iptable, err := d.transportIPTable()
iptable := iptables.GetIptable(iptables.IPv4) if err != nil {
// Fail closed if unsure. Better safe than cleartext.
return err
}
if !add { if !add {
a = iptables.Delete a = iptables.Delete
@ -249,7 +264,7 @@ func programMangle(vni uint32, add bool) error {
return nil return nil
} }
func programInput(vni uint32, add bool) error { func (d *driver) programInput(vni uint32, add bool) error {
var ( var (
plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni) plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni)
chain = "INPUT" chain = "INPUT"
@ -261,8 +276,11 @@ func programInput(vni uint32, add bool) error {
return append(args, "-j", jump) return append(args, "-j", jump)
} }
// TODO IPv6 support iptable, err := d.transportIPTable()
iptable := iptables.GetIptable(iptables.IPv4) if err != nil {
// Fail closed if unsure. Better safe than cleartext.
return err
}
if !add { if !add {
msg = "remove" msg = "remove"
@ -495,8 +513,8 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
newIdx = -1 newIdx = -1
priIdx = -1 priIdx = -1
delIdx = -1 delIdx = -1
lIP = net.ParseIP(d.bindAddress) lIP = d.bindAddress
aIP = net.ParseIP(d.advertiseAddress) aIP = d.advertiseAddress
) )
d.Lock() d.Lock()

View file

@ -107,7 +107,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
} }
} }
d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true) d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, d.advertiseAddress, false, false, true)
if err = d.checkEncryption(nid, nil, true, true); err != nil { if err = d.checkEncryption(nid, nil, true, true); err != nil {
log.G(context.TODO()).Warn(err) log.G(context.TODO()).Warn(err)
@ -116,7 +116,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
buf, err := proto.Marshal(&PeerRecord{ buf, err := proto.Marshal(&PeerRecord{
EndpointIP: ep.addr.String(), EndpointIP: ep.addr.String(),
EndpointMAC: ep.mac.String(), EndpointMAC: ep.mac.String(),
TunnelEndpointIP: d.advertiseAddress, TunnelEndpointIP: d.advertiseAddress.String(),
}) })
if err != nil { if err != nil {
return err return err
@ -162,7 +162,7 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri
// Ignore local peers. We already know about them and they // Ignore local peers. We already know about them and they
// should not be added to vxlan fdb. // should not be added to vxlan fdb.
if peer.TunnelEndpointIP == d.advertiseAddress { if net.ParseIP(peer.TunnelEndpointIP).Equal(d.advertiseAddress) {
return return
} }
@ -209,7 +209,7 @@ func (d *driver) Leave(nid, eid string) error {
return types.InternalMaskableErrorf("could not find endpoint with id %s", eid) return types.InternalMaskableErrorf("could not find endpoint with id %s", eid)
} }
d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true) d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, d.advertiseAddress, true)
n.leaveSandbox() n.leaveSandbox()

View file

@ -155,8 +155,8 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d
// Make sure no rule left behind by a stale secure network is in the way // Make sure no rule left behind by a stale secure network is in the way
if !n.secure { if !n.secure {
for _, vni := range vnis { for _, vni := range vnis {
programMangle(vni, false) d.programMangle(vni, false)
programInput(vni, false) d.programInput(vni, false)
} }
} }
@ -215,14 +215,14 @@ func (d *driver) DeleteNetwork(nid string) error {
if n.secure { if n.secure {
for _, s := range n.subnets { for _, s := range n.subnets {
if err := programMangle(s.vni, false); err != nil { if err := d.programMangle(s.vni, false); err != nil {
log.G(context.TODO()).WithFields(log.Fields{ log.G(context.TODO()).WithFields(log.Fields{
"error": err, "error": err,
"network_id": n.id, "network_id": n.id,
"subnet": s.subnetIP, "subnet": s.subnetIP,
}).Warn("Failed to clean up iptables rules during overlay network deletion") }).Warn("Failed to clean up iptables rules during overlay network deletion")
} }
if err := programInput(s.vni, false); err != nil { if err := d.programInput(s.vni, false); err != nil {
log.G(context.TODO()).WithFields(log.Fields{ log.G(context.TODO()).WithFields(log.Fields{
"error": err, "error": err,
"network_id": n.id, "network_id": n.id,
@ -430,8 +430,11 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error
return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err) return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
} }
err := createVxlan(vxlanName, s.vni, n.maxMTU()) v6transport, err := n.driver.isIPv6Transport()
if err != nil { if err != nil {
log.G(context.TODO()).WithError(err).Errorf("Assuming IPv4 transport; overlay network %s will not pass traffic if the Swarm data plane is IPv6.", n.id)
}
if err := createVxlan(vxlanName, s.vni, n.maxMTU(), v6transport); err != nil {
return err return err
} }
@ -522,12 +525,12 @@ func (n *network) initSubnetSandbox(s *subnet) error {
// Program iptables rules for mandatory encryption of the secure // Program iptables rules for mandatory encryption of the secure
// network, or clean up leftover rules for a stale secure network which // network, or clean up leftover rules for a stale secure network which
// was previously assigned the same VNI. // was previously assigned the same VNI.
if err := programMangle(s.vni, n.secure); err != nil { if err := n.driver.programMangle(s.vni, n.secure); err != nil {
return err return err
} }
if err := programInput(s.vni, n.secure); err != nil { if err := n.driver.programInput(s.vni, n.secure); err != nil {
if n.secure { if n.secure {
return multierror.Append(err, programMangle(s.vni, false)) return multierror.Append(err, n.driver.programMangle(s.vni, false))
} }
} }

View file

@ -5,6 +5,7 @@ package overlay
import ( import (
"context" "context"
"fmt" "fmt"
"net"
"syscall" "syscall"
"github.com/containerd/log" "github.com/containerd/log"
@ -56,7 +57,7 @@ func createVethPair() (string, string, error) {
return name1, name2, nil return name1, name2, nil
} }
func createVxlan(name string, vni uint32, mtu int) error { func createVxlan(name string, vni uint32, mtu int, vtepIPv6 bool) error {
vxlan := &netlink.Vxlan{ vxlan := &netlink.Vxlan{
LinkAttrs: netlink.LinkAttrs{Name: name, MTU: mtu}, LinkAttrs: netlink.LinkAttrs{Name: name, MTU: mtu},
VxlanId: int(vni), VxlanId: int(vni),
@ -67,6 +68,19 @@ func createVxlan(name string, vni uint32, mtu int) error {
L2miss: true, L2miss: true,
} }
// The kernel restricts the destination VTEP (virtual tunnel endpoint) in
// VXLAN forwarding database entries to a single address family, defaulting
// to IPv4 unless either an IPv6 group or default remote destination address
// is configured when the VXLAN link is created.
//
// Set up the VXLAN link for IPv6 destination addresses by setting the VXLAN
// group address to the IPv6 unspecified address, like iproute2.
// https://github.com/iproute2/iproute2/commit/97d564b90ccb1e4a3c756d9caae161f55b2b63a2
// https://patchwork.ozlabs.org/project/netdev/patch/20180917171325.GA2660@localhost.localdomain/
if vtepIPv6 {
vxlan.Group = net.IPv6unspecified
}
if err := ns.NlHandle().LinkAdd(vxlan); err != nil { if err := ns.NlHandle().LinkAdd(vxlan); err != nil {
return fmt.Errorf("error creating vxlan interface: %v", err) return fmt.Errorf("error creating vxlan interface: %v", err)
} }

View file

@ -7,6 +7,7 @@ package overlay
import ( import (
"context" "context"
"fmt" "fmt"
"net"
"sync" "sync"
"github.com/containerd/log" "github.com/containerd/log"
@ -27,16 +28,16 @@ const (
var _ discoverapi.Discover = (*driver)(nil) var _ discoverapi.Discover = (*driver)(nil)
type driver struct { type driver struct {
bindAddress string bindAddress, advertiseAddress net.IP
advertiseAddress string
config map[string]interface{} config map[string]interface{}
peerDb peerNetworkMap peerDb peerNetworkMap
secMap *encrMap secMap *encrMap
networks networkTable networks networkTable
initOS sync.Once initOS sync.Once
localJoinOnce sync.Once localJoinOnce sync.Once
keys []*key keys []*key
peerOpMu sync.Mutex peerOpMu sync.Mutex
sync.Mutex sync.Mutex
} }
@ -71,11 +72,27 @@ func (d *driver) IsBuiltIn() bool {
return true return true
} }
func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) { // isIPv6Transport reports whether the outer Layer-3 transport for VXLAN datagrams is IPv6.
if self { func (d *driver) isIPv6Transport() (bool, error) {
// Infer whether remote peers' virtual tunnel endpoints will be IPv4 or IPv6
// from the address family of our own advertise address. This is a
// reasonable inference to make as Linux VXLAN links do not support
// mixed-address-family remote peers.
if d.advertiseAddress == nil {
return false, fmt.Errorf("overlay: cannot determine address family of transport: the local data-plane address is not currently known")
}
return d.advertiseAddress.To4() == nil, nil
}
func (d *driver) nodeJoin(data discoverapi.NodeDiscoveryData) error {
if data.Self {
advAddr, bindAddr := net.ParseIP(data.Address), net.ParseIP(data.BindAddress)
if advAddr == nil {
return fmt.Errorf("invalid discovery data")
}
d.Lock() d.Lock()
d.advertiseAddress = advertiseAddress d.advertiseAddress = advAddr
d.bindAddress = bindAddress d.bindAddress = bindAddr
d.Unlock() d.Unlock()
// If containers are already running on this network update the // If containers are already running on this network update the
@ -84,6 +101,7 @@ func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) {
d.peerDBUpdateSelf() d.peerDBUpdateSelf()
}) })
} }
return nil
} }
// DiscoverNew is a notification for a new discovery event, such as a new node joining a cluster // DiscoverNew is a notification for a new discovery event, such as a new node joining a cluster
@ -91,10 +109,10 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
switch dType { switch dType {
case discoverapi.NodeDiscovery: case discoverapi.NodeDiscovery:
nodeData, ok := data.(discoverapi.NodeDiscoveryData) nodeData, ok := data.(discoverapi.NodeDiscoveryData)
if !ok || nodeData.Address == "" { if !ok {
return fmt.Errorf("invalid discovery data") return fmt.Errorf("invalid discovery data type: %T", data)
} }
d.nodeJoin(nodeData.Address, nodeData.BindAddress, nodeData.Self) return d.nodeJoin(nodeData)
case discoverapi.EncryptionKeysConfig: case discoverapi.EncryptionKeysConfig:
encrData, ok := data.(discoverapi.DriverEncryptionConfig) encrData, ok := data.(discoverapi.DriverEncryptionConfig)
if !ok { if !ok {

View file

@ -429,7 +429,7 @@ func (d *driver) peerFlushOp(nid string) error {
func (d *driver) peerDBUpdateSelf() { func (d *driver) peerDBUpdateSelf() {
d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool { d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
if pEntry.isLocal { if pEntry.isLocal {
pEntry.vtep = net.ParseIP(d.advertiseAddress) pEntry.vtep = d.advertiseAddress
} }
return false return false
}) })