Merge pull request #46790 from corhere/libn/overlay-ipv6-vtep
libnetwork/drivers/overlay: support IPv6 transport
This commit is contained in:
commit
2f65748927
6 changed files with 94 additions and 41 deletions
|
@ -124,8 +124,8 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, isLocal, add bool) erro
|
||||||
return types.ForbiddenErrorf("encryption key is not present")
|
return types.ForbiddenErrorf("encryption key is not present")
|
||||||
}
|
}
|
||||||
|
|
||||||
lIP := net.ParseIP(d.bindAddress)
|
lIP := d.bindAddress
|
||||||
aIP := net.ParseIP(d.advertiseAddress)
|
aIP := d.advertiseAddress
|
||||||
nodes := map[string]net.IP{}
|
nodes := map[string]net.IP{}
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
|
@ -225,7 +225,19 @@ func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func programMangle(vni uint32, add bool) error {
|
func (d *driver) transportIPTable() (*iptables.IPTable, error) {
|
||||||
|
v6, err := d.isIPv6Transport()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
version := iptables.IPv4
|
||||||
|
if v6 {
|
||||||
|
version = iptables.IPv6
|
||||||
|
}
|
||||||
|
return iptables.GetIptable(version), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *driver) programMangle(vni uint32, add bool) error {
|
||||||
var (
|
var (
|
||||||
m = strconv.FormatUint(mark, 10)
|
m = strconv.FormatUint(mark, 10)
|
||||||
chain = "OUTPUT"
|
chain = "OUTPUT"
|
||||||
|
@ -234,8 +246,11 @@ func programMangle(vni uint32, add bool) error {
|
||||||
action = "install"
|
action = "install"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TODO IPv6 support
|
iptable, err := d.transportIPTable()
|
||||||
iptable := iptables.GetIptable(iptables.IPv4)
|
if err != nil {
|
||||||
|
// Fail closed if unsure. Better safe than cleartext.
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if !add {
|
if !add {
|
||||||
a = iptables.Delete
|
a = iptables.Delete
|
||||||
|
@ -249,7 +264,7 @@ func programMangle(vni uint32, add bool) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func programInput(vni uint32, add bool) error {
|
func (d *driver) programInput(vni uint32, add bool) error {
|
||||||
var (
|
var (
|
||||||
plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni)
|
plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni)
|
||||||
chain = "INPUT"
|
chain = "INPUT"
|
||||||
|
@ -261,8 +276,11 @@ func programInput(vni uint32, add bool) error {
|
||||||
return append(args, "-j", jump)
|
return append(args, "-j", jump)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO IPv6 support
|
iptable, err := d.transportIPTable()
|
||||||
iptable := iptables.GetIptable(iptables.IPv4)
|
if err != nil {
|
||||||
|
// Fail closed if unsure. Better safe than cleartext.
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if !add {
|
if !add {
|
||||||
msg = "remove"
|
msg = "remove"
|
||||||
|
@ -495,8 +513,8 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
|
||||||
newIdx = -1
|
newIdx = -1
|
||||||
priIdx = -1
|
priIdx = -1
|
||||||
delIdx = -1
|
delIdx = -1
|
||||||
lIP = net.ParseIP(d.bindAddress)
|
lIP = d.bindAddress
|
||||||
aIP = net.ParseIP(d.advertiseAddress)
|
aIP = d.advertiseAddress
|
||||||
)
|
)
|
||||||
|
|
||||||
d.Lock()
|
d.Lock()
|
||||||
|
|
|
@ -107,7 +107,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true)
|
d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, d.advertiseAddress, false, false, true)
|
||||||
|
|
||||||
if err = d.checkEncryption(nid, nil, true, true); err != nil {
|
if err = d.checkEncryption(nid, nil, true, true); err != nil {
|
||||||
log.G(context.TODO()).Warn(err)
|
log.G(context.TODO()).Warn(err)
|
||||||
|
@ -116,7 +116,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
|
||||||
buf, err := proto.Marshal(&PeerRecord{
|
buf, err := proto.Marshal(&PeerRecord{
|
||||||
EndpointIP: ep.addr.String(),
|
EndpointIP: ep.addr.String(),
|
||||||
EndpointMAC: ep.mac.String(),
|
EndpointMAC: ep.mac.String(),
|
||||||
TunnelEndpointIP: d.advertiseAddress,
|
TunnelEndpointIP: d.advertiseAddress.String(),
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -162,7 +162,7 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri
|
||||||
|
|
||||||
// Ignore local peers. We already know about them and they
|
// Ignore local peers. We already know about them and they
|
||||||
// should not be added to vxlan fdb.
|
// should not be added to vxlan fdb.
|
||||||
if peer.TunnelEndpointIP == d.advertiseAddress {
|
if net.ParseIP(peer.TunnelEndpointIP).Equal(d.advertiseAddress) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -209,7 +209,7 @@ func (d *driver) Leave(nid, eid string) error {
|
||||||
return types.InternalMaskableErrorf("could not find endpoint with id %s", eid)
|
return types.InternalMaskableErrorf("could not find endpoint with id %s", eid)
|
||||||
}
|
}
|
||||||
|
|
||||||
d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true)
|
d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, d.advertiseAddress, true)
|
||||||
|
|
||||||
n.leaveSandbox()
|
n.leaveSandbox()
|
||||||
|
|
||||||
|
|
|
@ -155,8 +155,8 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d
|
||||||
// Make sure no rule is on the way from any stale secure network
|
// Make sure no rule is on the way from any stale secure network
|
||||||
if !n.secure {
|
if !n.secure {
|
||||||
for _, vni := range vnis {
|
for _, vni := range vnis {
|
||||||
programMangle(vni, false)
|
d.programMangle(vni, false)
|
||||||
programInput(vni, false)
|
d.programInput(vni, false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -215,14 +215,14 @@ func (d *driver) DeleteNetwork(nid string) error {
|
||||||
|
|
||||||
if n.secure {
|
if n.secure {
|
||||||
for _, s := range n.subnets {
|
for _, s := range n.subnets {
|
||||||
if err := programMangle(s.vni, false); err != nil {
|
if err := d.programMangle(s.vni, false); err != nil {
|
||||||
log.G(context.TODO()).WithFields(log.Fields{
|
log.G(context.TODO()).WithFields(log.Fields{
|
||||||
"error": err,
|
"error": err,
|
||||||
"network_id": n.id,
|
"network_id": n.id,
|
||||||
"subnet": s.subnetIP,
|
"subnet": s.subnetIP,
|
||||||
}).Warn("Failed to clean up iptables rules during overlay network deletion")
|
}).Warn("Failed to clean up iptables rules during overlay network deletion")
|
||||||
}
|
}
|
||||||
if err := programInput(s.vni, false); err != nil {
|
if err := d.programInput(s.vni, false); err != nil {
|
||||||
log.G(context.TODO()).WithFields(log.Fields{
|
log.G(context.TODO()).WithFields(log.Fields{
|
||||||
"error": err,
|
"error": err,
|
||||||
"network_id": n.id,
|
"network_id": n.id,
|
||||||
|
@ -430,8 +430,11 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error
|
||||||
return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
|
return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err := createVxlan(vxlanName, s.vni, n.maxMTU())
|
v6transport, err := n.driver.isIPv6Transport()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
log.G(context.TODO()).WithError(err).Errorf("Assuming IPv4 transport; overlay network %s will not pass traffic if the Swarm data plane is IPv6.", n.id)
|
||||||
|
}
|
||||||
|
if err := createVxlan(vxlanName, s.vni, n.maxMTU(), v6transport); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -522,12 +525,12 @@ func (n *network) initSubnetSandbox(s *subnet) error {
|
||||||
// Program iptables rules for mandatory encryption of the secure
|
// Program iptables rules for mandatory encryption of the secure
|
||||||
// network, or clean up leftover rules for a stale secure network which
|
// network, or clean up leftover rules for a stale secure network which
|
||||||
// was previously assigned the same VNI.
|
// was previously assigned the same VNI.
|
||||||
if err := programMangle(s.vni, n.secure); err != nil {
|
if err := n.driver.programMangle(s.vni, n.secure); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := programInput(s.vni, n.secure); err != nil {
|
if err := n.driver.programInput(s.vni, n.secure); err != nil {
|
||||||
if n.secure {
|
if n.secure {
|
||||||
return multierror.Append(err, programMangle(s.vni, false))
|
return multierror.Append(err, n.driver.programMangle(s.vni, false))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@ package overlay
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
"github.com/containerd/log"
|
"github.com/containerd/log"
|
||||||
|
@ -56,7 +57,7 @@ func createVethPair() (string, string, error) {
|
||||||
return name1, name2, nil
|
return name1, name2, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func createVxlan(name string, vni uint32, mtu int) error {
|
func createVxlan(name string, vni uint32, mtu int, vtepIPv6 bool) error {
|
||||||
vxlan := &netlink.Vxlan{
|
vxlan := &netlink.Vxlan{
|
||||||
LinkAttrs: netlink.LinkAttrs{Name: name, MTU: mtu},
|
LinkAttrs: netlink.LinkAttrs{Name: name, MTU: mtu},
|
||||||
VxlanId: int(vni),
|
VxlanId: int(vni),
|
||||||
|
@ -67,6 +68,19 @@ func createVxlan(name string, vni uint32, mtu int) error {
|
||||||
L2miss: true,
|
L2miss: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The kernel restricts the destination VTEP (virtual tunnel endpoint) in
|
||||||
|
// VXLAN forwarding database entries to a single address family, defaulting
|
||||||
|
// to IPv4 unless either an IPv6 group or default remote destination address
|
||||||
|
// is configured when the VXLAN link is created.
|
||||||
|
//
|
||||||
|
// Set up the VXLAN link for IPv6 destination addresses by setting the VXLAN
|
||||||
|
// group address to the IPv6 unspecified address, like iproute2.
|
||||||
|
// https://github.com/iproute2/iproute2/commit/97d564b90ccb1e4a3c756d9caae161f55b2b63a2
|
||||||
|
// https://patchwork.ozlabs.org/project/netdev/patch/20180917171325.GA2660@localhost.localdomain/
|
||||||
|
if vtepIPv6 {
|
||||||
|
vxlan.Group = net.IPv6unspecified
|
||||||
|
}
|
||||||
|
|
||||||
if err := ns.NlHandle().LinkAdd(vxlan); err != nil {
|
if err := ns.NlHandle().LinkAdd(vxlan); err != nil {
|
||||||
return fmt.Errorf("error creating vxlan interface: %v", err)
|
return fmt.Errorf("error creating vxlan interface: %v", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@ package overlay
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/containerd/log"
|
"github.com/containerd/log"
|
||||||
|
@ -27,16 +28,16 @@ const (
|
||||||
var _ discoverapi.Discover = (*driver)(nil)
|
var _ discoverapi.Discover = (*driver)(nil)
|
||||||
|
|
||||||
type driver struct {
|
type driver struct {
|
||||||
bindAddress string
|
bindAddress, advertiseAddress net.IP
|
||||||
advertiseAddress string
|
|
||||||
config map[string]interface{}
|
config map[string]interface{}
|
||||||
peerDb peerNetworkMap
|
peerDb peerNetworkMap
|
||||||
secMap *encrMap
|
secMap *encrMap
|
||||||
networks networkTable
|
networks networkTable
|
||||||
initOS sync.Once
|
initOS sync.Once
|
||||||
localJoinOnce sync.Once
|
localJoinOnce sync.Once
|
||||||
keys []*key
|
keys []*key
|
||||||
peerOpMu sync.Mutex
|
peerOpMu sync.Mutex
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,11 +72,27 @@ func (d *driver) IsBuiltIn() bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) {
|
// isIPv6Transport reports whether the outer Layer-3 transport for VXLAN datagrams is IPv6.
|
||||||
if self {
|
func (d *driver) isIPv6Transport() (bool, error) {
|
||||||
|
// Infer whether remote peers' virtual tunnel endpoints will be IPv4 or IPv6
|
||||||
|
// from the address family of our own advertise address. This is a
|
||||||
|
// reasonable inference to make as Linux VXLAN links do not support
|
||||||
|
// mixed-address-family remote peers.
|
||||||
|
if d.advertiseAddress == nil {
|
||||||
|
return false, fmt.Errorf("overlay: cannot determine address family of transport: the local data-plane address is not currently known")
|
||||||
|
}
|
||||||
|
return d.advertiseAddress.To4() == nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *driver) nodeJoin(data discoverapi.NodeDiscoveryData) error {
|
||||||
|
if data.Self {
|
||||||
|
advAddr, bindAddr := net.ParseIP(data.Address), net.ParseIP(data.BindAddress)
|
||||||
|
if advAddr == nil {
|
||||||
|
return fmt.Errorf("invalid discovery data")
|
||||||
|
}
|
||||||
d.Lock()
|
d.Lock()
|
||||||
d.advertiseAddress = advertiseAddress
|
d.advertiseAddress = advAddr
|
||||||
d.bindAddress = bindAddress
|
d.bindAddress = bindAddr
|
||||||
d.Unlock()
|
d.Unlock()
|
||||||
|
|
||||||
// If containers are already running on this network update the
|
// If containers are already running on this network update the
|
||||||
|
@ -84,6 +101,7 @@ func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) {
|
||||||
d.peerDBUpdateSelf()
|
d.peerDBUpdateSelf()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DiscoverNew is a notification for a new discovery event, such as a new node joining a cluster
|
// DiscoverNew is a notification for a new discovery event, such as a new node joining a cluster
|
||||||
|
@ -91,10 +109,10 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
|
||||||
switch dType {
|
switch dType {
|
||||||
case discoverapi.NodeDiscovery:
|
case discoverapi.NodeDiscovery:
|
||||||
nodeData, ok := data.(discoverapi.NodeDiscoveryData)
|
nodeData, ok := data.(discoverapi.NodeDiscoveryData)
|
||||||
if !ok || nodeData.Address == "" {
|
if !ok {
|
||||||
return fmt.Errorf("invalid discovery data")
|
return fmt.Errorf("invalid discovery data type: %T", data)
|
||||||
}
|
}
|
||||||
d.nodeJoin(nodeData.Address, nodeData.BindAddress, nodeData.Self)
|
return d.nodeJoin(nodeData)
|
||||||
case discoverapi.EncryptionKeysConfig:
|
case discoverapi.EncryptionKeysConfig:
|
||||||
encrData, ok := data.(discoverapi.DriverEncryptionConfig)
|
encrData, ok := data.(discoverapi.DriverEncryptionConfig)
|
||||||
if !ok {
|
if !ok {
|
||||||
|
|
|
@ -429,7 +429,7 @@ func (d *driver) peerFlushOp(nid string) error {
|
||||||
func (d *driver) peerDBUpdateSelf() {
|
func (d *driver) peerDBUpdateSelf() {
|
||||||
d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
|
d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool {
|
||||||
if pEntry.isLocal {
|
if pEntry.isLocal {
|
||||||
pEntry.vtep = net.ParseIP(d.advertiseAddress)
|
pEntry.vtep = d.advertiseAddress
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
})
|
})
|
||||||
|
|
Loading…
Reference in a new issue