Merge pull request from GHSA-232p-vwff-86mp
[23.0 backport] libnetwork: ensure encryption is mandatory on encrypted overlay networks
This commit is contained in:
commit
59118bff50
8 changed files with 231 additions and 55 deletions
47
libnetwork/drivers/overlay/bpf.go
Normal file
47
libnetwork/drivers/overlay/bpf.go
Normal file
|
@ -0,0 +1,47 @@
|
|||
package overlay
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/bpf"
|
||||
)
|
||||
|
||||
// vniMatchBPF returns a BPF program suitable for passing to the iptables bpf
|
||||
// match which matches on the VXLAN Network ID of encapsulated packets. The
|
||||
// program assumes that it will be used in a rule which only matches UDP
|
||||
// datagrams.
|
||||
func vniMatchBPF(vni uint32) []bpf.RawInstruction {
|
||||
asm, err := bpf.Assemble([]bpf.Instruction{
|
||||
bpf.LoadMemShift{Off: 0}, // ldx 4*([0] & 0xf) ; Load length of IPv4 header into X
|
||||
bpf.LoadIndirect{Off: 12, Size: 4}, // ld [x + 12] ; Load VXLAN ID (UDP header + 4 bytes) into A
|
||||
bpf.ALUOpConstant{Op: bpf.ALUOpAnd, Val: 0xffffff00}, // and #0xffffff00 ; VXLAN ID is in top 24 bits
|
||||
bpf.JumpIf{Cond: bpf.JumpEqual, Val: vni << 8, SkipTrue: 1}, // jeq ($vni << 8), match
|
||||
bpf.RetConstant{Val: 0}, // ret #0
|
||||
bpf.RetConstant{Val: ^uint32(0)}, // match: ret #-1
|
||||
})
|
||||
// bpf.Assemble() only errors if an instruction is invalid. As the only variable
|
||||
// part of the program is an instruction value for which the entire range is
|
||||
// valid, whether the program can be successfully assembled is independent of
|
||||
// the input. Given that the only recourse is to fix this function and
|
||||
// recompile, there's little value in bubbling the error up to the caller.
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return asm
|
||||
}
|
||||
|
||||
// marshalXTBPF marshals a BPF program into the "decimal" byte code format
|
||||
// which is suitable for passing to the [iptables bpf match].
|
||||
//
|
||||
// iptables -m bpf --bytecode
|
||||
//
|
||||
// [iptables bpf match]: https://ipset.netfilter.org/iptables-extensions.man.html#lbAH
|
||||
func marshalXTBPF(prog []bpf.RawInstruction) string { //nolint:unused
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "%d", len(prog))
|
||||
for _, ins := range prog {
|
||||
fmt.Fprintf(&b, ",%d %d %d %d", ins.Op, ins.Jt, ins.Jf, ins.K)
|
||||
}
|
||||
return b.String()
|
||||
}
|
14
libnetwork/drivers/overlay/bpf_test.go
Normal file
14
libnetwork/drivers/overlay/bpf_test.go
Normal file
|
@ -0,0 +1,14 @@
|
|||
package overlay
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func FuzzVNIMatchBPFDoesNotPanic(f *testing.F) {
|
||||
for _, seed := range []uint32{0, 1, 42, 0xfffffe, 0xffffff, 0xfffffffe, 0xffffffff} {
|
||||
f.Add(seed)
|
||||
}
|
||||
f.Fuzz(func(t *testing.T, vni uint32) {
|
||||
_ = vniMatchBPF(vni)
|
||||
})
|
||||
}
|
|
@ -18,12 +18,41 @@ import (
|
|||
"github.com/docker/docker/libnetwork/iptables"
|
||||
"github.com/docker/docker/libnetwork/ns"
|
||||
"github.com/docker/docker/libnetwork/types"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
/*
|
||||
Encrypted overlay networks use IPsec in transport mode to encrypt and
|
||||
authenticate the VXLAN UDP datagrams. This driver implements a bespoke control
|
||||
plane which negotiates the security parameters for each peer-to-peer tunnel.
|
||||
|
||||
IPsec Terminology
|
||||
|
||||
- ESP: IPSec Encapsulating Security Payload
|
||||
- SPI: Security Parameter Index
|
||||
- ICV: Integrity Check Value
|
||||
- SA: Security Association https://en.wikipedia.org/wiki/IPsec#Security_association
|
||||
|
||||
|
||||
Developer documentation for Linux IPsec is rather sparse online. The following
|
||||
slide deck provides a decent overview.
|
||||
https://libreswan.org/wiki/images/e/e0/Netdev-0x12-ipsec-flow.pdf
|
||||
|
||||
The Linux IPsec stack is part of XFRM, the netlink packet transformation
|
||||
interface.
|
||||
https://man7.org/linux/man-pages/man8/ip-xfrm.8.html
|
||||
*/
|
||||
|
||||
const (
|
||||
r = 0xD0C4E3
|
||||
// Value used to mark outgoing packets which should have our IPsec
|
||||
// processing applied. It is also used as a label to identify XFRM
|
||||
// states (Security Associations) and policies (Security Policies)
|
||||
// programmed by us so we know which ones we can clean up without
|
||||
// disrupting other VPN connections on the system.
|
||||
mark = 0xD0C4E3
|
||||
|
||||
pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8)
|
||||
)
|
||||
|
||||
|
@ -33,7 +62,9 @@ const (
|
|||
bidir
|
||||
)
|
||||
|
||||
var spMark = netlink.XfrmMark{Value: uint32(r), Mask: 0xffffffff}
|
||||
// Mark value for matching packets which should have our IPsec security policy
|
||||
// applied.
|
||||
var spMark = netlink.XfrmMark{Value: mark, Mask: 0xffffffff}
|
||||
|
||||
type key struct {
|
||||
value []byte
|
||||
|
@ -47,6 +78,9 @@ func (k *key) String() string {
|
|||
return ""
|
||||
}
|
||||
|
||||
// Security Parameter Indices for the IPsec flows between local node and a
|
||||
// remote peer, which identify the Security Associations (XFRM states) to be
|
||||
// applied when encrypting and decrypting packets.
|
||||
type spi struct {
|
||||
forward int
|
||||
reverse int
|
||||
|
@ -79,8 +113,8 @@ func (e *encrMap) String() string {
|
|||
return b.String()
|
||||
}
|
||||
|
||||
func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal, add bool) error {
|
||||
logrus.Debugf("checkEncryption(%.7s, %v, %d, %t)", nid, rIP, vxlanID, isLocal)
|
||||
func (d *driver) checkEncryption(nid string, rIP net.IP, isLocal, add bool) error {
|
||||
logrus.Debugf("checkEncryption(%.7s, %v, %t)", nid, rIP, isLocal)
|
||||
|
||||
n := d.network(nid)
|
||||
if n == nil || !n.secure {
|
||||
|
@ -115,7 +149,7 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal
|
|||
|
||||
if add {
|
||||
for _, rIP := range nodes {
|
||||
if err := setupEncryption(lIP, aIP, rIP, vxlanID, d.secMap, d.keys); err != nil {
|
||||
if err := setupEncryption(lIP, aIP, rIP, d.secMap, d.keys); err != nil {
|
||||
logrus.Warnf("Failed to program network encryption between %s and %s: %v", lIP, rIP, err)
|
||||
}
|
||||
}
|
||||
|
@ -130,22 +164,14 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal
|
|||
return nil
|
||||
}
|
||||
|
||||
func setupEncryption(localIP, advIP, remoteIP net.IP, vni uint32, em *encrMap, keys []*key) error {
|
||||
logrus.Debugf("Programming encryption for vxlan %d between %s and %s", vni, localIP, remoteIP)
|
||||
// setupEncryption programs the encryption parameters for secure communication
|
||||
// between the local node and a remote node.
|
||||
func setupEncryption(localIP, advIP, remoteIP net.IP, em *encrMap, keys []*key) error {
|
||||
logrus.Debugf("Programming encryption between %s and %s", localIP, remoteIP)
|
||||
rIPs := remoteIP.String()
|
||||
|
||||
indices := make([]*spi, 0, len(keys))
|
||||
|
||||
err := programMangle(vni, true)
|
||||
if err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
|
||||
err = programInput(vni, true)
|
||||
if err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
|
||||
for i, k := range keys {
|
||||
spis := &spi{buildSPI(advIP, remoteIP, k.tag), buildSPI(remoteIP, advIP, k.tag)}
|
||||
dir := reverse
|
||||
|
@ -200,67 +226,96 @@ func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func programMangle(vni uint32, add bool) (err error) {
|
||||
// matchVXLANFunc builds the iptables rule fragment (match arguments only, no
// target) which selects VXLAN datagrams with the given UDP destination port
// and VXLAN Network ID.
type matchVXLANFunc func(port, vni uint32) []string
|
||||
|
||||
// programVXLANRuleFunc returns a function which tries calling programWithMatch
|
||||
// with the u32 match, falling back to the BPF match if installing u32 variant
|
||||
// of the rules fails.
|
||||
func programVXLANRuleFunc(programWithMatch func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error) func(vni uint32, add bool) error {
|
||||
return func(vni uint32, add bool) error {
|
||||
if add {
|
||||
if err := programWithMatch(matchVXLANWithU32, vni, add); err != nil {
|
||||
// That didn't work. Maybe the xt_u32 module isn't available? Try again with xt_bpf.
|
||||
err2 := programWithMatch(matchVXLANWithBPF, vni, add)
|
||||
if err2 != nil {
|
||||
return multierror.Append(err, err2)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
} else {
|
||||
// Delete both flavours.
|
||||
err := programWithMatch(matchVXLANWithU32, vni, add)
|
||||
return multierror.Append(err, programWithMatch(matchVXLANWithBPF, vni, add)).ErrorOrNil()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var programMangle = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error {
|
||||
var (
|
||||
p = strconv.FormatUint(uint64(overlayutils.VXLANUDPPort()), 10)
|
||||
c = fmt.Sprintf("0>>22&0x3C@12&0xFFFFFF00=%d", int(vni)<<8)
|
||||
m = strconv.FormatUint(uint64(r), 10)
|
||||
m = strconv.FormatUint(mark, 10)
|
||||
chain = "OUTPUT"
|
||||
rule = []string{"-p", "udp", "--dport", p, "-m", "u32", "--u32", c, "-j", "MARK", "--set-mark", m}
|
||||
a = "-A"
|
||||
rule = append(matchVXLAN(overlayutils.VXLANUDPPort(), vni), "-j", "MARK", "--set-mark", m)
|
||||
a = iptables.Append
|
||||
action = "install"
|
||||
)
|
||||
|
||||
// TODO IPv6 support
|
||||
iptable := iptables.GetIptable(iptables.IPv4)
|
||||
|
||||
if add == iptable.Exists(iptables.Mangle, chain, rule...) {
|
||||
return
|
||||
}
|
||||
|
||||
if !add {
|
||||
a = "-D"
|
||||
a = iptables.Delete
|
||||
action = "remove"
|
||||
}
|
||||
|
||||
if err = iptable.RawCombinedOutput(append([]string{"-t", string(iptables.Mangle), a, chain}, rule...)...); err != nil {
|
||||
logrus.Warnf("could not %s mangle rule: %v", action, err)
|
||||
if err := iptable.ProgramRule(iptables.Mangle, chain, a, rule); err != nil {
|
||||
return fmt.Errorf("could not %s mangle rule: %w", action, err)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
func programInput(vni uint32, add bool) (err error) {
|
||||
var programInput = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error {
|
||||
var (
|
||||
port = strconv.FormatUint(uint64(overlayutils.VXLANUDPPort()), 10)
|
||||
vniMatch = fmt.Sprintf("0>>22&0x3C@12&0xFFFFFF00=%d", int(vni)<<8)
|
||||
plainVxlan = []string{"-p", "udp", "--dport", port, "-m", "u32", "--u32", vniMatch, "-j"}
|
||||
ipsecVxlan = append([]string{"-m", "policy", "--dir", "in", "--pol", "ipsec"}, plainVxlan...)
|
||||
block = append(plainVxlan, "DROP")
|
||||
accept = append(ipsecVxlan, "ACCEPT")
|
||||
plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni)
|
||||
chain = "INPUT"
|
||||
action = iptables.Append
|
||||
msg = "add"
|
||||
)
|
||||
|
||||
rule := func(policy, jump string) []string {
|
||||
args := append([]string{"-m", "policy", "--dir", "in", "--pol", policy}, plainVxlan...)
|
||||
return append(args, "-j", jump)
|
||||
}
|
||||
|
||||
// TODO IPv6 support
|
||||
iptable := iptables.GetIptable(iptables.IPv4)
|
||||
|
||||
if !add {
|
||||
action = iptables.Delete
|
||||
msg = "remove"
|
||||
}
|
||||
|
||||
if err := iptable.ProgramRule(iptables.Filter, chain, action, accept); err != nil {
|
||||
logrus.Errorf("could not %s input rule: %v. Please do it manually.", msg, err)
|
||||
action := func(a iptables.Action) iptables.Action {
|
||||
if !add {
|
||||
return iptables.Delete
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
if err := iptable.ProgramRule(iptables.Filter, chain, action, block); err != nil {
|
||||
logrus.Errorf("could not %s input rule: %v. Please do it manually.", msg, err)
|
||||
// Accept incoming VXLAN datagrams for the VNI which were subjected to IPSec processing.
|
||||
// Append to the bottom of the chain to give administrator-configured rules precedence.
|
||||
if err := iptable.ProgramRule(iptables.Filter, chain, action(iptables.Append), rule("ipsec", "ACCEPT")); err != nil {
|
||||
return fmt.Errorf("could not %s input accept rule: %w", msg, err)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
// Drop incoming VXLAN datagrams for the VNI which were received in cleartext.
|
||||
// Insert at the top of the chain so the packets are dropped even if an
|
||||
// administrator-configured rule exists which would otherwise unconditionally
|
||||
// accept incoming VXLAN traffic.
|
||||
if err := iptable.ProgramRule(iptables.Filter, chain, action(iptables.Insert), rule("none", "DROP")); err != nil {
|
||||
return fmt.Errorf("could not %s input drop rule: %w", msg, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) {
|
||||
var (
|
||||
|
@ -280,7 +335,7 @@ func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (f
|
|||
Proto: netlink.XFRM_PROTO_ESP,
|
||||
Spi: spi.reverse,
|
||||
Mode: netlink.XFRM_MODE_TRANSPORT,
|
||||
Reqid: r,
|
||||
Reqid: mark,
|
||||
}
|
||||
if add {
|
||||
rSA.Aead = buildAeadAlgo(k, spi.reverse)
|
||||
|
@ -306,7 +361,7 @@ func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (f
|
|||
Proto: netlink.XFRM_PROTO_ESP,
|
||||
Spi: spi.forward,
|
||||
Mode: netlink.XFRM_MODE_TRANSPORT,
|
||||
Reqid: r,
|
||||
Reqid: mark,
|
||||
}
|
||||
if add {
|
||||
fSA.Aead = buildAeadAlgo(k, spi.forward)
|
||||
|
@ -355,7 +410,7 @@ func programSP(fSA *netlink.XfrmState, rSA *netlink.XfrmState, add bool) error {
|
|||
Proto: netlink.XFRM_PROTO_ESP,
|
||||
Mode: netlink.XFRM_MODE_TRANSPORT,
|
||||
Spi: fSA.Spi,
|
||||
Reqid: r,
|
||||
Reqid: mark,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -569,7 +624,7 @@ func updateNodeKey(lIP, aIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, pr
|
|||
Proto: netlink.XFRM_PROTO_ESP,
|
||||
Mode: netlink.XFRM_MODE_TRANSPORT,
|
||||
Spi: fSA2.Spi,
|
||||
Reqid: r,
|
||||
Reqid: mark,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -638,7 +693,7 @@ func clearEncryptionStates() {
|
|||
}
|
||||
for _, sa := range saList {
|
||||
sa := sa
|
||||
if sa.Reqid == r {
|
||||
if sa.Reqid == mark {
|
||||
if err := nlh.XfrmStateDel(&sa); err != nil {
|
||||
logrus.Warnf("Failed to delete stale SA %s: %v", sa, err)
|
||||
continue
|
||||
|
|
17
libnetwork/drivers/overlay/encryption_bpf.go
Normal file
17
libnetwork/drivers/overlay/encryption_bpf.go
Normal file
|
@ -0,0 +1,17 @@
|
|||
package overlay
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// matchVXLANWithBPF returns an iptables rule fragment which matches VXLAN
|
||||
// datagrams with the given destination port and VXLAN Network ID utilizing the
|
||||
// xt_bpf netfilter kernel module. The returned slice's backing array is
|
||||
// guaranteed not to alias any other slice's.
|
||||
func matchVXLANWithBPF(port, vni uint32) []string {
|
||||
dport := strconv.FormatUint(uint64(port), 10)
|
||||
vniMatch := marshalXTBPF(vniMatchBPF(vni))
|
||||
|
||||
// https://ipset.netfilter.org/iptables-extensions.man.html#lbAH
|
||||
return []string{"-p", "udp", "--dport", dport, "-m", "bpf", "--bytecode", vniMatch}
|
||||
}
|
30
libnetwork/drivers/overlay/encryption_u32.go
Normal file
30
libnetwork/drivers/overlay/encryption_u32.go
Normal file
|
@ -0,0 +1,30 @@
|
|||
package overlay
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// matchVXLANWithU32 returns an iptables rule fragment which matches VXLAN
|
||||
// datagrams with the given destination port and VXLAN Network ID utilizing the
|
||||
// xt_u32 netfilter kernel module. The returned slice's backing array is
|
||||
// guaranteed not to alias any other slice's.
|
||||
func matchVXLANWithU32(port, vni uint32) []string {
	dport := strconv.FormatUint(uint64(port), 10)

	// The u32 expression language is documented in iptables-extensions(8).
	// https://ipset.netfilter.org/iptables-extensions.man.html#lbCK
	//
	// 0>>22&0x3C      ; Compute number of octets in IPv4 header
	// @               ; Make this the new offset into the packet
	//                 ; (jump to start of UDP header)
	// 12&0xFFFFFF00   ; Read 32-bit value at offset 12 and mask off the bottom octet
	// =               ; Test whether the value is equal to a constant
	//
	// A UDP header is eight octets long so offset 12 from the start of the
	// UDP header is four octets into the payload: the VNI field of the
	// VXLAN header.
	//
	// The shift is performed in uint64 rather than int: on platforms where
	// int is 32 bits wide, shifting a VNI with bit 23 set would overflow
	// into the sign bit and render a negative constant in the expression.
	// On 64-bit platforms the formatted value is unchanged.
	vniMatch := fmt.Sprintf("0>>22&0x3C@12&0xFFFFFF00=%d", uint64(vni)<<8)

	// A fresh slice literal is returned on every call, so its backing array
	// is never shared with another slice.
	return []string{"-p", "udp", "--dport", dport, "-m", "u32", "--u32", vniMatch}
}
|
|
@ -117,7 +117,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
|
|||
|
||||
d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true)
|
||||
|
||||
if err = d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil {
|
||||
if err = d.checkEncryption(nid, nil, true, true); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import (
|
|||
"github.com/docker/docker/libnetwork/resolvconf"
|
||||
"github.com/docker/docker/libnetwork/types"
|
||||
"github.com/docker/docker/pkg/reexec"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
|
@ -666,6 +667,18 @@ func (n *network) initSubnetSandbox(s *subnet, restore bool) error {
|
|||
brName := n.generateBridgeName(s)
|
||||
vxlanName := n.generateVxlanName(s)
|
||||
|
||||
// Program iptables rules for mandatory encryption of the secure
|
||||
// network, or clean up leftover rules for a stale secure network which
|
||||
// was previously assigned the same VNI.
|
||||
if err := programMangle(s.vni, n.secure); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := programInput(s.vni, n.secure); err != nil {
|
||||
if n.secure {
|
||||
return multierror.Append(err, programMangle(s.vni, false))
|
||||
}
|
||||
}
|
||||
|
||||
if restore {
|
||||
if err := n.restoreSubnetSandbox(s, brName, vxlanName); err != nil {
|
||||
return err
|
||||
|
|
|
@ -387,7 +387,7 @@ func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask
|
|||
return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
|
||||
}
|
||||
|
||||
if err := d.checkEncryption(nid, vtep, n.vxlanID(s), false, true); err != nil {
|
||||
if err := d.checkEncryption(nid, vtep, false, true); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
|
||||
|
@ -447,7 +447,7 @@ func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPM
|
|||
return nil
|
||||
}
|
||||
|
||||
if err := d.checkEncryption(nid, vtep, 0, localPeer, false); err != nil {
|
||||
if err := d.checkEncryption(nid, vtep, localPeer, false); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue