Преглед изворни кода

libnet/d/overlay: add BPF-powered VNI matcher

Some newer distros such as RHEL 9 have stopped making the xt_u32 kernel
module available with the kernels they ship. They do ship the xt_bpf
kernel module, which can do everything xt_u32 can and more. Add an
alternative implementation of the iptables match rule which uses xt_bpf
to implement exactly the same logic as the u32 filter using a BPF
program. Try programming the BPF-powered rules as a fallback when
programming the u32-powered rules fails.

Signed-off-by: Cory Snider <csnider@mirantis.com>
(cherry picked from commit 105b9834fbd24e687a5b5da14af65bb7cb6f016a)
Signed-off-by: Cory Snider <csnider@mirantis.com>
Cory Snider пре 2 година
родитељ
комит
98cbcb8003

+ 47 - 0
libnetwork/drivers/overlay/bpf.go

@@ -0,0 +1,47 @@
+package overlay
+
+import (
+	"fmt"
+	"strings"
+
+	"golang.org/x/net/bpf"
+)
+
+// vniMatchBPF assembles a classic BPF filter, for use with the iptables bpf
+// match, which accepts a packet only when its VXLAN Network ID equals vni.
+// The filter does not inspect the transport protocol, so the rule it is
+// attached to must already restrict matching to UDP datagrams.
+//
+// Only the low 24 bits of vni take part in the comparison: the value is
+// shifted left by eight bits within a uint32 before being compared.
+func vniMatchBPF(vni uint32) []bpf.RawInstruction {
+	prog := []bpf.Instruction{
+		// ldx 4*([0] & 0xf) ; X = length of the IPv4 header
+		bpf.LoadMemShift{Off: 0},
+		// ld [x + 12] ; A = word holding the VXLAN ID (UDP header + 4 bytes)
+		bpf.LoadIndirect{Off: 12, Size: 4},
+		// and #0xffffff00 ; keep the top 24 bits, where the VXLAN ID lives
+		bpf.ALUOpConstant{Op: bpf.ALUOpAnd, Val: 0xffffff00},
+		// jeq ($vni << 8), match ; skip the reject instruction on equality
+		bpf.JumpIf{Cond: bpf.JumpEqual, Val: vni << 8, SkipTrue: 1},
+		// ret #0 ; no match
+		bpf.RetConstant{Val: 0},
+		// match: ret #-1
+		bpf.RetConstant{Val: ^uint32(0)},
+	}
+
+	raw, err := bpf.Assemble(prog)
+	if err != nil {
+		// bpf.Assemble() fails only on an invalid instruction. The sole
+		// input-dependent part of the program is an instruction value whose
+		// whole range is valid, so assembly cannot succeed or fail depending
+		// on vni. An error here can only be resolved by fixing this function
+		// and recompiling, which is why it is not returned to the caller.
+		panic(err)
+	}
+	return raw
+}
+
+// marshalXTBPF marshals a BPF program into the "decimal" byte code format
+// which is suitable for passing to the [iptables bpf match].
+//
+//	iptables -m bpf --bytecode
+//
+// [iptables bpf match]: https://ipset.netfilter.org/iptables-extensions.man.html#lbAH
+func marshalXTBPF(prog []bpf.RawInstruction) string { //nolint:unused
+	var b strings.Builder
+	fmt.Fprintf(&b, "%d", len(prog))
+	for _, ins := range prog {
+		fmt.Fprintf(&b, ",%d %d %d %d", ins.Op, ins.Jt, ins.Jf, ins.K)
+	}
+	return b.String()
+}

+ 14 - 0
libnetwork/drivers/overlay/bpf_test.go

@@ -0,0 +1,14 @@
+package overlay
+
+import (
+	"testing"
+)
+
+// FuzzVNIMatchBPFDoesNotPanic feeds arbitrary uint32 values to vniMatchBPF.
+// The result is discarded: the fuzz target fails only if the call panics.
+func FuzzVNIMatchBPFDoesNotPanic(f *testing.F) {
+	// Seed corpus: small values plus values around the 24-bit and 32-bit limits.
+	seeds := [...]uint32{0, 1, 42, 0xfffffe, 0xffffff, 0xfffffffe, 0xffffffff}
+	for i := range seeds {
+		f.Add(seeds[i])
+	}
+
+	f.Fuzz(func(_ *testing.T, vni uint32) {
+		_ = vniMatchBPF(vni)
+	})
+}

+ 29 - 4
libnetwork/drivers/overlay/encryption.go

@@ -18,6 +18,7 @@ import (
 	"github.com/docker/docker/libnetwork/iptables"
 	"github.com/docker/docker/libnetwork/ns"
 	"github.com/docker/docker/libnetwork/types"
+	"github.com/hashicorp/go-multierror"
 	"github.com/sirupsen/logrus"
 	"github.com/vishvananda/netlink"
 )
@@ -225,7 +226,31 @@ func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
 	return nil
 }
 
-func programMangle(vni uint32, add bool) error {
+// matchVXLANFunc is the signature shared by the VXLAN match builders
+// (matchVXLANWithU32 and matchVXLANWithBPF): given a port and a VNI it returns
+// the iptables rule fragment to match on.
+type matchVXLANFunc func(port, vni uint32) []string
+
+// programVXLANRuleFunc wraps programWithMatch so that callers do not need to
+// choose which match implementation to use.
+//
+// When adding rules (add == true) the returned function first calls
+// programWithMatch with the u32 match and, only if that fails, retries with
+// the BPF match. It returns nil if either attempt succeeds, and both errors
+// combined if neither does.
+//
+// When removing rules (add == false) it always calls programWithMatch with
+// both matches, as either flavour could have been installed, and returns
+// whatever errors were reported, or nil if there were none.
+func programVXLANRuleFunc(programWithMatch func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error) func(vni uint32, add bool) error {
+	return func(vni uint32, add bool) error {
+		if !add {
+			// Delete both flavours.
+			err := programWithMatch(matchVXLANWithU32, vni, add)
+			return multierror.Append(err, programWithMatch(matchVXLANWithBPF, vni, add)).ErrorOrNil()
+		}
+
+		errU32 := programWithMatch(matchVXLANWithU32, vni, add)
+		if errU32 == nil {
+			return nil
+		}
+		// That didn't work. Maybe the xt_u32 module isn't available? Try again with xt_bpf.
+		if errBPF := programWithMatch(matchVXLANWithBPF, vni, add); errBPF != nil {
+			return multierror.Append(errU32, errBPF)
+		}
+		return nil
+	}
+}
+
+var programMangle = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error {
 	var (
 		m      = strconv.FormatUint(mark, 10)
 		chain  = "OUTPUT"
@@ -247,9 +272,9 @@ func programMangle(vni uint32, add bool) error {
 	}
 
 	return nil
-}
+})
 
-func programInput(vni uint32, add bool) error {
+var programInput = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error {
 	var (
 		plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni)
 		ipsecVxlan = append([]string{"-m", "policy", "--dir", "in", "--pol", "ipsec"}, plainVxlan...)
@@ -279,7 +304,7 @@ func programInput(vni uint32, add bool) error {
 	}
 
 	return nil
-}
+})
 
 func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) {
 	var (

+ 17 - 0
libnetwork/drivers/overlay/encryption_bpf.go

@@ -0,0 +1,17 @@
+package overlay
+
+import (
+	"strconv"
+)
+
+// matchVXLANWithBPF builds the iptables rule fragment which selects VXLAN
+// datagrams addressed to the given UDP destination port and carrying the given
+// VXLAN Network ID, using the xt_bpf netfilter kernel module for the VNI
+// comparison. A new slice is allocated on every call, so its backing array
+// never aliases that of any other slice.
+func matchVXLANWithBPF(port, vni uint32) []string {
+	bytecode := marshalXTBPF(vniMatchBPF(vni))
+
+	// https://ipset.netfilter.org/iptables-extensions.man.html#lbAH
+	return []string{
+		"-p", "udp",
+		"--dport", strconv.FormatUint(uint64(port), 10),
+		"-m", "bpf",
+		"--bytecode", bytecode,
+	}
+}

+ 5 - 5
libnetwork/drivers/overlay/encryption_u32.go

@@ -5,11 +5,11 @@ import (
 	"strconv"
 )
 
-// matchVXLAN returns an iptables rule fragment which matches VXLAN datagrams
-// with the given destination port and VXLAN Network ID utilizing the xt_u32
-// netfilter kernel module. The returned slice's backing array is guaranteed not
-// to alias any other slice's.
-func matchVXLAN(port, vni uint32) []string {
+// matchVXLANWithU32 returns an iptables rule fragment which matches VXLAN
+// datagrams with the given destination port and VXLAN Network ID utilizing the
+// xt_u32 netfilter kernel module. The returned slice's backing array is
+// guaranteed not to alias any other slice's.
+func matchVXLANWithU32(port, vni uint32) []string {
 	dport := strconv.FormatUint(uint64(port), 10)
 
 	// The u32 expression language is documented in iptables-extensions(8).