diff --git a/libnetwork/drivers/overlay/bpf.go b/libnetwork/drivers/overlay/bpf.go new file mode 100644 index 0000000000..cb96fb7ab1 --- /dev/null +++ b/libnetwork/drivers/overlay/bpf.go @@ -0,0 +1,47 @@ +package overlay + +import ( + "fmt" + "strings" + + "golang.org/x/net/bpf" +) + +// vniMatchBPF returns a BPF program suitable for passing to the iptables bpf +// match which matches on the VXLAN Network ID of encapsulated packets. The +// program assumes that it will be used in a rule which only matches UDP +// datagrams. Only the low 24 bits of vni take part in the comparison. +func vniMatchBPF(vni uint32) []bpf.RawInstruction { + asm, err := bpf.Assemble([]bpf.Instruction{ + bpf.LoadMemShift{Off: 0}, // ldx 4*([0] & 0xf) ; Load length of IPv4 header into X + bpf.LoadIndirect{Off: 12, Size: 4}, // ld [x + 12] ; Load VXLAN ID (UDP header + 4 bytes) into A + bpf.ALUOpConstant{Op: bpf.ALUOpAnd, Val: 0xffffff00}, // and #0xffffff00 ; VXLAN ID is in top 24 bits + bpf.JumpIf{Cond: bpf.JumpEqual, Val: vni << 8, SkipTrue: 1}, // jeq ($vni << 8), match + bpf.RetConstant{Val: 0}, // ret #0 + bpf.RetConstant{Val: ^uint32(0)}, // match: ret #-1 + }) + // bpf.Assemble() only errors if an instruction is invalid. As the only variable + // part of the program is an instruction value for which the entire range is + // valid, whether the program can be successfully assembled is independent of + // the input. Given that the only recourse is to fix this function and + // recompile, there's little value in bubbling the error up to the caller. + if err != nil { + panic(err) + } + return asm +} + +// marshalXTBPF marshals a BPF program into the "decimal" byte code format +// which is suitable for passing to the [iptables bpf match]. 
+// +// iptables -m bpf --bytecode +// +// [iptables bpf match]: https://ipset.netfilter.org/iptables-extensions.man.html#lbAH +func marshalXTBPF(prog []bpf.RawInstruction) string { //nolint:unused + var b strings.Builder + fmt.Fprintf(&b, "%d", len(prog)) + for _, ins := range prog { + fmt.Fprintf(&b, ",%d %d %d %d", ins.Op, ins.Jt, ins.Jf, ins.K) + } + return b.String() +} diff --git a/libnetwork/drivers/overlay/bpf_test.go b/libnetwork/drivers/overlay/bpf_test.go new file mode 100644 index 0000000000..f636d14e7a --- /dev/null +++ b/libnetwork/drivers/overlay/bpf_test.go @@ -0,0 +1,14 @@ +package overlay + +import ( + "testing" +) + +func FuzzVNIMatchBPFDoesNotPanic(f *testing.F) { + for _, seed := range []uint32{0, 1, 42, 0xfffffe, 0xffffff, 0xfffffffe, 0xffffffff} { + f.Add(seed) + } + f.Fuzz(func(t *testing.T, vni uint32) { + _ = vniMatchBPF(vni) + }) +} diff --git a/libnetwork/drivers/overlay/encryption.go b/libnetwork/drivers/overlay/encryption.go index 91800e99a9..81b978cef5 100644 --- a/libnetwork/drivers/overlay/encryption.go +++ b/libnetwork/drivers/overlay/encryption.go @@ -18,6 +18,7 @@ import ( "github.com/docker/docker/libnetwork/iptables" "github.com/docker/docker/libnetwork/ns" "github.com/docker/docker/libnetwork/types" + "github.com/hashicorp/go-multierror" "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" ) @@ -225,7 +226,31 @@ func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error { return nil } -func programMangle(vni uint32, add bool) error { +type matchVXLANFunc func(port, vni uint32) []string + +// programVXLANRuleFunc returns a function which tries calling programWithMatch +// with the u32 match, falling back to the BPF match if installing the u32 variant +// of the rules fails. When add is false, removal is attempted with both variants and any errors are combined. 
+func programVXLANRuleFunc(programWithMatch func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error) func(vni uint32, add bool) error { + return func(vni uint32, add bool) error { + if add { + if err := programWithMatch(matchVXLANWithU32, vni, add); err != nil { + // That didn't work. Maybe the xt_u32 module isn't available? Try again with xt_bpf. + err2 := programWithMatch(matchVXLANWithBPF, vni, add) + if err2 != nil { + return multierror.Append(err, err2) + } + } + return nil + } else { + // Delete both flavours, since either one may have been installed when the rule was added. + err := programWithMatch(matchVXLANWithU32, vni, add) + return multierror.Append(err, programWithMatch(matchVXLANWithBPF, vni, add)).ErrorOrNil() + } + } +} + +var programMangle = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error { var ( m = strconv.FormatUint(mark, 10) chain = "OUTPUT" @@ -247,9 +272,9 @@ func programMangle(vni uint32, add bool) error { } return nil -} +}) -func programInput(vni uint32, add bool) error { +var programInput = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error { var ( plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni) ipsecVxlan = append([]string{"-m", "policy", "--dir", "in", "--pol", "ipsec"}, plainVxlan...) @@ -279,7 +304,7 @@ func programInput(vni uint32, add bool) error { } return nil -} +}) func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) { var ( diff --git a/libnetwork/drivers/overlay/encryption_bpf.go b/libnetwork/drivers/overlay/encryption_bpf.go new file mode 100644 index 0000000000..de57c21744 --- /dev/null +++ b/libnetwork/drivers/overlay/encryption_bpf.go @@ -0,0 +1,17 @@ +package overlay + +import ( + "strconv" +) + +// matchVXLANWithBPF returns an iptables rule fragment which matches VXLAN +// datagrams with the given destination port and VXLAN Network ID utilizing the +// xt_bpf netfilter kernel module. 
The returned slice's backing array is +// guaranteed not to alias any other slice's. +func matchVXLANWithBPF(port, vni uint32) []string { + dport := strconv.FormatUint(uint64(port), 10) + vniMatch := marshalXTBPF(vniMatchBPF(vni)) + + // See the iptables bpf match documentation: https://ipset.netfilter.org/iptables-extensions.man.html#lbAH + return []string{"-p", "udp", "--dport", dport, "-m", "bpf", "--bytecode", vniMatch} +} diff --git a/libnetwork/drivers/overlay/encryption_u32.go b/libnetwork/drivers/overlay/encryption_u32.go index c93f7c96fc..94a74031ac 100644 --- a/libnetwork/drivers/overlay/encryption_u32.go +++ b/libnetwork/drivers/overlay/encryption_u32.go @@ -5,11 +5,11 @@ import ( "strconv" ) -// matchVXLAN returns an iptables rule fragment which matches VXLAN datagrams -// with the given destination port and VXLAN Network ID utilizing the xt_u32 -// netfilter kernel module. The returned slice's backing array is guaranteed not -// to alias any other slice's. -func matchVXLAN(port, vni uint32) []string { +// matchVXLANWithU32 returns an iptables rule fragment which matches VXLAN +// datagrams with the given destination port and VXLAN Network ID utilizing the +// xt_u32 netfilter kernel module. The returned slice's backing array is +// guaranteed not to alias any other slice's. +func matchVXLANWithU32(port, vni uint32) []string { dport := strconv.FormatUint(uint64(port), 10) // The u32 expression language is documented in iptables-extensions(8).