libnet/d/overlay: add BPF-powered VNI matcher

Some newer distros such as RHEL 9 have stopped making the xt_u32 kernel
module available with the kernels they ship. They do ship the xt_bpf
kernel module, which can do everything xt_u32 can and more. Add an
alternative implementation of the iptables match rule which uses xt_bpf
to implement exactly the same logic as the u32 filter using a BPF
program. Try programming the BPF-powered rules as a fallback when
programming the u32-powered rules fails.

Signed-off-by: Cory Snider <csnider@mirantis.com>
(cherry picked from commit 105b9834fb)
Signed-off-by: Cory Snider <csnider@mirantis.com>
This commit is contained in:
Cory Snider 2023-03-10 15:29:27 -05:00
parent 5c5fac2374
commit 98cbcb8003
5 changed files with 112 additions and 9 deletions

View file

@ -0,0 +1,47 @@
package overlay
import (
"fmt"
"strings"
"golang.org/x/net/bpf"
)
// vniMatchBPF assembles a BPF program for use with the iptables bpf match.
// The program accepts a packet when the VXLAN Network ID of the encapsulated
// packet equals vni and rejects it otherwise. It performs no protocol check of
// its own, so the rule it is used in must already match only UDP datagrams.
func vniMatchBPF(vni uint32) []bpf.RawInstruction {
	program := []bpf.Instruction{
		// ldx 4*([0] & 0xf) ; Load length of IPv4 header into X
		bpf.LoadMemShift{Off: 0},
		// ld [x + 12] ; Load VXLAN ID (UDP header + 4 bytes) into A
		bpf.LoadIndirect{Off: 12, Size: 4},
		// and #0xffffff00 ; VXLAN ID is in top 24 bits
		bpf.ALUOpConstant{Op: bpf.ALUOpAnd, Val: 0xffffff00},
		// jeq ($vni << 8), match
		bpf.JumpIf{Cond: bpf.JumpEqual, Val: vni << 8, SkipTrue: 1},
		// ret #0
		bpf.RetConstant{Val: 0},
		// match: ret #-1
		bpf.RetConstant{Val: ^uint32(0)},
	}

	raw, err := bpf.Assemble(program)
	if err != nil {
		// bpf.Assemble() fails only on an invalid instruction. The sole
		// input-dependent part of the program is an instruction value for
		// which the entire range is valid, so success does not depend on vni.
		// The only remedy for a failure is to fix this function and
		// recompile, hence a panic rather than an error returned to the
		// caller.
		panic(err)
	}
	return raw
}
// marshalXTBPF renders a BPF program in the "decimal" byte code format
// accepted by the [iptables bpf match]:
//
//	iptables -m bpf --bytecode
//
// The result is the instruction count followed by one "op jt jf k" group per
// instruction, all separated by commas.
//
// [iptables bpf match]: https://ipset.netfilter.org/iptables-extensions.man.html#lbAH
func marshalXTBPF(prog []bpf.RawInstruction) string { //nolint:unused
	// One field for the leading instruction count plus one per instruction.
	fields := make([]string, 0, len(prog)+1)
	fields = append(fields, fmt.Sprint(len(prog)))
	for _, ins := range prog {
		fields = append(fields, fmt.Sprintf("%d %d %d %d", ins.Op, ins.Jt, ins.Jf, ins.K))
	}
	return strings.Join(fields, ",")
}

View file

@ -0,0 +1,14 @@
package overlay
import (
"testing"
)
// FuzzVNIMatchBPFDoesNotPanic feeds arbitrary uint32 values to vniMatchBPF.
// The result is discarded: the only property under test is that building the
// program never panics, including for values wider than 24 bits.
func FuzzVNIMatchBPFDoesNotPanic(f *testing.F) {
	// Seed corpus: small values plus the boundaries of the 24-bit and 32-bit ranges.
	seeds := []uint32{0, 1, 42, 0xfffffe, 0xffffff, 0xfffffffe, 0xffffffff}
	for i := range seeds {
		f.Add(seeds[i])
	}

	f.Fuzz(func(_ *testing.T, vni uint32) {
		_ = vniMatchBPF(vni)
	})
}

View file

@ -18,6 +18,7 @@ import (
"github.com/docker/docker/libnetwork/iptables"
"github.com/docker/docker/libnetwork/ns"
"github.com/docker/docker/libnetwork/types"
"github.com/hashicorp/go-multierror"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
)
@ -225,7 +226,31 @@ func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
return nil
}
func programMangle(vni uint32, add bool) error {
// matchVXLANFunc is the signature shared by the VXLAN match builders
// (matchVXLANWithU32 and matchVXLANWithBPF): given a UDP destination port and
// a VXLAN Network ID, it returns an iptables rule fragment.
type matchVXLANFunc func(port, vni uint32) []string
// programVXLANRuleFunc wraps programWithMatch so that the returned function
// chooses the VXLAN match implementation on the caller's behalf.
//
// When adding rules, it first calls programWithMatch with the u32 match. Only
// if that fails does it retry with the BPF match; if the retry fails as well,
// the errors from both attempts are returned together.
//
// When removing rules, it always calls programWithMatch for both the u32 and
// the BPF flavours and returns the accumulated errors, or nil if neither call
// failed.
func programVXLANRuleFunc(programWithMatch func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error) func(vni uint32, add bool) error {
	return func(vni uint32, add bool) error {
		if !add {
			// Delete both flavours.
			u32Err := programWithMatch(matchVXLANWithU32, vni, add)
			bpfErr := programWithMatch(matchVXLANWithBPF, vni, add)
			return multierror.Append(u32Err, bpfErr).ErrorOrNil()
		}

		u32Err := programWithMatch(matchVXLANWithU32, vni, add)
		if u32Err == nil {
			return nil
		}

		// That didn't work. Maybe the xt_u32 module isn't available? Try again with xt_bpf.
		if bpfErr := programWithMatch(matchVXLANWithBPF, vni, add); bpfErr != nil {
			return multierror.Append(u32Err, bpfErr)
		}
		return nil
	}
}
var programMangle = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error {
var (
m = strconv.FormatUint(mark, 10)
chain = "OUTPUT"
@ -247,9 +272,9 @@ func programMangle(vni uint32, add bool) error {
}
return nil
}
})
func programInput(vni uint32, add bool) error {
var programInput = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error {
var (
plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni)
ipsecVxlan = append([]string{"-m", "policy", "--dir", "in", "--pol", "ipsec"}, plainVxlan...)
@ -279,7 +304,7 @@ func programInput(vni uint32, add bool) error {
}
return nil
}
})
func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) {
var (

View file

@ -0,0 +1,17 @@
package overlay
import (
"strconv"
)
// matchVXLANWithBPF builds the iptables rule fragment that selects VXLAN
// datagrams by UDP destination port and VXLAN Network ID, with the VNI
// comparison delegated to the xt_bpf netfilter kernel module. Each call
// returns a freshly allocated slice, so its backing array never aliases that
// of any other slice.
func matchVXLANWithBPF(port, vni uint32) []string {
	// https://ipset.netfilter.org/iptables-extensions.man.html#lbAH
	return []string{
		"-p", "udp",
		"--dport", strconv.FormatUint(uint64(port), 10),
		"-m", "bpf",
		"--bytecode", marshalXTBPF(vniMatchBPF(vni)),
	}
}

View file

@ -5,11 +5,11 @@ import (
"strconv"
)
// matchVXLAN returns an iptables rule fragment which matches VXLAN datagrams
// with the given destination port and VXLAN Network ID utilizing the xt_u32
// netfilter kernel module. The returned slice's backing array is guaranteed not
// to alias any other slice's.
func matchVXLAN(port, vni uint32) []string {
// matchVXLANWithU32 returns an iptables rule fragment which matches VXLAN
// datagrams with the given destination port and VXLAN Network ID utilizing the
// xt_u32 netfilter kernel module. The returned slice's backing array is
// guaranteed not to alias any other slice's.
func matchVXLANWithU32(port, vni uint32) []string {
dport := strconv.FormatUint(uint64(port), 10)
// The u32 expression language is documented in iptables-extensions(8).