libnet/d/overlay: add BPF-powered VNI matcher
Some newer distros such as RHEL 9 have stopped making the xt_u32 kernel
module available with the kernels they ship. They do ship the xt_bpf
kernel module, which can do everything xt_u32 can and more. Add an
alternative implementation of the iptables match rule which uses xt_bpf
to implement exactly the same logic as the u32 filter using a BPF
program. Try programming the BPF-powered rules as a fallback when
programming the u32-powered rules fails.
Signed-off-by: Cory Snider <csnider@mirantis.com>
(cherry picked from commit 105b9834fb
)
Signed-off-by: Cory Snider <csnider@mirantis.com>
This commit is contained in:
parent
5c5fac2374
commit
98cbcb8003
5 changed files with 112 additions and 9 deletions
47
libnetwork/drivers/overlay/bpf.go
Normal file
47
libnetwork/drivers/overlay/bpf.go
Normal file
|
@ -0,0 +1,47 @@
|
|||
package overlay
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/bpf"
|
||||
)
|
||||
|
||||
// vniMatchBPF returns a BPF program suitable for passing to the iptables bpf
|
||||
// match which matches on the VXAN Network ID of encapsulated packets. The
|
||||
// program assumes that it will be used in a rule which only matches UDP
|
||||
// datagrams.
|
||||
func vniMatchBPF(vni uint32) []bpf.RawInstruction {
|
||||
asm, err := bpf.Assemble([]bpf.Instruction{
|
||||
bpf.LoadMemShift{Off: 0}, // ldx 4*([0] & 0xf) ; Load length of IPv4 header into X
|
||||
bpf.LoadIndirect{Off: 12, Size: 4}, // ld [x + 12] ; Load VXLAN ID (UDP header + 4 bytes) into A
|
||||
bpf.ALUOpConstant{Op: bpf.ALUOpAnd, Val: 0xffffff00}, // and #0xffffff00 ; VXLAN ID is in top 24 bits
|
||||
bpf.JumpIf{Cond: bpf.JumpEqual, Val: vni << 8, SkipTrue: 1}, // jeq ($vni << 8), match
|
||||
bpf.RetConstant{Val: 0}, // ret #0
|
||||
bpf.RetConstant{Val: ^uint32(0)}, // match: ret #-1
|
||||
})
|
||||
// bpf.Assemble() only errors if an instruction is invalid. As the only variable
|
||||
// part of the program is an instruction value for which the entire range is
|
||||
// valid, whether the program can be successfully assembled is independent of
|
||||
// the input. Given that the only recourse is to fix this function and
|
||||
// recompile, there's little value in bubbling the error up to the caller.
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return asm
|
||||
}
|
||||
|
||||
// marshalXTBPF marshals a BPF program into the "decimal" byte code format
|
||||
// which is suitable for passing to the [iptables bpf match].
|
||||
//
|
||||
// iptables -m bpf --bytecode
|
||||
//
|
||||
// [iptables bpf match]: https://ipset.netfilter.org/iptables-extensions.man.html#lbAH
|
||||
func marshalXTBPF(prog []bpf.RawInstruction) string { //nolint:unused
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "%d", len(prog))
|
||||
for _, ins := range prog {
|
||||
fmt.Fprintf(&b, ",%d %d %d %d", ins.Op, ins.Jt, ins.Jf, ins.K)
|
||||
}
|
||||
return b.String()
|
||||
}
|
14
libnetwork/drivers/overlay/bpf_test.go
Normal file
14
libnetwork/drivers/overlay/bpf_test.go
Normal file
|
@ -0,0 +1,14 @@
|
|||
package overlay
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func FuzzVNIMatchBPFDoesNotPanic(f *testing.F) {
|
||||
for _, seed := range []uint32{0, 1, 42, 0xfffffe, 0xffffff, 0xfffffffe, 0xffffffff} {
|
||||
f.Add(seed)
|
||||
}
|
||||
f.Fuzz(func(t *testing.T, vni uint32) {
|
||||
_ = vniMatchBPF(vni)
|
||||
})
|
||||
}
|
|
@ -18,6 +18,7 @@ import (
|
|||
"github.com/docker/docker/libnetwork/iptables"
|
||||
"github.com/docker/docker/libnetwork/ns"
|
||||
"github.com/docker/docker/libnetwork/types"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
@ -225,7 +226,31 @@ func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func programMangle(vni uint32, add bool) error {
|
||||
type matchVXLANFunc func(port, vni uint32) []string
|
||||
|
||||
// programVXLANRuleFunc returns a function which tries calling programWithMatch
|
||||
// with the u32 match, falling back to the BPF match if installing u32 variant
|
||||
// of the rules fails.
|
||||
func programVXLANRuleFunc(programWithMatch func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error) func(vni uint32, add bool) error {
|
||||
return func(vni uint32, add bool) error {
|
||||
if add {
|
||||
if err := programWithMatch(matchVXLANWithU32, vni, add); err != nil {
|
||||
// That didn't work. Maybe the xt_u32 module isn't available? Try again with xt_bpf.
|
||||
err2 := programWithMatch(matchVXLANWithBPF, vni, add)
|
||||
if err2 != nil {
|
||||
return multierror.Append(err, err2)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
} else {
|
||||
// Delete both flavours.
|
||||
err := programWithMatch(matchVXLANWithU32, vni, add)
|
||||
return multierror.Append(err, programWithMatch(matchVXLANWithBPF, vni, add)).ErrorOrNil()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var programMangle = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error {
|
||||
var (
|
||||
m = strconv.FormatUint(mark, 10)
|
||||
chain = "OUTPUT"
|
||||
|
@ -247,9 +272,9 @@ func programMangle(vni uint32, add bool) error {
|
|||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
})
|
||||
|
||||
func programInput(vni uint32, add bool) error {
|
||||
var programInput = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error {
|
||||
var (
|
||||
plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni)
|
||||
ipsecVxlan = append([]string{"-m", "policy", "--dir", "in", "--pol", "ipsec"}, plainVxlan...)
|
||||
|
@ -279,7 +304,7 @@ func programInput(vni uint32, add bool) error {
|
|||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
})
|
||||
|
||||
func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) {
|
||||
var (
|
||||
|
|
17
libnetwork/drivers/overlay/encryption_bpf.go
Normal file
17
libnetwork/drivers/overlay/encryption_bpf.go
Normal file
|
@ -0,0 +1,17 @@
|
|||
package overlay
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// matchVXLANWithBPF returns an iptables rule fragment which matches VXLAN
|
||||
// datagrams with the given destination port and VXLAN Network ID utilizing the
|
||||
// xt_bpf netfilter kernel module. The returned slice's backing array is
|
||||
// guaranteed not to alias any other slice's.
|
||||
func matchVXLANWithBPF(port, vni uint32) []string {
|
||||
dport := strconv.FormatUint(uint64(port), 10)
|
||||
vniMatch := marshalXTBPF(vniMatchBPF(vni))
|
||||
|
||||
// https://ipset.netfilter.org/iptables-extensions.man.html#lbAH
|
||||
return []string{"-p", "udp", "--dport", dport, "-m", "bpf", "--bytecode", vniMatch}
|
||||
}
|
|
@ -5,11 +5,11 @@ import (
|
|||
"strconv"
|
||||
)
|
||||
|
||||
// matchVXLAN returns an iptables rule fragment which matches VXLAN datagrams
|
||||
// with the given destination port and VXLAN Network ID utilizing the xt_u32
|
||||
// netfilter kernel module. The returned slice's backing array is guaranteed not
|
||||
// to alias any other slice's.
|
||||
func matchVXLAN(port, vni uint32) []string {
|
||||
// matchVXLANWithU32 returns an iptables rule fragment which matches VXLAN
|
||||
// datagrams with the given destination port and VXLAN Network ID utilizing the
|
||||
// xt_u32 netfilter kernel module. The returned slice's backing array is
|
||||
// guaranteed not to alias any other slice's.
|
||||
func matchVXLANWithU32(port, vni uint32) []string {
|
||||
dport := strconv.FormatUint(uint64(port), 10)
|
||||
|
||||
// The u32 expression language is documented in iptables-extensions(8).
|
||||
|
|
Loading…
Reference in a new issue