bpf_linux_test.go 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. package overlay
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "errors"
  6. "fmt"
  7. "net"
  8. "net/netip"
  9. "testing"
  10. "time"
  11. "golang.org/x/net/bpf"
  12. "golang.org/x/net/ipv4"
  13. "golang.org/x/sys/unix"
  14. )
  15. func TestVNIMatchBPF(t *testing.T) {
  16. // The BPF filter program under test uses Linux extensions which are not
  17. // emulated by any user-space BPF interpreters. It is also classic BPF,
  18. // which cannot be tested in-kernel using the bpf(BPF_PROG_RUN) syscall.
  19. // The best we can do without actually programming it into an iptables
  20. // rule and end-to-end testing it is to attach it as a socket filter to
  21. // a raw socket and test which loopback packets make it through.
  22. //
  23. // Modern kernels transpile cBPF programs into eBPF for execution, so a
  24. // possible future direction would be to extract the transpiler and
  25. // convert the program under test to eBPF so it could be loaded and run
  26. // using the bpf(2) syscall.
  27. // https://elixir.bootlin.com/linux/v6.2/source/net/core/filter.c#L559
  28. // Though the effort would be better spent on adding nftables support to
  29. // libnetwork so this whole BPF program could be replaced with a native
  30. // nftables '@th' match expression.
  31. //
  32. // The filter could be manually e2e-tested for both IPv4 and IPv6 by
  33. // programming ip[6]tables rules which log matching packets and sending
  34. // test packets loopback using netcat. All the necessary information
  35. // (bytecode and an acceptable test vector) is logged by this test.
  36. //
  37. // $ sudo ip6tables -A INPUT -p udp -s ::1 -d ::1 -m bpf \
  38. // --bytecode "${bpf_program_under_test}" \
  39. // -j LOG --log-prefix '[IPv6 VNI match]:'
  40. // $ <<<"${udp_payload_hexdump}" xxd -r -p | nc -u -6 localhost 30000
  41. // $ sudo dmesg
  42. loopback := net.IPv4(127, 0, 0, 1)
  43. // Reserve an ephemeral UDP port for loopback testing.
  44. // Binding to a TUN device would be more hermetic, but is much more effort to set up.
  45. reservation, err := net.ListenUDP("udp", &net.UDPAddr{IP: loopback, Port: 0})
  46. if err != nil {
  47. t.Fatal(err)
  48. }
  49. defer reservation.Close()
  50. daddr := reservation.LocalAddr().(*net.UDPAddr).AddrPort()
  51. sender, err := net.DialUDP("udp", nil, reservation.LocalAddr().(*net.UDPAddr))
  52. if err != nil {
  53. t.Fatal(err)
  54. }
  55. defer sender.Close()
  56. saddr := sender.LocalAddr().(*net.UDPAddr).AddrPort()
  57. // There doesn't seem to be a way to receive the entire Layer-3 IPv6
  58. // packet including the fixed IP header using the portable raw sockets
  59. // API. That can only be done from an AF_PACKET socket, and it is
  60. // unclear whether 'ld poff' would behave the same in a BPF program
  61. // attached to such a socket as in an xt_bpf match.
  62. c, err := net.ListenIP("ip4:udp", &net.IPAddr{IP: loopback})
  63. if err != nil {
  64. if errors.Is(err, unix.EPERM) {
  65. t.Skip("test requires CAP_NET_RAW")
  66. }
  67. t.Fatal(err)
  68. }
  69. defer c.Close()
  70. pc := ipv4.NewPacketConn(c)
  71. testvectors := []uint32{
  72. 0,
  73. 1,
  74. 0x08,
  75. 42,
  76. 0x80,
  77. 0xfe,
  78. 0xff,
  79. 0x100,
  80. 0xfff, // 4095
  81. 0x1000, // 4096
  82. 0x1001,
  83. 0x10000,
  84. 0xfffffe,
  85. 0xffffff, // Max VNI
  86. }
  87. for _, vni := range []uint32{1, 42, 0x100, 0x1000, 0xfffffe, 0xffffff} {
  88. t.Run(fmt.Sprintf("vni=%d", vni), func(t *testing.T) {
  89. setBPF(t, pc, vniMatchBPF(vni))
  90. for _, v := range testvectors {
  91. pkt := appendVXLANHeader(nil, v)
  92. pkt = append(pkt, []byte{0xde, 0xad, 0xbe, 0xef}...)
  93. if _, err := sender.Write(pkt); err != nil {
  94. t.Fatal(err)
  95. }
  96. rpkt, ok := readUDPPacketFromRawSocket(t, pc, saddr, daddr)
  97. // Sanity check: the only packets readUDPPacketFromRawSocket
  98. // should return are ones we sent.
  99. if ok && !bytes.Equal(pkt, rpkt) {
  100. t.Fatalf("received unexpected packet: % x", rpkt)
  101. }
  102. if ok != (v == vni) {
  103. t.Errorf("unexpected packet tagged with vni=%d (got %v, want %v)", v, ok, v == vni)
  104. }
  105. }
  106. })
  107. }
  108. }
  // appendVXLANHeader appends an 8-byte VXLAN header carrying vni to b and
  // returns the extended slice.
  //
  // Layout (https://tools.ietf.org/html/rfc7348#section-5): one flags byte
  // (0x08), three zero bytes, then a big-endian 32-bit word holding vni shifted
  // left by 8 bits so that the lowest byte is zero. Bits of vni above the low
  // 24 are shifted out of the word and lost.
  func appendVXLANHeader(b []byte, vni uint32) []byte {
  	const flags = 0x08

  	// First 32-bit word: flags byte followed by three zero bytes.
  	b = append(b, flags, 0x00, 0x00, 0x00)

  	// Second 32-bit word: VNI in the upper 24 bits, low byte zero.
  	var word [4]byte
  	binary.BigEndian.PutUint32(word[:], vni<<8)
  	return append(b, word[:]...)
  }
  114. func setBPF(t *testing.T, c *ipv4.PacketConn, fprog []bpf.RawInstruction) {
  115. // https://natanyellin.com/posts/ebpf-filtering-done-right/
  116. blockall, _ := bpf.Assemble([]bpf.Instruction{bpf.RetConstant{Val: 0}})
  117. if err := c.SetBPF(blockall); err != nil {
  118. t.Fatal(err)
  119. }
  120. ms := make([]ipv4.Message, 100)
  121. for {
  122. n, err := c.ReadBatch(ms, unix.MSG_DONTWAIT)
  123. if err != nil {
  124. if errors.Is(err, unix.EAGAIN) {
  125. break
  126. }
  127. t.Fatal(err)
  128. }
  129. if n == 0 {
  130. break
  131. }
  132. }
  133. t.Logf("setting socket filter: %v", marshalXTBPF(fprog))
  134. if err := c.SetBPF(fprog); err != nil {
  135. t.Fatal(err)
  136. }
  137. }
  138. // readUDPPacketFromRawSocket reads raw IP packets from pc until a UDP packet
  139. // which matches the (src, dst) 4-tuple is found or the receive buffer is empty,
  140. // and returns the payload of the UDP packet.
  141. func readUDPPacketFromRawSocket(t *testing.T, pc *ipv4.PacketConn, src, dst netip.AddrPort) ([]byte, bool) {
  142. t.Helper()
  143. ms := []ipv4.Message{
  144. {Buffers: [][]byte{make([]byte, 1500)}},
  145. }
  146. // Set a time limit to prevent an infinite loop if there is a lot of
  147. // loopback traffic being captured which prevents the buffer from
  148. // emptying.
  149. deadline := time.Now().Add(1 * time.Second)
  150. for time.Now().Before(deadline) {
  151. n, err := pc.ReadBatch(ms, unix.MSG_DONTWAIT)
  152. if err != nil {
  153. if !errors.Is(err, unix.EAGAIN) {
  154. t.Fatal(err)
  155. }
  156. break
  157. }
  158. if n == 0 {
  159. break
  160. }
  161. pkt := ms[0].Buffers[0][:ms[0].N]
  162. psrc, pdst, payload, ok := parseUDP(pkt)
  163. // Discard captured packets which belong to other unrelated flows.
  164. if !ok || psrc != src || pdst != dst {
  165. t.Logf("discarding packet:\n% x", pkt)
  166. continue
  167. }
  168. t.Logf("received packet (%v -> %v):\n% x", psrc, pdst, payload)
  169. // While not strictly required, copy payload into a new
  170. // slice which does not share a backing array with pkt
  171. // so the IP and UDP headers can be garbage collected.
  172. return append([]byte(nil), payload...), true
  173. }
  174. return nil, false
  175. }
  // parseIPv4 extracts the source address, destination address, protocol
  // number and payload from the raw IPv4 packet in b.
  //
  // ok is false, and the other results are zero values, when b is shorter than
  // the 20-byte minimum header, when the header length field (IHL) encodes
  // fewer than 20 bytes, or when it encodes more bytes than b contains. The
  // version field, total length and checksum are not validated. The returned
  // payload aliases b.
  func parseIPv4(b []byte) (src, dst netip.Addr, protocol byte, payload []byte, ok bool) {
  	const minHeaderLen = 20
  	if len(b) < minHeaderLen {
  		return netip.Addr{}, netip.Addr{}, 0, nil, false
  	}
  	// The low nibble of the first byte is the header length in units of
  	// 32-bit words.
  	hlen := int(b[0]&0x0f) * 4
  	// A header length past the end of b means the packet is truncated or
  	// malformed; without this check b[hlen:] below would panic.
  	if hlen < minHeaderLen || hlen > len(b) {
  		return netip.Addr{}, netip.Addr{}, 0, nil, false
  	}
  	// AddrFromSlice cannot fail here: both slices are exactly 4 bytes.
  	src, _ = netip.AddrFromSlice(b[12:16])
  	dst, _ = netip.AddrFromSlice(b[16:20])
  	protocol = b[9]
  	payload = b[hlen:]
  	return src, dst, protocol, payload, true
  }
  190. // parseUDP parses the IP and UDP headers from the raw Layer-3 packet data in b.
  191. func parseUDP(b []byte) (src, dst netip.AddrPort, payload []byte, ok bool) {
  192. srcip, dstip, protocol, ippayload, ok := parseIPv4(b)
  193. if !ok {
  194. return netip.AddrPort{}, netip.AddrPort{}, nil, false
  195. }
  196. if protocol != 17 {
  197. return netip.AddrPort{}, netip.AddrPort{}, nil, false
  198. }
  199. if len(ippayload) < 8 {
  200. return netip.AddrPort{}, netip.AddrPort{}, nil, false
  201. }
  202. sport := binary.BigEndian.Uint16(ippayload[0:2])
  203. dport := binary.BigEndian.Uint16(ippayload[2:4])
  204. src = netip.AddrPortFrom(srcip, sport)
  205. dst = netip.AddrPortFrom(dstip, dport)
  206. payload = ippayload[8:]
  207. return src, dst, payload, true
  208. }