Explorar o código

cgroup2: implement `docker info`

ref: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
Akihiro Suda hai 5 anos
pai
achega
f350b53241
Modificáronse 66 ficheiros con 7376 adicións e 89 borrados
  1. 7 0
      api/swagger.yaml
  2. 1 0
      api/types/types.go
  3. 6 1
      cmd/dockerd/daemon.go
  4. 3 3
      daemon/daemon.go
  5. 15 2
      daemon/daemon_unix.go
  6. 10 1
      daemon/daemon_unsupported.go
  7. 5 0
      daemon/daemon_windows.go
  8. 1 2
      daemon/info.go
  9. 43 26
      daemon/info_unix.go
  10. 1 0
      docs/api/version-history.md
  11. 151 0
      pkg/sysinfo/cgroup2_linux.go
  12. 3 0
      pkg/sysinfo/sysinfo.go
  13. 28 46
      pkg/sysinfo/sysinfo_linux.go
  14. 6 1
      pkg/sysinfo/sysinfo_unix.go
  15. 6 1
      pkg/sysinfo/sysinfo_windows.go
  16. 13 4
      runconfig/config.go
  17. 3 2
      runconfig/config_test.go
  18. 1 0
      vendor.conf
  19. 23 0
      vendor/github.com/cilium/ebpf/LICENSE
  20. 203 0
      vendor/github.com/cilium/ebpf/abi.go
  21. 149 0
      vendor/github.com/cilium/ebpf/asm/alu.go
  22. 107 0
      vendor/github.com/cilium/ebpf/asm/alu_string.go
  23. 2 0
      vendor/github.com/cilium/ebpf/asm/doc.go
  24. 143 0
      vendor/github.com/cilium/ebpf/asm/func.go
  25. 133 0
      vendor/github.com/cilium/ebpf/asm/func_string.go
  26. 416 0
      vendor/github.com/cilium/ebpf/asm/instruction.go
  27. 109 0
      vendor/github.com/cilium/ebpf/asm/jump.go
  28. 53 0
      vendor/github.com/cilium/ebpf/asm/jump_string.go
  29. 189 0
      vendor/github.com/cilium/ebpf/asm/load_store.go
  30. 80 0
      vendor/github.com/cilium/ebpf/asm/load_store_string.go
  31. 237 0
      vendor/github.com/cilium/ebpf/asm/opcode.go
  32. 38 0
      vendor/github.com/cilium/ebpf/asm/opcode_string.go
  33. 42 0
      vendor/github.com/cilium/ebpf/asm/register.go
  34. 148 0
      vendor/github.com/cilium/ebpf/collection.go
  35. 17 0
      vendor/github.com/cilium/ebpf/doc.go
  36. 392 0
      vendor/github.com/cilium/ebpf/elf_reader.go
  37. 8 0
      vendor/github.com/cilium/ebpf/go.mod
  38. 64 0
      vendor/github.com/cilium/ebpf/internal/cpu.go
  39. 24 0
      vendor/github.com/cilium/ebpf/internal/endian.go
  40. 85 0
      vendor/github.com/cilium/ebpf/internal/feature.go
  41. 127 0
      vendor/github.com/cilium/ebpf/internal/unix/types_linux.go
  42. 193 0
      vendor/github.com/cilium/ebpf/internal/unix/types_other.go
  43. 58 0
      vendor/github.com/cilium/ebpf/linker.go
  44. 604 0
      vendor/github.com/cilium/ebpf/map.go
  45. 192 0
      vendor/github.com/cilium/ebpf/marshalers.go
  46. 504 0
      vendor/github.com/cilium/ebpf/prog.go
  47. 14 0
      vendor/github.com/cilium/ebpf/ptr_32_be.go
  48. 14 0
      vendor/github.com/cilium/ebpf/ptr_32_le.go
  49. 14 0
      vendor/github.com/cilium/ebpf/ptr_64.go
  50. 20 0
      vendor/github.com/cilium/ebpf/readme.md
  51. 447 0
      vendor/github.com/cilium/ebpf/syscalls.go
  52. 189 0
      vendor/github.com/cilium/ebpf/types.go
  53. 78 0
      vendor/github.com/cilium/ebpf/types_string.go
  54. 83 0
      vendor/github.com/containerd/cgroups/v2/cpu.go
  55. 199 0
      vendor/github.com/containerd/cgroups/v2/devicefilter.go
  56. 83 0
      vendor/github.com/containerd/cgroups/v2/ebpf.go
  57. 50 0
      vendor/github.com/containerd/cgroups/v2/errors.go
  58. 37 0
      vendor/github.com/containerd/cgroups/v2/hugetlb.go
  59. 64 0
      vendor/github.com/containerd/cgroups/v2/io.go
  60. 739 0
      vendor/github.com/containerd/cgroups/v2/manager.go
  61. 52 0
      vendor/github.com/containerd/cgroups/v2/memory.go
  62. 60 0
      vendor/github.com/containerd/cgroups/v2/paths.go
  63. 37 0
      vendor/github.com/containerd/cgroups/v2/pids.go
  64. 46 0
      vendor/github.com/containerd/cgroups/v2/rdma.go
  65. 65 0
      vendor/github.com/containerd/cgroups/v2/state.go
  66. 442 0
      vendor/github.com/containerd/cgroups/v2/utils.go

+ 7 - 0
api/swagger.yaml

@@ -4047,6 +4047,13 @@ definitions:
         enum: ["cgroupfs", "systemd", "none"]
         default: "cgroupfs"
         example: "cgroupfs"
+      CgroupVersion:
+        description: |
+          The version of the cgroup.
+        type: "string"
+        enum: ["1", "2"]
+        default: "1"
+        example: "1"
       NEventsListener:
         description: "Number of event listeners subscribed."
         type: "integer"

+ 1 - 0
api/types/types.go

@@ -175,6 +175,7 @@ type Info struct {
 	SystemTime         string
 	LoggingDriver      string
 	CgroupDriver       string
+	CgroupVersion      string `json:",omitempty"`
 	NEventsListener    int
 	KernelVersion      string
 	OperatingSystem    string

+ 6 - 1
cmd/dockerd/daemon.go

@@ -45,6 +45,7 @@ import (
 	"github.com/docker/docker/pkg/pidfile"
 	"github.com/docker/docker/pkg/plugingetter"
 	"github.com/docker/docker/pkg/signal"
+	"github.com/docker/docker/pkg/sysinfo"
 	"github.com/docker/docker/pkg/system"
 	"github.com/docker/docker/plugin"
 	"github.com/docker/docker/rootless"
@@ -456,7 +457,11 @@ func warnOnDeprecatedConfigOptions(config *config.Config) {
 }
 
 func initRouter(opts routerOptions) {
-	decoder := runconfig.ContainerDecoder{}
+	decoder := runconfig.ContainerDecoder{
+		GetSysInfo: func() *sysinfo.SysInfo {
+			return opts.daemon.RawSysInfo(true)
+		},
+	}
 
 	routers := []router.Router{
 		// we need to add the checkpoint router before the container router or the DELETE gets masked

+ 3 - 3
daemon/daemon.go

@@ -42,6 +42,7 @@ import (
 	"github.com/docker/docker/errdefs"
 	bkconfig "github.com/moby/buildkit/cmd/buildkitd/config"
 	"github.com/moby/buildkit/util/resolver"
+	rsystem "github.com/opencontainers/runc/libcontainer/system"
 	"github.com/sirupsen/logrus"
 
 	// register graph drivers
@@ -56,7 +57,6 @@ import (
 	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/locker"
 	"github.com/docker/docker/pkg/plugingetter"
-	"github.com/docker/docker/pkg/sysinfo"
 	"github.com/docker/docker/pkg/system"
 	"github.com/docker/docker/pkg/truncindex"
 	"github.com/docker/docker/plugin"
@@ -1026,10 +1026,10 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
 		return nil, err
 	}
 
-	sysInfo := sysinfo.New(false)
+	sysInfo := d.RawSysInfo(false)
 	// Check if the devices cgroup is mounted; it is a hard requirement for container security
 	// on Linux (the check is skipped when running inside a user namespace).
-	if runtime.GOOS == "linux" && !sysInfo.CgroupDevicesEnabled {
+	if runtime.GOOS == "linux" && !sysInfo.CgroupDevicesEnabled && !rsystem.RunningInUserNS() {
 		return nil, errors.New("Devices cgroup isn't mounted")
 	}
 

+ 15 - 2
daemon/daemon_unix.go

@@ -644,7 +644,7 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
 	if hostConfig == nil {
 		return nil, nil
 	}
-	sysInfo := sysinfo.New(true)
+	sysInfo := daemon.RawSysInfo(true)
 
 	w, err := verifyPlatformContainerResources(&hostConfig.Resources, sysInfo, update)
 
@@ -1745,7 +1745,7 @@ func (daemon *Daemon) initCgroupsPath(path string) error {
 	}
 
 	path = filepath.Join(mnt, root, path)
-	sysInfo := sysinfo.New(true)
+	sysInfo := daemon.RawSysInfo(true)
 	if err := maybeCreateCPURealTimeFile(sysInfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil {
 		return err
 	}
@@ -1779,3 +1779,16 @@ func (daemon *Daemon) setupSeccompProfile() error {
 func (daemon *Daemon) useShimV2() bool {
 	return cgroups.IsCgroup2UnifiedMode()
 }
+
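+// RawSysInfo returns a *sysinfo.SysInfo; with the systemd cgroup driver and ROOTLESSKIT_PARENT_EUID set, it inspects that user's slice under /user.slice.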
+func (daemon *Daemon) RawSysInfo(quiet bool) *sysinfo.SysInfo {
+	var opts []sysinfo.Opt
+	if daemon.getCgroupDriver() == cgroupSystemdDriver {
+		rootlesskitParentEUID := os.Getenv("ROOTLESSKIT_PARENT_EUID")
+		if rootlesskitParentEUID != "" {
+			groupPath := fmt.Sprintf("/user.slice/user-%s.slice", rootlesskitParentEUID)
+			opts = append(opts, sysinfo.WithCgroup2GroupPath(groupPath))
+		}
+	}
+	return sysinfo.New(quiet, opts...)
+}

+ 10 - 1
daemon/daemon_unsupported.go

@@ -1,9 +1,18 @@
 // +build !linux,!freebsd,!windows
 
 package daemon // import "github.com/docker/docker/daemon"
-import "github.com/docker/docker/daemon/config"
+
+import (
+	"github.com/docker/docker/daemon/config"
+	"github.com/docker/docker/pkg/sysinfo"
+)
 
 const platformSupported = false
 
 func setupResolvConf(config *config.Config) {
 }
+
+// RawSysInfo returns a *sysinfo.SysInfo created by sysinfo.New.
+func (daemon *Daemon) RawSysInfo(quiet bool) *sysinfo.SysInfo {
+	return sysinfo.New(quiet)
+}

+ 5 - 0
daemon/daemon_windows.go

@@ -657,3 +657,8 @@ func setupResolvConf(config *config.Config) {
 func (daemon *Daemon) useShimV2() bool {
 	return true
 }
+
+// RawSysInfo returns a *sysinfo.SysInfo created by sysinfo.New.
+func (daemon *Daemon) RawSysInfo(quiet bool) *sysinfo.SysInfo {
+	return sysinfo.New(quiet)
+}

+ 1 - 2
daemon/info.go

@@ -28,7 +28,7 @@ import (
 func (daemon *Daemon) SystemInfo() *types.Info {
 	defer metrics.StartTimer(hostInfoFunctions.WithValues("system_info"))()
 
-	sysInfo := sysinfo.New(true)
+	sysInfo := daemon.RawSysInfo(true)
 	cRunning, cPaused, cStopped := stateCtr.get()
 
 	v := &types.Info{
@@ -47,7 +47,6 @@ func (daemon *Daemon) SystemInfo() *types.Info {
 		NGoroutines:        runtime.NumGoroutine(),
 		SystemTime:         time.Now().Format(time.RFC3339Nano),
 		LoggingDriver:      daemon.defaultLogConfig.Type,
-		CgroupDriver:       daemon.getCgroupDriver(),
 		NEventsListener:    daemon.EventsService.SubscribersCount(),
 		KernelVersion:      kernelVersion(),
 		OperatingSystem:    operatingSystem(),

+ 43 - 26
daemon/info_unix.go

@@ -19,6 +19,12 @@ import (
 
 // fillPlatformInfo fills the platform related info.
 func (daemon *Daemon) fillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo) {
+	v.CgroupDriver = daemon.getCgroupDriver()
+	v.CgroupVersion = "1"
+	if sysInfo.CgroupUnified {
+		v.CgroupVersion = "2"
+	}
+
 	v.MemoryLimit = sysInfo.MemoryLimit
 	v.SwapLimit = sysInfo.SwapLimit
 	v.KernelMemory = sysInfo.KernelMemory
@@ -81,32 +87,43 @@ func (daemon *Daemon) fillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo)
 		v.InitCommit.ID = "N/A"
 	}
 
-	if !v.MemoryLimit {
-		v.Warnings = append(v.Warnings, "WARNING: No memory limit support")
-	}
-	if !v.SwapLimit {
-		v.Warnings = append(v.Warnings, "WARNING: No swap limit support")
-	}
-	if !v.KernelMemory {
-		v.Warnings = append(v.Warnings, "WARNING: No kernel memory limit support")
-	}
-	if !v.KernelMemoryTCP {
-		v.Warnings = append(v.Warnings, "WARNING: No kernel memory TCP limit support")
-	}
-	if !v.OomKillDisable {
-		v.Warnings = append(v.Warnings, "WARNING: No oom kill disable support")
-	}
-	if !v.CPUCfsQuota {
-		v.Warnings = append(v.Warnings, "WARNING: No cpu cfs quota support")
-	}
-	if !v.CPUCfsPeriod {
-		v.Warnings = append(v.Warnings, "WARNING: No cpu cfs period support")
-	}
-	if !v.CPUShares {
-		v.Warnings = append(v.Warnings, "WARNING: No cpu shares support")
-	}
-	if !v.CPUSet {
-		v.Warnings = append(v.Warnings, "WARNING: No cpuset support")
+	if v.CgroupDriver == cgroupNoneDriver {
+		if v.CgroupVersion == "2" {
+			v.Warnings = append(v.Warnings, "WARNING: Running in rootless-mode without cgroup. To enable cgroup in rootless-mode, you need to set exec-opt \"native.cgroupdriver=systemd\".")
+		} else {
+			v.Warnings = append(v.Warnings, "WARNING: Running in rootless-mode without cgroup. To enable cgroup in rootless-mode, you need to boot the system in cgroup v2 mode and set exec-opt \"native.cgroupdriver=systemd\".")
+		}
+	} else {
+		if !v.MemoryLimit {
+			v.Warnings = append(v.Warnings, "WARNING: No memory limit support")
+		}
+		if !v.SwapLimit {
+			v.Warnings = append(v.Warnings, "WARNING: No swap limit support")
+		}
+		if !v.KernelMemory {
+			v.Warnings = append(v.Warnings, "WARNING: No kernel memory limit support")
+		}
+		if !v.KernelMemoryTCP {
+			v.Warnings = append(v.Warnings, "WARNING: No kernel memory TCP limit support")
+		}
+		if !v.OomKillDisable {
+			v.Warnings = append(v.Warnings, "WARNING: No oom kill disable support")
+		}
+		if !v.CPUCfsQuota {
+			v.Warnings = append(v.Warnings, "WARNING: No cpu cfs quota support")
+		}
+		if !v.CPUCfsPeriod {
+			v.Warnings = append(v.Warnings, "WARNING: No cpu cfs period support")
+		}
+		if !v.CPUShares {
+			v.Warnings = append(v.Warnings, "WARNING: No cpu shares support")
+		}
+		if !v.CPUSet {
+			v.Warnings = append(v.Warnings, "WARNING: No cpuset support")
+		}
+		if v.CgroupVersion == "2" {
+			v.Warnings = append(v.Warnings, "WARNING: Support for cgroup v2 is experimental")
+		}
 	}
 	if !v.IPv4Forwarding {
 		v.Warnings = append(v.Warnings, "WARNING: IPv4 forwarding is disabled")

+ 1 - 0
docs/api/version-history.md

@@ -17,6 +17,7 @@ keywords: "API, Docker, rcli, REST, documentation"
 
 [Docker Engine API v1.41](https://docs.docker.com/engine/api/v1.41/) documentation
 
+* `GET /info` now returns a `CgroupVersion` field, containing the cgroup version.
 * `POST /services/create` and `POST /services/{id}/update` now supports `BindOptions.NonRecursive`.
 * The `ClusterStore` and `ClusterAdvertise` fields in `GET /info` are deprecated
   and are now omitted if they contain an empty value. This change is not versioned,

+ 151 - 0
pkg/sysinfo/cgroup2_linux.go

@@ -0,0 +1,151 @@
+package sysinfo // import "github.com/docker/docker/pkg/sysinfo"
+
+import (
+	"io/ioutil"
+	"path"
+	"strings"
+
+	cgroupsV2 "github.com/containerd/cgroups/v2"
+	rsystem "github.com/opencontainers/runc/libcontainer/system"
+	"github.com/sirupsen/logrus"
+)
+
+type infoCollectorV2 func(info *SysInfo, controllers map[string]struct{}, dirPath string) (warnings []string)
+
+func newV2(quiet bool, opts *opts) *SysInfo {
+	var warnings []string
+	sysInfo := &SysInfo{
+		CgroupUnified: true,
+	}
+	g := opts.cg2GroupPath
+	if g == "" {
+		g = "/"
+	}
+	m, err := cgroupsV2.LoadManager("/sys/fs/cgroup", g)
+	if err != nil {
+		logrus.Warn(err)
+	} else {
+		controllersM := make(map[string]struct{})
+		controllers, err := m.Controllers()
+		if err != nil {
+			logrus.Warn(err)
+		}
+		for _, c := range controllers {
+			controllersM[c] = struct{}{}
+		}
+		opsV2 := []infoCollectorV2{
+			applyMemoryCgroupInfoV2,
+			applyCPUCgroupInfoV2,
+			applyIOCgroupInfoV2,
+			applyCPUSetCgroupInfoV2,
+			applyPIDSCgroupInfoV2,
+			applyDevicesCgroupInfoV2,
+		}
+		dirPath := path.Join("/sys/fs/cgroup", path.Clean(g))
+		for _, o := range opsV2 {
+			w := o(sysInfo, controllersM, dirPath)
+			warnings = append(warnings, w...)
+		}
+	}
+
+	ops := []infoCollector{
+		applyNetworkingInfo,
+		applyAppArmorInfo,
+		applySeccompInfo,
+		applyCgroupNsInfo,
+	}
+	for _, o := range ops {
+		w := o(sysInfo, nil)
+		warnings = append(warnings, w...)
+	}
+	if !quiet {
+		for _, w := range warnings {
+			logrus.Warn(w)
+		}
+	}
+	return sysInfo
+}
+
+func applyMemoryCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
+	var warnings []string
+	if _, ok := controllers["memory"]; !ok {
+		warnings = append(warnings, "Unable to find memory controller")
+		return warnings
+	}
+
+	info.MemoryLimit = true
+	info.SwapLimit = true
+	info.MemoryReservation = true
+	info.OomKillDisable = false
+	info.MemorySwappiness = false
+	info.KernelMemory = false
+	info.KernelMemoryTCP = false
+	return warnings
+}
+
+func applyCPUCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
+	var warnings []string
+	if _, ok := controllers["cpu"]; !ok {
+		warnings = append(warnings, "Unable to find cpu controller")
+		return warnings
+	}
+	info.CPUShares = true
+	info.CPUCfsPeriod = true
+	info.CPUCfsQuota = true
+	info.CPURealtimePeriod = false
+	info.CPURealtimeRuntime = false
+	return warnings
+}
+
+func applyIOCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
+	var warnings []string
+	if _, ok := controllers["io"]; !ok {
+		warnings = append(warnings, "Unable to find io controller")
+		return warnings
+	}
+
+	info.BlkioWeight = true
+	info.BlkioWeightDevice = true
+	info.BlkioReadBpsDevice = true
+	info.BlkioWriteBpsDevice = true
+	info.BlkioReadIOpsDevice = true
+	info.BlkioWriteIOpsDevice = true
+	return warnings
+}
+
+func applyCPUSetCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, dirPath string) []string {
+	var warnings []string
+	if _, ok := controllers["cpuset"]; !ok {
+		warnings = append(warnings, "Unable to find cpuset controller")
+		return warnings
+	}
+	info.Cpuset = true
+
+	cpus, err := ioutil.ReadFile(path.Join(dirPath, "cpuset.cpus.effective"))
+	if err != nil {
+		return warnings
+	}
+	info.Cpus = strings.TrimSpace(string(cpus))
+
+	mems, err := ioutil.ReadFile(path.Join(dirPath, "cpuset.mems.effective"))
+	if err != nil {
+		return warnings
+	}
+	info.Mems = strings.TrimSpace(string(mems))
+	return warnings
+}
+
+func applyPIDSCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
+	var warnings []string
+	if _, ok := controllers["pids"]; !ok {
+		warnings = append(warnings, "Unable to find pids controller")
+		return warnings
+	}
+	info.PidsLimit = true
+	return warnings
+}
+
+func applyDevicesCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
+	info.CgroupDevicesEnabled = !rsystem.RunningInUserNS()
+	return nil
+}

+ 3 - 0
pkg/sysinfo/sysinfo.go

@@ -30,6 +30,9 @@ type SysInfo struct {
 
 	// Whether the cgroup has the mountpoint of "devices" or not
 	CgroupDevicesEnabled bool
+
+	// Whether the cgroup is in unified mode (v2).
+	CgroupUnified bool
 }
 
 type cgroupMemInfo struct {

+ 28 - 46
pkg/sysinfo/sysinfo_linux.go

@@ -28,10 +28,37 @@ func findCgroupMountpoints() (map[string]string, error) {
 
 type infoCollector func(info *SysInfo, cgMounts map[string]string) (warnings []string)
 
+type opts struct {
+	cg2GroupPath string
+}
+
+// Opt for New().
+type Opt func(*opts)
+
+// WithCgroup2GroupPath specifies the cgroup v2 group path to inspect availability
+// of the controllers.
+//
+// WithCgroup2GroupPath is expected to be used for rootless mode with systemd driver.
+//
+// e.g. g = "/user.slice/user-1000.slice/user@1000.service"
+func WithCgroup2GroupPath(g string) Opt {
+	return func(o *opts) {
+		o.cg2GroupPath = path.Clean(g)
+	}
+}
+
 // New returns a new SysInfo, using the filesystem to detect which features
 // the kernel supports. If `quiet` is `false` warnings are printed in logs
 // whenever an error occurs or misconfigurations are present.
-func New(quiet bool) *SysInfo {
+func New(quiet bool, options ...Opt) *SysInfo {
+	var opts opts
+	for _, o := range options {
+		o(&opts)
+	}
+	if cgroups.IsCgroup2UnifiedMode() {
+		return newV2(quiet, &opts)
+	}
+
 	var ops []infoCollector
 	var warnings []string
 	sysInfo := &SysInfo{}
@@ -60,9 +87,6 @@ func New(quiet bool) *SysInfo {
 		w := o(sysInfo, cgMounts)
 		warnings = append(warnings, w...)
 	}
-	if cgroups.IsCgroup2UnifiedMode() {
-		warnings = append(warnings, "Your system is running cgroup v2 (unsupported)")
-	}
 	if !quiet {
 		for _, w := range warnings {
 			logrus.Warn(w)
@@ -73,15 +97,6 @@ func New(quiet bool) *SysInfo {
 
 // applyMemoryCgroupInfo reads the memory information from the memory cgroup mount point.
 func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.MemoryLimit = true
-		info.SwapLimit = true
-		info.MemoryReservation = true
-		info.OomKillDisable = true
-		info.MemorySwappiness = true
-		return nil
-	}
 	var warnings []string
 	mountPoint, ok := cgMounts["memory"]
 	if !ok {
@@ -120,15 +135,6 @@ func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 
 // applyCPUCgroupInfo reads the cpu information from the cpu cgroup mount point.
 func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.CPUShares = true
-		info.CPUCfsPeriod = true
-		info.CPUCfsQuota = true
-		info.CPURealtimePeriod = true
-		info.CPURealtimeRuntime = true
-		return nil
-	}
 	var warnings []string
 	mountPoint, ok := cgMounts["cpu"]
 	if !ok {
@@ -166,15 +172,6 @@ func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 
 // applyBlkioCgroupInfo reads the blkio information from the blkio cgroup mount point.
 func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.BlkioWeight = true
-		info.BlkioReadBpsDevice = true
-		info.BlkioWriteBpsDevice = true
-		info.BlkioReadIOpsDevice = true
-		info.BlkioWriteIOpsDevice = true
-		return nil
-	}
 	var warnings []string
 	mountPoint, ok := cgMounts["blkio"]
 	if !ok {
@@ -216,11 +213,6 @@ func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 
 // applyCPUSetCgroupInfo reads the cpuset information from the cpuset cgroup mount point.
 func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.Cpuset = true
-		return nil
-	}
 	var warnings []string
 	mountPoint, ok := cgMounts["cpuset"]
 	if !ok {
@@ -248,11 +240,6 @@ func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 
 // applyPIDSCgroupInfo reads the pids information from the pids cgroup mount point.
 func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.PidsLimit = true
-		return nil
-	}
 	var warnings []string
 	_, err := cgroups.FindCgroupMountpoint("", "pids")
 	if err != nil {
@@ -265,11 +252,6 @@ func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {
 
 // applyDevicesCgroupInfo records whether the devices cgroup is mounted.
 func applyDevicesCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.CgroupDevicesEnabled = true
-		return nil
-	}
 	var warnings []string
 	_, ok := cgMounts["devices"]
 	info.CgroupDevicesEnabled = ok

+ 6 - 1
pkg/sysinfo/sysinfo_unix.go

@@ -2,8 +2,13 @@
 
 package sysinfo // import "github.com/docker/docker/pkg/sysinfo"
 
+type opts struct{}
+
+// Opt for New().
+type Opt func(*opts)
+
 // New returns an empty SysInfo for non linux for now.
-func New(quiet bool) *SysInfo {
+func New(quiet bool, options ...Opt) *SysInfo {
 	sysInfo := &SysInfo{}
 	return sysInfo
 }

+ 6 - 1
pkg/sysinfo/sysinfo_windows.go

@@ -1,7 +1,12 @@
 package sysinfo // import "github.com/docker/docker/pkg/sysinfo"
 
+type opts struct{}
+
+// Opt for New().
+type Opt func(*opts)
+
 // New returns an empty SysInfo for windows for now.
-func New(quiet bool) *SysInfo {
+func New(quiet bool, options ...Opt) *SysInfo {
 	sysInfo := &SysInfo{}
 	return sysInfo
 }

+ 13 - 4
runconfig/config.go

@@ -11,11 +11,20 @@ import (
 
 // ContainerDecoder implements httputils.ContainerDecoder
 // calling DecodeContainerConfig.
-type ContainerDecoder struct{}
+type ContainerDecoder struct {
+	GetSysInfo func() *sysinfo.SysInfo
+}
 
 // DecodeConfig makes ContainerDecoder implement httputils.ContainerDecoder
 func (r ContainerDecoder) DecodeConfig(src io.Reader) (*container.Config, *container.HostConfig, *networktypes.NetworkingConfig, error) {
-	return decodeContainerConfig(src)
+	var si *sysinfo.SysInfo
+	if r.GetSysInfo != nil {
+		si = r.GetSysInfo()
+	} else {
+		si = sysinfo.New(true)
+	}
+
+	return decodeContainerConfig(src, si)
 }
 
 // DecodeHostConfig makes ContainerDecoder to implement httputils.ContainerDecoder
@@ -27,7 +36,7 @@ func (r ContainerDecoder) DecodeHostConfig(src io.Reader) (*container.HostConfig
 // struct and returns both a Config and a HostConfig struct
 // Be aware this function is not checking whether the resulted structs are nil,
 // it's your business to do so
-func decodeContainerConfig(src io.Reader) (*container.Config, *container.HostConfig, *networktypes.NetworkingConfig, error) {
+func decodeContainerConfig(src io.Reader, si *sysinfo.SysInfo) (*container.Config, *container.HostConfig, *networktypes.NetworkingConfig, error) {
 	var w ContainerConfigWrapper
 
 	decoder := json.NewDecoder(src)
@@ -63,7 +72,7 @@ func decodeContainerConfig(src io.Reader) (*container.Config, *container.HostCon
 	}
 
 	// Validate Resources
-	if err := validateResources(hc, sysinfo.New(true)); err != nil {
+	if err := validateResources(hc, si); err != nil {
 		return nil, nil, nil, err
 	}
 

+ 3 - 2
runconfig/config_test.go

@@ -12,6 +12,7 @@ import (
 	"github.com/docker/docker/api/types/container"
 	networktypes "github.com/docker/docker/api/types/network"
 	"github.com/docker/docker/api/types/strslice"
+	"github.com/docker/docker/pkg/sysinfo"
 )
 
 type f struct {
@@ -46,7 +47,7 @@ func TestDecodeContainerConfig(t *testing.T) {
 			t.Fatal(err)
 		}
 
-		c, h, _, err := decodeContainerConfig(bytes.NewReader(b))
+		c, h, _, err := decodeContainerConfig(bytes.NewReader(b), sysinfo.New(true))
 		if err != nil {
 			t.Fatal(fmt.Errorf("Error parsing %s: %v", f, err))
 		}
@@ -130,5 +131,5 @@ func callDecodeContainerConfigIsolation(isolation string) (*container.Config, *c
 	if b, err = json.Marshal(w); err != nil {
 		return nil, nil, nil, fmt.Errorf("Error on marshal %s", err.Error())
 	}
-	return decodeContainerConfig(bytes.NewReader(b))
+	return decodeContainerConfig(bytes.NewReader(b), sysinfo.New(true))
 }

+ 1 - 0
vendor.conf

@@ -129,6 +129,7 @@ github.com/containerd/go-runc                       7016d3ce2328dd2cb1192b2076eb
 github.com/containerd/typeurl                       b45ef1f1f737e10bd45b25b669df25f0da8b9ba0
 github.com/containerd/ttrpc                         0be804eadb152bc3b3c20c5edc314c4633833398
 github.com/gogo/googleapis                          01e0f9cca9b92166042241267ee2a5cdf5cff46c # v1.3.2
+github.com/cilium/ebpf                              60c3aa43f488292fe2ee50fb8b833b383ca8ebbb
 
 # cluster
 github.com/docker/swarmkit                          ebe39a32e3ed4c3a3783a02c11cccf388818694c

+ 23 - 0
vendor/github.com/cilium/ebpf/LICENSE

@@ -0,0 +1,23 @@
+MIT License
+
+Copyright (c) 2017 Nathan Sweet
+Copyright (c) 2018, 2019 Cloudflare
+Copyright (c) 2019 Authors of Cilium
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 203 - 0
vendor/github.com/cilium/ebpf/abi.go

@@ -0,0 +1,203 @@
+package ebpf
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"syscall"
+
+	"github.com/cilium/ebpf/internal"
+	"github.com/pkg/errors"
+)
+
+// MapABI are the attributes of a Map which are available across all supported kernels.
+type MapABI struct {
+	Type       MapType
+	KeySize    uint32
+	ValueSize  uint32
+	MaxEntries uint32
+	Flags      uint32
+}
+
+func newMapABIFromSpec(spec *MapSpec) *MapABI {
+	return &MapABI{
+		spec.Type,
+		spec.KeySize,
+		spec.ValueSize,
+		spec.MaxEntries,
+		spec.Flags,
+	}
+}
+
+func newMapABIFromFd(fd *bpfFD) (string, *MapABI, error) {
+	info, err := bpfGetMapInfoByFD(fd)
+	if err != nil {
+		if errors.Cause(err) == syscall.EINVAL {
+			abi, err := newMapABIFromProc(fd)
+			return "", abi, err
+		}
+		return "", nil, err
+	}
+
+	return "", &MapABI{
+		MapType(info.mapType),
+		info.keySize,
+		info.valueSize,
+		info.maxEntries,
+		info.flags,
+	}, nil
+}
+
+func newMapABIFromProc(fd *bpfFD) (*MapABI, error) {
+	var abi MapABI
+	err := scanFdInfo(fd, map[string]interface{}{
+		"map_type":    &abi.Type,
+		"key_size":    &abi.KeySize,
+		"value_size":  &abi.ValueSize,
+		"max_entries": &abi.MaxEntries,
+		"map_flags":   &abi.Flags,
+	})
+	if err != nil {
+		return nil, err
+	}
+	return &abi, nil
+}
+
+// Equal returns true if two ABIs have the same values.
+func (abi *MapABI) Equal(other *MapABI) bool {
+	switch {
+	case abi.Type != other.Type:
+		return false
+	case abi.KeySize != other.KeySize:
+		return false
+	case abi.ValueSize != other.ValueSize:
+		return false
+	case abi.MaxEntries != other.MaxEntries:
+		return false
+	case abi.Flags != other.Flags:
+		return false
+	default:
+		return true
+	}
+}
+
+// ProgramABI are the attributes of a Program which are available across all supported kernels.
+type ProgramABI struct {
+	Type ProgramType
+}
+
+func newProgramABIFromSpec(spec *ProgramSpec) *ProgramABI {
+	return &ProgramABI{
+		spec.Type,
+	}
+}
+
+func newProgramABIFromFd(fd *bpfFD) (string, *ProgramABI, error) {
+	info, err := bpfGetProgInfoByFD(fd)
+	if err != nil {
+		if errors.Cause(err) == syscall.EINVAL {
+			return newProgramABIFromProc(fd)
+		}
+
+		return "", nil, err
+	}
+
+	var name string
+	if bpfName := convertCString(info.name[:]); bpfName != "" {
+		name = bpfName
+	} else {
+		name = convertCString(info.tag[:])
+	}
+
+	return name, &ProgramABI{
+		Type: ProgramType(info.progType),
+	}, nil
+}
+
+func newProgramABIFromProc(fd *bpfFD) (string, *ProgramABI, error) {
+	var (
+		abi  ProgramABI
+		name string
+	)
+
+	err := scanFdInfo(fd, map[string]interface{}{
+		"prog_type": &abi.Type,
+		"prog_tag":  &name,
+	})
+	if errors.Cause(err) == errMissingFields {
+		return "", nil, &internal.UnsupportedFeatureError{
+			Name:           "reading ABI from /proc/self/fdinfo",
+			MinimumVersion: internal.Version{4, 11, 0},
+		}
+	}
+	if err != nil {
+		return "", nil, err
+	}
+
+	return name, &abi, nil
+}
+
+func scanFdInfo(fd *bpfFD, fields map[string]interface{}) error {
+	raw, err := fd.value()
+	if err != nil {
+		return err
+	}
+
+	fh, err := os.Open(fmt.Sprintf("/proc/self/fdinfo/%d", raw))
+	if err != nil {
+		return err
+	}
+	defer fh.Close()
+
+	return errors.Wrap(scanFdInfoReader(fh, fields), fh.Name())
+}
+
+var errMissingFields = errors.New("missing fields")
+
+func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error {
+	var (
+		scanner = bufio.NewScanner(r)
+		scanned int
+	)
+
+	for scanner.Scan() {
+		parts := bytes.SplitN(scanner.Bytes(), []byte("\t"), 2)
+		if len(parts) != 2 {
+			continue
+		}
+
+		name := bytes.TrimSuffix(parts[0], []byte(":"))
+		field, ok := fields[string(name)]
+		if !ok {
+			continue
+		}
+
+		if n, err := fmt.Fscanln(bytes.NewReader(parts[1]), field); err != nil || n != 1 {
+			return errors.Wrapf(err, "can't parse field %s", name)
+		}
+
+		scanned++
+	}
+
+	if err := scanner.Err(); err != nil {
+		return err
+	}
+
+	if scanned != len(fields) {
+		return errMissingFields
+	}
+
+	return nil
+}
+
+// Equal returns true if two ABIs have the same values.
+func (abi *ProgramABI) Equal(other *ProgramABI) bool {
+	switch {
+	case abi.Type != other.Type:
+		return false
+	default:
+		return true
+	}
+}

+ 149 - 0
vendor/github.com/cilium/ebpf/asm/alu.go

@@ -0,0 +1,149 @@
+package asm
+
+//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp
+
+// Source of ALU / ALU64 / Branch operations
+//
+//    msb      lsb
+//    +----+-+---+
+//    |op  |S|cls|
+//    +----+-+---+
+type Source uint8
+
+const sourceMask OpCode = 0x08
+
+// Source bitmask
+const (
+	// InvalidSource is returned by getters when invoked
+	// on non ALU / branch OpCodes.
+	InvalidSource Source = 0xff
+	// ImmSource src is from constant
+	ImmSource Source = 0x00
+	// RegSource src is from register
+	RegSource Source = 0x08
+)
+
+// The Endianness of a byte swap instruction.
+type Endianness uint8
+
+const endianMask = sourceMask
+
+// Endian flags
+const (
+	InvalidEndian Endianness = 0xff
+	// Convert to little endian
+	LE Endianness = 0x00
+	// Convert to big endian
+	BE Endianness = 0x08
+)
+
+// ALUOp are ALU / ALU64 operations
+//
+//    msb      lsb
+//    +----+-+---+
+//    |OP  |s|cls|
+//    +----+-+---+
+type ALUOp uint8
+
+const aluMask OpCode = 0xf0
+
+const (
+	// InvalidALUOp is returned by getters when invoked
+	// on non ALU OpCodes
+	InvalidALUOp ALUOp = 0xff
+	// Add - addition
+	Add ALUOp = 0x00
+	// Sub - subtraction
+	Sub ALUOp = 0x10
+	// Mul - multiplication
+	Mul ALUOp = 0x20
+	// Div - division
+	Div ALUOp = 0x30
+	// Or - bitwise or
+	Or ALUOp = 0x40
+	// And - bitwise and
+	And ALUOp = 0x50
+	// LSh - bitwise shift left
+	LSh ALUOp = 0x60
+	// RSh - bitwise shift right
+	RSh ALUOp = 0x70
+	// Neg - sign/unsign signing bit
+	Neg ALUOp = 0x80
+	// Mod - modulo
+	Mod ALUOp = 0x90
+	// Xor - bitwise xor
+	Xor ALUOp = 0xa0
+	// Mov - move value from one place to another
+	Mov ALUOp = 0xb0
+	// ArSh - arithmetic shift
+	ArSh ALUOp = 0xc0
+	// Swap - endian conversions
+	Swap ALUOp = 0xd0
+)
+
+// HostTo emits a Swap instruction converting dst from host byte order
+// to the given endianness.
+//
+// size selects the width of the conversion: Half (16 bit), Word (32 bit)
+// or DWord (64 bit). Any other size yields an instruction carrying
+// InvalidOpCode.
+func HostTo(endian Endianness, dst Register, size Size) Instruction {
+	var width int64
+	switch size {
+	case Half:
+		width = 16
+	case Word:
+		width = 32
+	case DWord:
+		width = 64
+	default:
+		return Instruction{OpCode: InvalidOpCode}
+	}
+
+	var swap Instruction
+	swap.OpCode = OpCode(ALUClass).SetALUOp(Swap).SetSource(Source(endian))
+	swap.Dst = dst
+	swap.Constant = width
+	return swap
+}
+
+// Op builds the ALU64Class OpCode that performs op with the given source.
+func (op ALUOp) Op(source Source) OpCode {
+	code := OpCode(ALU64Class).SetALUOp(op)
+	return code.SetSource(source)
+}
+
+// Reg emits `dst (op) src`.
+func (op ALUOp) Reg(dst, src Register) Instruction {
+	return Instruction{
+		OpCode: op.Op(RegSource),
+		Dst:    dst,
+		Src:    src,
+	}
+}
+
+// Imm emits `dst (op) value`.
+func (op ALUOp) Imm(dst Register, value int32) Instruction {
+	return Instruction{
+		OpCode:   op.Op(ImmSource),
+		Dst:      dst,
+		Constant: int64(value),
+	}
+}
+
+// Op32 builds the ALUClass (32-bit) OpCode that performs op with the
+// given source.
+func (op ALUOp) Op32(source Source) OpCode {
+	code := OpCode(ALUClass).SetALUOp(op)
+	return code.SetSource(source)
+}
+
+// Reg32 emits `dst (op) src`, zeroing the upper 32 bit of dst.
+func (op ALUOp) Reg32(dst, src Register) Instruction {
+	return Instruction{
+		OpCode: op.Op32(RegSource),
+		Dst:    dst,
+		Src:    src,
+	}
+}
+
+// Imm32 emits `dst (op) value`, zeroing the upper 32 bit of dst.
+func (op ALUOp) Imm32(dst Register, value int32) Instruction {
+	return Instruction{
+		OpCode:   op.Op32(ImmSource),
+		Dst:      dst,
+		Constant: int64(value),
+	}
+}

+ 107 - 0
vendor/github.com/cilium/ebpf/asm/alu_string.go

@@ -0,0 +1,107 @@
+// Code generated by "stringer -output alu_string.go -type=Source,Endianness,ALUOp"; DO NOT EDIT.
+
+package asm
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidSource-255]
+	_ = x[ImmSource-0]
+	_ = x[RegSource-8]
+}
+
+const (
+	_Source_name_0 = "ImmSource"
+	_Source_name_1 = "RegSource"
+	_Source_name_2 = "InvalidSource"
+)
+
+func (i Source) String() string {
+	switch {
+	case i == 0:
+		return _Source_name_0
+	case i == 8:
+		return _Source_name_1
+	case i == 255:
+		return _Source_name_2
+	default:
+		return "Source(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidEndian-255]
+	_ = x[LE-0]
+	_ = x[BE-8]
+}
+
+const (
+	_Endianness_name_0 = "LE"
+	_Endianness_name_1 = "BE"
+	_Endianness_name_2 = "InvalidEndian"
+)
+
+func (i Endianness) String() string {
+	switch {
+	case i == 0:
+		return _Endianness_name_0
+	case i == 8:
+		return _Endianness_name_1
+	case i == 255:
+		return _Endianness_name_2
+	default:
+		return "Endianness(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidALUOp-255]
+	_ = x[Add-0]
+	_ = x[Sub-16]
+	_ = x[Mul-32]
+	_ = x[Div-48]
+	_ = x[Or-64]
+	_ = x[And-80]
+	_ = x[LSh-96]
+	_ = x[RSh-112]
+	_ = x[Neg-128]
+	_ = x[Mod-144]
+	_ = x[Xor-160]
+	_ = x[Mov-176]
+	_ = x[ArSh-192]
+	_ = x[Swap-208]
+}
+
+const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp"
+
+var _ALUOp_map = map[ALUOp]string{
+	0:   _ALUOp_name[0:3],
+	16:  _ALUOp_name[3:6],
+	32:  _ALUOp_name[6:9],
+	48:  _ALUOp_name[9:12],
+	64:  _ALUOp_name[12:14],
+	80:  _ALUOp_name[14:17],
+	96:  _ALUOp_name[17:20],
+	112: _ALUOp_name[20:23],
+	128: _ALUOp_name[23:26],
+	144: _ALUOp_name[26:29],
+	160: _ALUOp_name[29:32],
+	176: _ALUOp_name[32:35],
+	192: _ALUOp_name[35:39],
+	208: _ALUOp_name[39:43],
+	255: _ALUOp_name[43:55],
+}
+
+func (i ALUOp) String() string {
+	if str, ok := _ALUOp_map[i]; ok {
+		return str
+	}
+	return "ALUOp(" + strconv.FormatInt(int64(i), 10) + ")"
+}

+ 2 - 0
vendor/github.com/cilium/ebpf/asm/doc.go

@@ -0,0 +1,2 @@
+// Package asm is an assembler for eBPF bytecode.
+package asm

+ 143 - 0
vendor/github.com/cilium/ebpf/asm/func.go

@@ -0,0 +1,143 @@
+package asm
+
+//go:generate stringer -output func_string.go -type=BuiltinFunc
+
+// BuiltinFunc is a built-in eBPF function.
+type BuiltinFunc int32
+
+// eBPF built-in functions
+//
+// You can regenerate this list using the following gawk script:
+//
+//    /FN\(.+\),/ {
+//      match($1, /\((.+)\)/, r)
+//      split(r[1], p, "_")
+//      printf "Fn"
+//      for (i in p) {
+//        printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2)
+//      }
+//      print ""
+//    }
+//
+// The script expects include/uapi/linux/bpf.h as its input.
+const (
+	FnUnspec BuiltinFunc = iota
+	FnMapLookupElem
+	FnMapUpdateElem
+	FnMapDeleteElem
+	FnProbeRead
+	FnKtimeGetNs
+	FnTracePrintk
+	FnGetPrandomU32
+	FnGetSmpProcessorId
+	FnSkbStoreBytes
+	FnL3CsumReplace
+	FnL4CsumReplace
+	FnTailCall
+	FnCloneRedirect
+	FnGetCurrentPidTgid
+	FnGetCurrentUidGid
+	FnGetCurrentComm
+	FnGetCgroupClassid
+	FnSkbVlanPush
+	FnSkbVlanPop
+	FnSkbGetTunnelKey
+	FnSkbSetTunnelKey
+	FnPerfEventRead
+	FnRedirect
+	FnGetRouteRealm
+	FnPerfEventOutput
+	FnSkbLoadBytes
+	FnGetStackid
+	FnCsumDiff
+	FnSkbGetTunnelOpt
+	FnSkbSetTunnelOpt
+	FnSkbChangeProto
+	FnSkbChangeType
+	FnSkbUnderCgroup
+	FnGetHashRecalc
+	FnGetCurrentTask
+	FnProbeWriteUser
+	FnCurrentTaskUnderCgroup
+	FnSkbChangeTail
+	FnSkbPullData
+	FnCsumUpdate
+	FnSetHashInvalid
+	FnGetNumaNodeId
+	FnSkbChangeHead
+	FnXdpAdjustHead
+	FnProbeReadStr
+	FnGetSocketCookie
+	FnGetSocketUid
+	FnSetHash
+	FnSetsockopt
+	FnSkbAdjustRoom
+	FnRedirectMap
+	FnSkRedirectMap
+	FnSockMapUpdate
+	FnXdpAdjustMeta
+	FnPerfEventReadValue
+	FnPerfProgReadValue
+	FnGetsockopt
+	FnOverrideReturn
+	FnSockOpsCbFlagsSet
+	FnMsgRedirectMap
+	FnMsgApplyBytes
+	FnMsgCorkBytes
+	FnMsgPullData
+	FnBind
+	FnXdpAdjustTail
+	FnSkbGetXfrmState
+	FnGetStack
+	FnSkbLoadBytesRelative
+	FnFibLookup
+	FnSockHashUpdate
+	FnMsgRedirectHash
+	FnSkRedirectHash
+	FnLwtPushEncap
+	FnLwtSeg6StoreBytes
+	FnLwtSeg6AdjustSrh
+	FnLwtSeg6Action
+	FnRcRepeat
+	FnRcKeydown
+	FnSkbCgroupId
+	FnGetCurrentCgroupId
+	FnGetLocalStorage
+	FnSkSelectReuseport
+	FnSkbAncestorCgroupId
+	FnSkLookupTcp
+	FnSkLookupUdp
+	FnSkRelease
+	FnMapPushElem
+	FnMapPopElem
+	FnMapPeekElem
+	FnMsgPushData
+	FnMsgPopData
+	FnRcPointerRel
+	FnSpinLock
+	FnSpinUnlock
+	FnSkFullsock
+	FnTcpSock
+	FnSkbEcnSetCe
+	FnGetListenerSock
+	FnSkcLookupTcp
+	FnTcpCheckSyncookie
+	FnSysctlGetName
+	FnSysctlGetCurrentValue
+	FnSysctlGetNewValue
+	FnSysctlSetNewValue
+	FnStrtol
+	FnStrtoul
+	FnSkStorageGet
+	FnSkStorageDelete
+	FnSendSignal
+	FnTcpGenSyncookie
+)
+
+// Call emits a function call.
+func (fn BuiltinFunc) Call() Instruction {
+	return Instruction{
+		OpCode:   OpCode(JumpClass).SetJumpOp(Call),
+		Constant: int64(fn),
+	}
+}

+ 133 - 0
vendor/github.com/cilium/ebpf/asm/func_string.go

@@ -0,0 +1,133 @@
+// Code generated by "stringer -output func_string.go -type=BuiltinFunc"; DO NOT EDIT.
+
+package asm
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[FnUnspec-0]
+	_ = x[FnMapLookupElem-1]
+	_ = x[FnMapUpdateElem-2]
+	_ = x[FnMapDeleteElem-3]
+	_ = x[FnProbeRead-4]
+	_ = x[FnKtimeGetNs-5]
+	_ = x[FnTracePrintk-6]
+	_ = x[FnGetPrandomU32-7]
+	_ = x[FnGetSmpProcessorId-8]
+	_ = x[FnSkbStoreBytes-9]
+	_ = x[FnL3CsumReplace-10]
+	_ = x[FnL4CsumReplace-11]
+	_ = x[FnTailCall-12]
+	_ = x[FnCloneRedirect-13]
+	_ = x[FnGetCurrentPidTgid-14]
+	_ = x[FnGetCurrentUidGid-15]
+	_ = x[FnGetCurrentComm-16]
+	_ = x[FnGetCgroupClassid-17]
+	_ = x[FnSkbVlanPush-18]
+	_ = x[FnSkbVlanPop-19]
+	_ = x[FnSkbGetTunnelKey-20]
+	_ = x[FnSkbSetTunnelKey-21]
+	_ = x[FnPerfEventRead-22]
+	_ = x[FnRedirect-23]
+	_ = x[FnGetRouteRealm-24]
+	_ = x[FnPerfEventOutput-25]
+	_ = x[FnSkbLoadBytes-26]
+	_ = x[FnGetStackid-27]
+	_ = x[FnCsumDiff-28]
+	_ = x[FnSkbGetTunnelOpt-29]
+	_ = x[FnSkbSetTunnelOpt-30]
+	_ = x[FnSkbChangeProto-31]
+	_ = x[FnSkbChangeType-32]
+	_ = x[FnSkbUnderCgroup-33]
+	_ = x[FnGetHashRecalc-34]
+	_ = x[FnGetCurrentTask-35]
+	_ = x[FnProbeWriteUser-36]
+	_ = x[FnCurrentTaskUnderCgroup-37]
+	_ = x[FnSkbChangeTail-38]
+	_ = x[FnSkbPullData-39]
+	_ = x[FnCsumUpdate-40]
+	_ = x[FnSetHashInvalid-41]
+	_ = x[FnGetNumaNodeId-42]
+	_ = x[FnSkbChangeHead-43]
+	_ = x[FnXdpAdjustHead-44]
+	_ = x[FnProbeReadStr-45]
+	_ = x[FnGetSocketCookie-46]
+	_ = x[FnGetSocketUid-47]
+	_ = x[FnSetHash-48]
+	_ = x[FnSetsockopt-49]
+	_ = x[FnSkbAdjustRoom-50]
+	_ = x[FnRedirectMap-51]
+	_ = x[FnSkRedirectMap-52]
+	_ = x[FnSockMapUpdate-53]
+	_ = x[FnXdpAdjustMeta-54]
+	_ = x[FnPerfEventReadValue-55]
+	_ = x[FnPerfProgReadValue-56]
+	_ = x[FnGetsockopt-57]
+	_ = x[FnOverrideReturn-58]
+	_ = x[FnSockOpsCbFlagsSet-59]
+	_ = x[FnMsgRedirectMap-60]
+	_ = x[FnMsgApplyBytes-61]
+	_ = x[FnMsgCorkBytes-62]
+	_ = x[FnMsgPullData-63]
+	_ = x[FnBind-64]
+	_ = x[FnXdpAdjustTail-65]
+	_ = x[FnSkbGetXfrmState-66]
+	_ = x[FnGetStack-67]
+	_ = x[FnSkbLoadBytesRelative-68]
+	_ = x[FnFibLookup-69]
+	_ = x[FnSockHashUpdate-70]
+	_ = x[FnMsgRedirectHash-71]
+	_ = x[FnSkRedirectHash-72]
+	_ = x[FnLwtPushEncap-73]
+	_ = x[FnLwtSeg6StoreBytes-74]
+	_ = x[FnLwtSeg6AdjustSrh-75]
+	_ = x[FnLwtSeg6Action-76]
+	_ = x[FnRcRepeat-77]
+	_ = x[FnRcKeydown-78]
+	_ = x[FnSkbCgroupId-79]
+	_ = x[FnGetCurrentCgroupId-80]
+	_ = x[FnGetLocalStorage-81]
+	_ = x[FnSkSelectReuseport-82]
+	_ = x[FnSkbAncestorCgroupId-83]
+	_ = x[FnSkLookupTcp-84]
+	_ = x[FnSkLookupUdp-85]
+	_ = x[FnSkRelease-86]
+	_ = x[FnMapPushElem-87]
+	_ = x[FnMapPopElem-88]
+	_ = x[FnMapPeekElem-89]
+	_ = x[FnMsgPushData-90]
+	_ = x[FnMsgPopData-91]
+	_ = x[FnRcPointerRel-92]
+	_ = x[FnSpinLock-93]
+	_ = x[FnSpinUnlock-94]
+	_ = x[FnSkFullsock-95]
+	_ = x[FnTcpSock-96]
+	_ = x[FnSkbEcnSetCe-97]
+	_ = x[FnGetListenerSock-98]
+	_ = x[FnSkcLookupTcp-99]
+	_ = x[FnTcpCheckSyncookie-100]
+	_ = x[FnSysctlGetName-101]
+	_ = x[FnSysctlGetCurrentValue-102]
+	_ = x[FnSysctlGetNewValue-103]
+	_ = x[FnSysctlSetNewValue-104]
+	_ = x[FnStrtol-105]
+	_ = x[FnStrtoul-106]
+	_ = x[FnSkStorageGet-107]
+	_ = x[FnSkStorageDelete-108]
+	_ = x[FnSendSignal-109]
+	_ = x[FnTcpGenSyncookie-110]
+}
+
+const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookie"
+
+var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632}
+
+func (i BuiltinFunc) String() string {
+	if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) {
+		return "BuiltinFunc(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _BuiltinFunc_name[_BuiltinFunc_index[i]:_BuiltinFunc_index[i+1]]
+}

+ 416 - 0
vendor/github.com/cilium/ebpf/asm/instruction.go

@@ -0,0 +1,416 @@
+package asm
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"math"
+	"strings"
+
+	"github.com/pkg/errors"
+)
+
+// InstructionSize is the size of a BPF instruction in bytes
+const InstructionSize = 8
+
+// Instruction is a single eBPF instruction.
+//
+// It is the decoded, in-memory form; Marshal and Unmarshal convert it
+// to and from the fixed-size binary encoding.
+type Instruction struct {
+	// OpCode selects the operation. InvalidOpCode marks an instruction
+	// that can't be marshalled.
+	OpCode OpCode
+	// Dst and Src are the destination and source registers.
+	Dst Register
+	Src Register
+	// Offset is the 16 bit offset field. On a jump, -1 marks a label
+	// that Instructions.Marshal still has to resolve via Reference.
+	Offset int16
+	// Constant is the immediate. Only double-word loads encode all
+	// 64 bits; other instructions encode the lower 32 bits.
+	Constant int64
+	// Reference names the symbol this instruction refers to, if any.
+	Reference string
+	// Symbol is the name attached to this instruction, see Sym.
+	Symbol string
+}
+
+// Sym returns a copy of the instruction with its Symbol set to name.
+func (ins Instruction) Sym(name string) Instruction {
+	labelled := ins
+	labelled.Symbol = name
+	return labelled
+}
+
+// Unmarshal decodes a BPF instruction from r using byte order bo.
+//
+// It returns the number of bytes consumed: InstructionSize, or twice
+// that for a double-word load, whose 64 bit immediate spans two slots.
+func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, error) {
+	var first bpfInstruction
+	if err := binary.Read(r, bo, &first); err != nil {
+		return 0, err
+	}
+
+	regs := first.Registers
+	ins.OpCode = first.OpCode
+	ins.Dst = regs.Dst()
+	ins.Src = regs.Src()
+	ins.Offset = first.Offset
+	ins.Constant = int64(first.Constant)
+
+	if !first.OpCode.isDWordLoad() {
+		return InstructionSize, nil
+	}
+
+	var second bpfInstruction
+	if err := binary.Read(r, bo, &second); err != nil {
+		// No Wrap, to avoid io.EOF clash
+		return 0, errors.New("64bit immediate is missing second half")
+	}
+	if second.OpCode != 0 || second.Offset != 0 || second.Registers != 0 {
+		return 0, errors.New("64bit immediate has non-zero fields")
+	}
+
+	// The first slot carries the low 32 bits, the second the high 32 bits.
+	low := uint64(uint32(first.Constant))
+	high := uint64(uint32(second.Constant))
+	ins.Constant = int64(high<<32 | low)
+
+	return 2 * InstructionSize, nil
+}
+
+// Marshal encodes a BPF instruction to w using byte order bo.
+//
+// It returns the number of bytes written: InstructionSize, or twice
+// that for a double-word load. An instruction with InvalidOpCode is
+// rejected.
+func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) {
+	if ins.OpCode == InvalidOpCode {
+		return 0, errors.New("invalid opcode")
+	}
+
+	wide := ins.OpCode.isDWordLoad()
+
+	// Converting to int32 keeps the least significant 32 bits of the
+	// constant, which is also the half a 64 bit load encodes first.
+	first := bpfInstruction{
+		OpCode:    ins.OpCode,
+		Registers: newBPFRegisters(ins.Dst, ins.Src),
+		Offset:    ins.Offset,
+		Constant:  int32(ins.Constant),
+	}
+	if err := binary.Write(w, bo, &first); err != nil {
+		return 0, err
+	}
+
+	if !wide {
+		return InstructionSize, nil
+	}
+
+	// The upper 32 bits follow in a second slot whose other fields are zero.
+	second := bpfInstruction{Constant: int32(ins.Constant >> 32)}
+	if err := binary.Write(w, bo, &second); err != nil {
+		return 0, err
+	}
+
+	return 2 * InstructionSize, nil
+}
+
+// RewriteMapPtr changes an instruction to use a new map fd.
+//
+// The instruction must be a 64 bit load and fd must not be negative,
+// otherwise an error is returned and the instruction is left untouched.
+func (ins *Instruction) RewriteMapPtr(fd int) error {
+	switch {
+	case !ins.OpCode.isDWordLoad():
+		return errors.Errorf("%s is not a 64 bit load", ins.OpCode)
+	case fd < 0:
+		return errors.New("invalid fd")
+	}
+
+	ins.Src, ins.Constant = R1, int64(fd)
+	return nil
+}
+
+// Format implements fmt.Formatter.
+//
+// Only the 'v' verb is supported. The output is the opcode followed by
+// the operands that are relevant for its class, and the Reference in
+// angle brackets if one is set.
+func (ins Instruction) Format(f fmt.State, c rune) {
+	if c != 'v' {
+		fmt.Fprintf(f, "{UNRECOGNIZED: %c}", c)
+		return
+	}
+
+	code := ins.OpCode
+	switch {
+	case code == InvalidOpCode:
+		fmt.Fprint(f, "INVALID")
+		return
+	case code.JumpOp() == Exit:
+		// Exit has no operands, so it is printed without a trailing space.
+		fmt.Fprint(f, code)
+		return
+	}
+
+	fmt.Fprintf(f, "%v ", code)
+
+	switch code.Class() {
+	case LdClass, LdXClass, StClass, StXClass:
+		switch code.Mode() {
+		case ImmMode:
+			fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant)
+		case AbsMode:
+			fmt.Fprintf(f, "imm: %d", ins.Constant)
+		case IndMode:
+			fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant)
+		case MemMode:
+			fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant)
+		case XAddMode:
+			fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src)
+		}
+
+	case ALU64Class, ALUClass:
+		fmt.Fprintf(f, "dst: %s ", ins.Dst)
+		// Swap keeps its width in the constant, so it prints like an immediate.
+		showImm := code.ALUOp() == Swap || code.Source() == ImmSource
+		if showImm {
+			fmt.Fprintf(f, "imm: %d", ins.Constant)
+		} else {
+			fmt.Fprintf(f, "src: %s", ins.Src)
+		}
+
+	case JumpClass:
+		if code.JumpOp() == Call {
+			if ins.Src == R1 {
+				// bpf-to-bpf call
+				fmt.Fprint(f, ins.Constant)
+			} else {
+				fmt.Fprint(f, BuiltinFunc(ins.Constant))
+			}
+			break
+		}
+
+		fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset)
+		if code.Source() == ImmSource {
+			fmt.Fprintf(f, "imm: %d", ins.Constant)
+		} else {
+			fmt.Fprintf(f, "src: %s", ins.Src)
+		}
+	}
+
+	if ref := ins.Reference; ref != "" {
+		fmt.Fprintf(f, " <%s>", ref)
+	}
+}
+
+// Instructions is an eBPF program.
+type Instructions []Instruction
+
+// String renders the program using the default 'v' formatting of Format.
+func (insns Instructions) String() string {
+	return fmt.Sprintf("%v", insns)
+}
+
+// RewriteMapPtr rewrites all loads of a specific map pointer to a new fd.
+//
+// Every instruction whose Reference equals symbol is rewritten in place.
+// Returns an error if symbol is empty, if a matching instruction can't be
+// rewritten, or if the symbol isn't used at all; see IsUnreferencedSymbol
+// for the latter.
+func (insns Instructions) RewriteMapPtr(symbol string, fd int) error {
+	if symbol == "" {
+		return errors.New("empty symbol")
+	}
+
+	rewritten := 0
+	for i := range insns {
+		if insns[i].Reference != symbol {
+			continue
+		}
+		if err := insns[i].RewriteMapPtr(fd); err != nil {
+			return err
+		}
+		rewritten++
+	}
+
+	if rewritten == 0 {
+		return &unreferencedSymbolError{symbol}
+	}
+	return nil
+}
+
+// SymbolOffsets maps each symbol to the index of the instruction that
+// carries it.
+//
+// It returns an error if the same symbol appears more than once.
+func (insns Instructions) SymbolOffsets() (map[string]int, error) {
+	byName := make(map[string]int)
+
+	for idx := range insns {
+		sym := insns[idx].Symbol
+		if sym == "" {
+			continue
+		}
+
+		if _, seen := byName[sym]; seen {
+			return nil, errors.Errorf("duplicate symbol %s", sym)
+		}
+		byName[sym] = idx
+	}
+
+	return byName, nil
+}
+
+// ReferenceOffsets maps each referenced symbol to the indices of the
+// instructions that reference it, in program order.
+func (insns Instructions) ReferenceOffsets() map[string][]int {
+	byName := make(map[string][]int)
+
+	for idx := range insns {
+		if ref := insns[idx].Reference; ref != "" {
+			byName[ref] = append(byName[ref], idx)
+		}
+	}
+
+	return byName
+}
+
+// marshalledOffsets maps each symbol to its position in the marshalled
+// program, counted in encoded instruction slots rather than in elements
+// of insns.
+//
+// It returns an error if the same symbol appears more than once.
+func (insns Instructions) marshalledOffsets() (map[string]int, error) {
+	positions := make(map[string]int)
+
+	next := 0
+	for idx := range insns {
+		ins := &insns[idx]
+
+		here := next
+		next += ins.OpCode.marshalledInstructions()
+
+		if ins.Symbol == "" {
+			continue
+		}
+		if _, seen := positions[ins.Symbol]; seen {
+			return nil, errors.Errorf("duplicate symbol %s", ins.Symbol)
+		}
+		positions[ins.Symbol] = here
+	}
+
+	return positions, nil
+}
+
+// Format implements fmt.Formatter.
+//
+// Supported verbs are 's' and 'v'. Each instruction is printed on its
+// own line, prefixed by its offset in the marshalled program; symbols
+// get a line of their own in front of their instruction.
+//
+// You can control indentation of symbols by
+// specifying a width. Setting a precision controls the indentation of
+// instructions.
+// The default character is a tab, which can be overridden by specifying
+// the ' ' space flag.
+func (insns Instructions) Format(f fmt.State, c rune) {
+	if c != 's' && c != 'v' {
+		fmt.Fprintf(f, "{UNKNOWN FORMAT '%c'}", c)
+		return
+	}
+
+	fill := "\t"
+	if f.Flag(' ') {
+		fill = " "
+	}
+
+	// Precision is better in this case, because it allows
+	// specifying 0 padding easily.
+	insPad, ok := f.Precision()
+	if !ok {
+		insPad = 1
+	}
+
+	// Without an explicit width, symbols sit one level above instructions.
+	symPad, ok := f.Width()
+	if !ok {
+		symPad = insPad - 1
+	}
+	if symPad < 0 {
+		symPad = 0
+	}
+
+	insIndent := strings.Repeat(fill, insPad)
+	symIndent := strings.Repeat(fill, symPad)
+
+	// Figure out how many digits we need to represent the highest
+	// offset.
+	total := 0
+	for i := range insns {
+		total += insns[i].OpCode.marshalledInstructions()
+	}
+	digits := int(math.Ceil(math.Log10(float64(total))))
+
+	pos := 0
+	for _, ins := range insns {
+		if ins.Symbol != "" {
+			fmt.Fprintf(f, "%s%s:\n", symIndent, ins.Symbol)
+		}
+		fmt.Fprintf(f, "%s%*d: %v\n", insIndent, digits, pos, ins)
+		pos += ins.OpCode.marshalledInstructions()
+	}
+}
+
+// Marshal encodes a BPF program into the kernel format.
+//
+// References are resolved while encoding: a Call with a constant of -1
+// and a jump with an offset of -1 are rewritten to point at the
+// instruction carrying the referenced symbol, relative to the
+// instruction that follows them.
+//
+// It returns an error for duplicate symbols, references to missing
+// symbols, jumps whose displacement doesn't fit the 16 bit offset field,
+// and failures to encode an instruction.
+func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
+	absoluteOffsets, err := insns.marshalledOffsets()
+	if err != nil {
+		return err
+	}
+
+	num := 0
+	for i, ins := range insns {
+		switch {
+		case ins.OpCode.JumpOp() == Call && ins.Constant == -1:
+			// Rewrite bpf to bpf call
+			offset, ok := absoluteOffsets[ins.Reference]
+			if !ok {
+				return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
+			}
+
+			ins.Constant = int64(offset - num - 1)
+
+		case ins.OpCode.Class() == JumpClass && ins.Offset == -1:
+			// Rewrite jump to label
+			offset, ok := absoluteOffsets[ins.Reference]
+			if !ok {
+				return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
+			}
+
+			// Converting an out of range displacement to int16 would
+			// silently wrap and encode a jump to the wrong instruction.
+			delta := offset - num - 1
+			if delta < math.MinInt16 || delta > math.MaxInt16 {
+				return errors.Errorf("instruction %d: jump to symbol %s is out of range: offset %d", i, ins.Reference, delta)
+			}
+
+			ins.Offset = int16(delta)
+		}
+
+		n, err := ins.Marshal(w, bo)
+		if err != nil {
+			return errors.Wrapf(err, "instruction %d", i)
+		}
+
+		num += int(n / InstructionSize)
+	}
+	return nil
+}
+
+// bpfInstruction is the fixed-size record that Instruction.Marshal
+// writes and Instruction.Unmarshal reads via encoding/binary.
+//
+// A double-word load occupies two consecutive records; the second one
+// only carries the upper half of the immediate in Constant.
+type bpfInstruction struct {
+	OpCode OpCode
+	// Registers packs the destination and source register into one
+	// byte, see newBPFRegisters.
+	Registers bpfRegisters
+	Offset    int16
+	Constant  int32
+}
+
+// bpfRegisters packs two registers into a single byte: the destination
+// in the lower four bits and the source in the upper four bits.
+type bpfRegisters uint8
+
+// newBPFRegisters packs dst and src into a bpfRegisters value.
+func newBPFRegisters(dst, src Register) bpfRegisters {
+	upper := src << 4
+	lower := dst & 0xF
+	return bpfRegisters(upper | lower)
+}
+
+// Dst extracts the destination register from the lower four bits.
+func (r bpfRegisters) Dst() Register {
+	lower := r & 0xF
+	return Register(lower)
+}
+
+// Src extracts the source register from the upper four bits.
+func (r bpfRegisters) Src() Register {
+	upper := r >> 4
+	return Register(upper)
+}
+
+// unreferencedSymbolError is returned by Instructions.RewriteMapPtr when
+// no instruction references the requested symbol.
+type unreferencedSymbolError struct {
+	symbol string
+}
+
+// Error implements the error interface.
+func (use *unreferencedSymbolError) Error() string {
+	return "unreferenced symbol " + use.symbol
+}
+
+// IsUnreferencedSymbol returns true if err was caused by
+// an unreferenced symbol.
+//
+// Errors wrapped with github.com/pkg/errors are unwrapped before the
+// check, so the result doesn't change when a caller adds context to the
+// error. A nil err yields false.
+func IsUnreferencedSymbol(err error) bool {
+	_, ok := errors.Cause(err).(*unreferencedSymbolError)
+	return ok
+}

+ 109 - 0
vendor/github.com/cilium/ebpf/asm/jump.go

@@ -0,0 +1,109 @@
+package asm
+
+//go:generate stringer -output jump_string.go -type=JumpOp
+
+// JumpOp affect control flow.
+//
+//    msb      lsb
+//    +----+-+---+
+//    |OP  |s|cls|
+//    +----+-+---+
+type JumpOp uint8
+
+const jumpMask OpCode = aluMask
+
+const (
+	// InvalidJumpOp is returned by getters when invoked
+	// on non branch OpCodes
+	InvalidJumpOp JumpOp = 0xff
+	// Ja jumps by offset unconditionally
+	Ja JumpOp = 0x00
+	// JEq jumps by offset if r == imm
+	JEq JumpOp = 0x10
+	// JGT jumps by offset if r > imm
+	JGT JumpOp = 0x20
+	// JGE jumps by offset if r >= imm
+	JGE JumpOp = 0x30
+	// JSet jumps by offset if r & imm
+	JSet JumpOp = 0x40
+	// JNE jumps by offset if r != imm
+	JNE JumpOp = 0x50
+	// JSGT jumps by offset if signed r > signed imm
+	JSGT JumpOp = 0x60
+	// JSGE jumps by offset if signed r >= signed imm
+	JSGE JumpOp = 0x70
+	// Call builtin or user defined function from imm
+	Call JumpOp = 0x80
+	// Exit ends execution, with value in r0
+	Exit JumpOp = 0x90
+	// JLT jumps by offset if r < imm
+	JLT JumpOp = 0xa0
+	// JLE jumps by offset if r <= imm
+	JLE JumpOp = 0xb0
+	// JSLT jumps by offset if signed r < signed imm
+	JSLT JumpOp = 0xc0
+	// JSLE jumps by offset if signed r <= signed imm
+	JSLE JumpOp = 0xd0
+)
+
+// Return emits an exit instruction.
+//
+// Requires a return value in R0.
+func Return() Instruction {
+	return Instruction{
+		OpCode: OpCode(JumpClass).SetJumpOp(Exit),
+	}
+}
+
+// Op builds the JumpClass OpCode that performs op with the given source.
+func (op JumpOp) Op(source Source) OpCode {
+	code := OpCode(JumpClass).SetJumpOp(op)
+	return code.SetSource(source)
+}
+
+// Imm compares dst to value, and adjusts PC by offset if the condition is fulfilled.
+func (op JumpOp) Imm(dst Register, value int32, label string) Instruction {
+	if op == Exit || op == Call || op == Ja {
+		return Instruction{OpCode: InvalidOpCode}
+	}
+
+	return Instruction{
+		OpCode:    OpCode(JumpClass).SetJumpOp(op).SetSource(ImmSource),
+		Dst:       dst,
+		Offset:    -1,
+		Constant:  int64(value),
+		Reference: label,
+	}
+}
+
+// Reg compares dst to src, and adjusts PC by offset if the condition is fulfilled.
+func (op JumpOp) Reg(dst, src Register, label string) Instruction {
+	if op == Exit || op == Call || op == Ja {
+		return Instruction{OpCode: InvalidOpCode}
+	}
+
+	return Instruction{
+		OpCode:    OpCode(JumpClass).SetJumpOp(op).SetSource(RegSource),
+		Dst:       dst,
+		Src:       src,
+		Offset:    -1,
+		Reference: label,
+	}
+}
+
+// Label adjusts PC to the address of the label.
+//
+// For Call the result is a bpf-to-bpf call: Src is R1 and the constant
+// is -1. For every other op the offset is -1. In both cases the
+// placeholder is resolved from the label by Instructions.Marshal.
+func (op JumpOp) Label(label string) Instruction {
+	ins := Instruction{
+		OpCode:    OpCode(JumpClass).SetJumpOp(op),
+		Reference: label,
+	}
+
+	if op == Call {
+		ins.Src = R1
+		ins.Constant = -1
+	} else {
+		ins.Offset = -1
+	}
+
+	return ins
+}

+ 53 - 0
vendor/github.com/cilium/ebpf/asm/jump_string.go

@@ -0,0 +1,53 @@
+// Code generated by "stringer -output jump_string.go -type=JumpOp"; DO NOT EDIT.
+
+package asm
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidJumpOp-255]
+	_ = x[Ja-0]
+	_ = x[JEq-16]
+	_ = x[JGT-32]
+	_ = x[JGE-48]
+	_ = x[JSet-64]
+	_ = x[JNE-80]
+	_ = x[JSGT-96]
+	_ = x[JSGE-112]
+	_ = x[Call-128]
+	_ = x[Exit-144]
+	_ = x[JLT-160]
+	_ = x[JLE-176]
+	_ = x[JSLT-192]
+	_ = x[JSLE-208]
+}
+
+const _JumpOp_name = "JaJEqJGTJGEJSetJNEJSGTJSGECallExitJLTJLEJSLTJSLEInvalidJumpOp"
+
+var _JumpOp_map = map[JumpOp]string{
+	0:   _JumpOp_name[0:2],
+	16:  _JumpOp_name[2:5],
+	32:  _JumpOp_name[5:8],
+	48:  _JumpOp_name[8:11],
+	64:  _JumpOp_name[11:15],
+	80:  _JumpOp_name[15:18],
+	96:  _JumpOp_name[18:22],
+	112: _JumpOp_name[22:26],
+	128: _JumpOp_name[26:30],
+	144: _JumpOp_name[30:34],
+	160: _JumpOp_name[34:37],
+	176: _JumpOp_name[37:40],
+	192: _JumpOp_name[40:44],
+	208: _JumpOp_name[44:48],
+	255: _JumpOp_name[48:61],
+}
+
+func (i JumpOp) String() string {
+	if str, ok := _JumpOp_map[i]; ok {
+		return str
+	}
+	return "JumpOp(" + strconv.FormatInt(int64(i), 10) + ")"
+}

+ 189 - 0
vendor/github.com/cilium/ebpf/asm/load_store.go

@@ -0,0 +1,189 @@
+package asm
+
+//go:generate stringer -output load_store_string.go -type=Mode,Size
+
+// Mode for load and store operations
+//
+//    msb      lsb
+//    +---+--+---+
+//    |MDE|sz|cls|
+//    +---+--+---+
+type Mode uint8
+
+const modeMask OpCode = 0xe0
+
+const (
+	// InvalidMode is returned by getters when invoked
+	// on non load / store OpCodes
+	InvalidMode Mode = 0xff
+	// ImmMode - immediate value
+	ImmMode Mode = 0x00
+	// AbsMode - immediate value + offset
+	AbsMode Mode = 0x20
+	// IndMode - indirect (imm+src)
+	IndMode Mode = 0x40
+	// MemMode - load from memory
+	MemMode Mode = 0x60
+	// XAddMode - add atomically across processors.
+	XAddMode Mode = 0xc0
+)
+
+// Size of load and store operations
+//
+//    msb      lsb
+//    +---+--+---+
+//    |mde|SZ|cls|
+//    +---+--+---+
+type Size uint8
+
+const sizeMask OpCode = 0x18
+
+const (
+	// InvalidSize is returned by getters when invoked
+	// on non load / store OpCodes
+	InvalidSize Size = 0xff
+	// DWord - double word; 64 bits
+	DWord Size = 0x18
+	// Word - word; 32 bits
+	Word Size = 0x00
+	// Half - half-word; 16 bits
+	Half Size = 0x08
+	// Byte - byte; 8 bits
+	Byte Size = 0x10
+)
+
+// Sizeof returns the size in bytes, or -1 if s is not one of
+// Byte, Half, Word or DWord.
+func (s Size) Sizeof() int {
+	width := -1
+	switch s {
+	case Byte:
+		width = 1
+	case Half:
+		width = 2
+	case Word:
+		width = 4
+	case DWord:
+		width = 8
+	}
+	return width
+}
+
+// LoadMemOp builds the LdXClass, MemMode OpCode that loads a value of
+// the given size from memory.
+func LoadMemOp(size Size) OpCode {
+	code := OpCode(LdXClass).SetMode(MemMode)
+	return code.SetSize(size)
+}
+
+// LoadMem emits `dst = *(size *)(src + offset)`.
+func LoadMem(dst, src Register, offset int16, size Size) Instruction {
+	return Instruction{
+		OpCode: LoadMemOp(size),
+		Dst:    dst,
+		Src:    src,
+		Offset: offset,
+	}
+}
+
+// LoadImmOp builds the LdClass, ImmMode OpCode that loads an immediate
+// of the given size.
+//
+// As of kernel 4.20, only DWord size is accepted.
+func LoadImmOp(size Size) OpCode {
+	code := OpCode(LdClass).SetMode(ImmMode)
+	return code.SetSize(size)
+}
+
+// LoadImm emits `dst = (size)value`.
+//
+// As of kernel 4.20, only DWord size is accepted.
+func LoadImm(dst Register, value int64, size Size) Instruction {
+	return Instruction{
+		OpCode:   LoadImmOp(size),
+		Dst:      dst,
+		Constant: value,
+	}
+}
+
+// LoadMapPtr stores a pointer to a map in dst.
+func LoadMapPtr(dst Register, fd int) Instruction {
+	if fd < 0 {
+		return Instruction{OpCode: InvalidOpCode}
+	}
+
+	return Instruction{
+		OpCode:   LoadImmOp(DWord),
+		Dst:      dst,
+		Src:      R1,
+		Constant: int64(fd),
+	}
+}
+
+// LoadIndOp builds the LdClass, IndMode OpCode for loading a value of
+// the given size from an sk_buff.
+func LoadIndOp(size Size) OpCode {
+	code := OpCode(LdClass).SetMode(IndMode)
+	return code.SetSize(size)
+}
+
+// LoadInd emits `dst = ntoh(*(size *)(((sk_buff *)R6)->data + src + offset))`.
+func LoadInd(dst, src Register, offset int32, size Size) Instruction {
+	return Instruction{
+		OpCode:   LoadIndOp(size),
+		Dst:      dst,
+		Src:      src,
+		Constant: int64(offset),
+	}
+}
+
+// LoadAbsOp builds the LdClass, AbsMode OpCode for loading a value of
+// the given size from an sk_buff.
+func LoadAbsOp(size Size) OpCode {
+	code := OpCode(LdClass).SetMode(AbsMode)
+	return code.SetSize(size)
+}
+
+// LoadAbs emits `r0 = ntoh(*(size *)(((sk_buff *)R6)->data + offset))`.
+func LoadAbs(offset int32, size Size) Instruction {
+	return Instruction{
+		OpCode:   LoadAbsOp(size),
+		Dst:      R0,
+		Constant: int64(offset),
+	}
+}
+
+// StoreMemOp builds the StXClass, MemMode OpCode for storing a register
+// of the given size in memory.
+func StoreMemOp(size Size) OpCode {
+	code := OpCode(StXClass).SetMode(MemMode)
+	return code.SetSize(size)
+}
+
+// StoreMem emits `*(size *)(dst + offset) = src`
+func StoreMem(dst Register, offset int16, src Register, size Size) Instruction {
+	return Instruction{
+		OpCode: StoreMemOp(size),
+		Dst:    dst,
+		Src:    src,
+		Offset: offset,
+	}
+}
+
+// StoreImmOp builds the StClass, MemMode OpCode for storing an immediate
+// of the given size in memory.
+func StoreImmOp(size Size) OpCode {
+	code := OpCode(StClass).SetMode(MemMode)
+	return code.SetSize(size)
+}
+
+// StoreImm emits `*(size *)(dst + offset) = value`.
+func StoreImm(dst Register, offset int16, value int64, size Size) Instruction {
+	return Instruction{
+		OpCode:   StoreImmOp(size),
+		Dst:      dst,
+		Offset:   offset,
+		Constant: value,
+	}
+}
+
+// StoreXAddOp builds the StXClass, XAddMode OpCode to atomically add a
+// register to a value of the given size in memory.
+func StoreXAddOp(size Size) OpCode {
+	code := OpCode(StXClass).SetMode(XAddMode)
+	return code.SetSize(size)
+}
+
+// StoreXAdd atomically adds src to *dst.
+func StoreXAdd(dst, src Register, size Size) Instruction {
+	return Instruction{
+		OpCode: StoreXAddOp(size),
+		Dst:    dst,
+		Src:    src,
+	}
+}

+ 80 - 0
vendor/github.com/cilium/ebpf/asm/load_store_string.go

@@ -0,0 +1,80 @@
+// Code generated by "stringer -output load_store_string.go -type=Mode,Size"; DO NOT EDIT.
+
+package asm
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidMode-255]
+	_ = x[ImmMode-0]
+	_ = x[AbsMode-32]
+	_ = x[IndMode-64]
+	_ = x[MemMode-96]
+	_ = x[XAddMode-192]
+}
+
+const (
+	_Mode_name_0 = "ImmMode"
+	_Mode_name_1 = "AbsMode"
+	_Mode_name_2 = "IndMode"
+	_Mode_name_3 = "MemMode"
+	_Mode_name_4 = "XAddMode"
+	_Mode_name_5 = "InvalidMode"
+)
+
+// String returns the name of the Mode constant, or "Mode(N)" with the
+// decimal value N when i matches none of the named constants.
+func (i Mode) String() string {
+	switch {
+	case i == 0:
+		return _Mode_name_0
+	case i == 32:
+		return _Mode_name_1
+	case i == 64:
+		return _Mode_name_2
+	case i == 96:
+		return _Mode_name_3
+	case i == 192:
+		return _Mode_name_4
+	case i == 255:
+		return _Mode_name_5
+	default:
+		return "Mode(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidSize-255]
+	_ = x[DWord-24]
+	_ = x[Word-0]
+	_ = x[Half-8]
+	_ = x[Byte-16]
+}
+
+const (
+	_Size_name_0 = "Word"
+	_Size_name_1 = "Half"
+	_Size_name_2 = "Byte"
+	_Size_name_3 = "DWord"
+	_Size_name_4 = "InvalidSize"
+)
+
+// String returns the name of the Size constant, or "Size(N)" with the
+// decimal value N when i matches none of the named constants.
+func (i Size) String() string {
+	switch {
+	case i == 0:
+		return _Size_name_0
+	case i == 8:
+		return _Size_name_1
+	case i == 16:
+		return _Size_name_2
+	case i == 24:
+		return _Size_name_3
+	case i == 255:
+		return _Size_name_4
+	default:
+		return "Size(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+}

+ 237 - 0
vendor/github.com/cilium/ebpf/asm/opcode.go

@@ -0,0 +1,237 @@
+package asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+//go:generate stringer -output opcode_string.go -type=Class
+
+// encoding identifies how the bits of an OpCode outside the class bits are
+// laid out. It is derived from the opcode's Class by Class.encoding.
+type encoding int
+
+const (
+	// unknownEncoding is reported for classes without a known layout.
+	unknownEncoding encoding = iota
+	// loadOrStore is the layout of the Ld, LdX, St and StX classes.
+	loadOrStore
+	// jumpOrALU is the layout of the ALU, ALU64 and Jump classes.
+	jumpOrALU
+)
+
+// Class of operations
+//
+//    msb      lsb
+//    +---+--+---+
+//    |  ??  |CLS|
+//    +---+--+---+
+type Class uint8
+
+const classMask OpCode = 0x07
+
+const (
+	// LdClass loads immediates and sk_buff data into registers
+	LdClass Class = 0x00
+	// LdXClass loads memory into registers
+	LdXClass Class = 0x01
+	// StClass stores immediates to memory
+	StClass Class = 0x02
+	// StXClass stores registers to memory
+	StXClass Class = 0x03
+	// ALUClass arithmetic operators
+	ALUClass Class = 0x04
+	// JumpClass jump operators
+	JumpClass Class = 0x05
+	// ALU64Class arithmetic in 64 bit mode
+	ALU64Class Class = 0x07
+)
+
+// encoding reports which opcode layout cls uses: loadOrStore for the four
+// load/store classes, jumpOrALU for the ALU, ALU64 and jump classes, and
+// unknownEncoding for any other value.
+func (cls Class) encoding() encoding {
+	if cls == LdClass || cls == LdXClass || cls == StClass || cls == StXClass {
+		return loadOrStore
+	}
+	if cls == ALUClass || cls == ALU64Class || cls == JumpClass {
+		return jumpOrALU
+	}
+	return unknownEncoding
+}
+
+// OpCode is a packed eBPF opcode.
+//
+// Its encoding is defined by a Class value:
+//
+//    msb      lsb
+//    +----+-+---+
+//    | ???? |CLS|
+//    +----+-+---+
+type OpCode uint8
+
+// InvalidOpCode is returned by the Set* methods on OpCode when the receiver
+// has the wrong class or the new value has bits outside the field's mask.
+const InvalidOpCode OpCode = 0xff
+
+// marshalledInstructions returns the number of BPF instructions required
+// to encode this opcode: two for the double-word immediate load, one for
+// everything else.
+func (op OpCode) marshalledInstructions() int {
+	count := 1
+	if op.isDWordLoad() {
+		count = 2
+	}
+	return count
+}
+
+// isDWordLoad reports whether op is the double-word immediate load opcode.
+func (op OpCode) isDWordLoad() bool {
+	dwordLoad := LoadImmOp(DWord)
+	return op == dwordLoad
+}
+
+// Class returns the class of operation, i.e. the bits of op selected by
+// classMask.
+func (op OpCode) Class() Class {
+	classBits := op & classMask
+	return Class(classBits)
+}
+
+// Mode returns the mode for load and store operations.
+//
+// Returns InvalidMode if op is not a load or store.
+func (op OpCode) Mode() Mode {
+	if op.Class().encoding() == loadOrStore {
+		return Mode(op & modeMask)
+	}
+	return InvalidMode
+}
+
+// Size returns the size for load and store operations.
+//
+// Returns InvalidSize if op is not a load or store.
+func (op OpCode) Size() Size {
+	if op.Class().encoding() == loadOrStore {
+		return Size(op & sizeMask)
+	}
+	return InvalidSize
+}
+
+// Source returns the source for branch and ALU operations.
+//
+// Returns InvalidSource if op is neither a jump nor an ALU operation, or if
+// its ALUOp is Swap.
+func (op OpCode) Source() Source {
+	if op.Class().encoding() == jumpOrALU && op.ALUOp() != Swap {
+		return Source(op & sourceMask)
+	}
+	return InvalidSource
+}
+
+// ALUOp returns the ALUOp.
+//
+// Returns InvalidALUOp unless op belongs to ALUClass or ALU64Class, the same
+// classes SetALUOp accepts. Jump opcodes share the jumpOrALU layout, so
+// decoding them as an ALUOp would yield a meaningless value and would make
+// Source and Endianness misreport any jump whose bits happen to equal Swap.
+func (op OpCode) ALUOp() ALUOp {
+	if cls := op.Class(); cls != ALUClass && cls != ALU64Class {
+		return InvalidALUOp
+	}
+	return ALUOp(op & aluMask)
+}
+
+// Endianness returns the Endianness for a byte swap instruction.
+//
+// Returns InvalidEndian if the ALUOp of op is not Swap.
+func (op OpCode) Endianness() Endianness {
+	if op.ALUOp() == Swap {
+		return Endianness(op & endianMask)
+	}
+	return InvalidEndian
+}
+
+// JumpOp returns the JumpOp.
+//
+// Returns InvalidJumpOp unless op belongs to JumpClass, the only class
+// SetJumpOp accepts. ALU opcodes share the jumpOrALU layout, so decoding
+// them as a JumpOp would make unrelated arithmetic opcodes compare equal to
+// jump operations.
+func (op OpCode) JumpOp() JumpOp {
+	if op.Class() != JumpClass {
+		return InvalidJumpOp
+	}
+	return JumpOp(op & jumpMask)
+}
+
+// SetMode sets the mode on load and store operations.
+//
+// Returns InvalidOpCode if op is of the wrong class or mode has bits
+// outside modeMask.
+func (op OpCode) SetMode(mode Mode) OpCode {
+	switch {
+	case op.Class().encoding() != loadOrStore:
+		return InvalidOpCode
+	case !valid(OpCode(mode), modeMask):
+		return InvalidOpCode
+	}
+	cleared := op &^ modeMask
+	return cleared | OpCode(mode)
+}
+
+// SetSize sets the size on load and store operations.
+//
+// Returns InvalidOpCode if op is of the wrong class or size has bits
+// outside sizeMask.
+func (op OpCode) SetSize(size Size) OpCode {
+	switch {
+	case op.Class().encoding() != loadOrStore:
+		return InvalidOpCode
+	case !valid(OpCode(size), sizeMask):
+		return InvalidOpCode
+	}
+	cleared := op &^ sizeMask
+	return cleared | OpCode(size)
+}
+
+// SetSource sets the source on jump and ALU operations.
+//
+// Returns InvalidOpCode if op is of the wrong class or source has bits
+// outside sourceMask.
+func (op OpCode) SetSource(source Source) OpCode {
+	switch {
+	case op.Class().encoding() != jumpOrALU:
+		return InvalidOpCode
+	case !valid(OpCode(source), sourceMask):
+		return InvalidOpCode
+	}
+	cleared := op &^ sourceMask
+	return cleared | OpCode(source)
+}
+
+// SetALUOp sets the ALUOp on ALU operations.
+//
+// Returns InvalidOpCode if op is neither ALUClass nor ALU64Class, or if alu
+// has bits outside aluMask.
+func (op OpCode) SetALUOp(alu ALUOp) OpCode {
+	cls := op.Class()
+	isALU := cls == ALUClass || cls == ALU64Class
+	if !isALU || !valid(OpCode(alu), aluMask) {
+		return InvalidOpCode
+	}
+	cleared := op &^ aluMask
+	return cleared | OpCode(alu)
+}
+
+// SetJumpOp sets the JumpOp on jump operations.
+//
+// Returns InvalidOpCode if op is not of JumpClass or jump has bits outside
+// jumpMask.
+func (op OpCode) SetJumpOp(jump JumpOp) OpCode {
+	switch {
+	case op.Class() != JumpClass:
+		return InvalidOpCode
+	case !valid(OpCode(jump), jumpMask):
+		return InvalidOpCode
+	}
+	cleared := op &^ jumpMask
+	return cleared | OpCode(jump)
+}
+
+// String returns a human readable mnemonic for op.
+//
+// Loads and stores are rendered as class, mode and a size suffix (DW, W, H
+// or B). ALU operations are rendered as the operation followed by either
+// the endianness (for Swap) or an optional "32" marker and the source.
+// Jumps are rendered as the operation and, except for Exit and Call, the
+// source. Opcodes of any other class are rendered as a hexadecimal number.
+func (op OpCode) String() string {
+	var f strings.Builder
+
+	switch class := op.Class(); class {
+	case LdClass, LdXClass, StClass, StXClass:
+		f.WriteString(strings.TrimSuffix(class.String(), "Class"))
+
+		mode := op.Mode()
+		f.WriteString(strings.TrimSuffix(mode.String(), "Mode"))
+
+		switch op.Size() {
+		case DWord:
+			f.WriteString("DW")
+		case Word:
+			f.WriteString("W")
+		case Half:
+			f.WriteString("H")
+		case Byte:
+			f.WriteString("B")
+		}
+
+	case ALU64Class, ALUClass:
+		f.WriteString(op.ALUOp().String())
+
+		if op.ALUOp() == Swap {
+			// Width for Endian is controlled by Constant
+			f.WriteString(op.Endianness().String())
+		} else {
+			if class == ALUClass {
+				f.WriteString("32")
+			}
+
+			f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
+		}
+
+	case JumpClass:
+		f.WriteString(op.JumpOp().String())
+		if jop := op.JumpOp(); jop != Exit && jop != Call {
+			f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
+		}
+
+	default:
+		// Format the raw byte, not op itself: OpCode implements
+		// fmt.Stringer, and fmt invokes String for the %x verb, so
+		// passing op would re-enter this branch and recurse forever.
+		fmt.Fprintf(&f, "%#x", uint8(op))
+	}
+
+	return f.String()
+}
+
+// valid reports whether every bit set in value is also set in mask.
+func valid(value, mask OpCode) bool {
+	stray := value &^ mask
+	return stray == 0
+}

+ 38 - 0
vendor/github.com/cilium/ebpf/asm/opcode_string.go

@@ -0,0 +1,38 @@
+// Code generated by "stringer -output opcode_string.go -type=Class"; DO NOT EDIT.
+
+package asm
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[LdClass-0]
+	_ = x[LdXClass-1]
+	_ = x[StClass-2]
+	_ = x[StXClass-3]
+	_ = x[ALUClass-4]
+	_ = x[JumpClass-5]
+	_ = x[ALU64Class-7]
+}
+
+const (
+	_Class_name_0 = "LdClassLdXClassStClassStXClassALUClassJumpClass"
+	_Class_name_1 = "ALU64Class"
+)
+
+var (
+	_Class_index_0 = [...]uint8{0, 7, 15, 22, 30, 38, 47}
+)
+
+// String returns the name of the Class constant, or "Class(N)" with the
+// decimal value N when i matches none of the named constants.
+func (i Class) String() string {
+	switch {
+	case 0 <= i && i <= 5:
+		return _Class_name_0[_Class_index_0[i]:_Class_index_0[i+1]]
+	case i == 7:
+		return _Class_name_1
+	default:
+		return "Class(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+}

+ 42 - 0
vendor/github.com/cilium/ebpf/asm/register.go

@@ -0,0 +1,42 @@
+package asm
+
+import (
+	"fmt"
+)
+
+// Register is the source or destination of most operations.
+type Register uint8
+
+// R0 contains return values.
+const R0 Register = 0
+
+// Registers for function arguments.
+const (
+	R1 Register = iota + 1
+	R2
+	R3
+	R4
+	R5
+)
+
+// Callee saved registers preserved by function calls.
+const (
+	R6 Register = iota + 6
+	R7
+	R8
+	R9
+)
+
+// Read-only frame pointer to access stack.
+const (
+	R10 Register = 10
+	RFP          = R10
+)
+
+// String returns "rfp" for the frame pointer and "r<N>" for every other
+// register number N.
+func (r Register) String() string {
+	if r == RFP {
+		return "rfp"
+	}
+	return fmt.Sprintf("r%d", uint8(r))
+}

+ 148 - 0
vendor/github.com/cilium/ebpf/collection.go

@@ -0,0 +1,148 @@
+package ebpf
+
+import (
+	"github.com/cilium/ebpf/asm"
+	"github.com/pkg/errors"
+)
+
+// CollectionOptions control loading a collection into the kernel.
+type CollectionOptions struct {
+	// Programs is handed to NewProgramWithOptions for every program of
+	// the collection.
+	Programs ProgramOptions
+}
+
+// CollectionSpec describes a collection.
+type CollectionSpec struct {
+	Maps     map[string]*MapSpec
+	Programs map[string]*ProgramSpec
+}
+
+// Copy returns a recursive copy of the spec.
+//
+// A nil receiver yields nil. Every MapSpec and ProgramSpec is duplicated
+// through its own Copy method and stored under the same name.
+func (cs *CollectionSpec) Copy() *CollectionSpec {
+	if cs == nil {
+		return nil
+	}
+
+	mapSpecs := make(map[string]*MapSpec, len(cs.Maps))
+	for key, ms := range cs.Maps {
+		mapSpecs[key] = ms.Copy()
+	}
+
+	progSpecs := make(map[string]*ProgramSpec, len(cs.Programs))
+	for key, ps := range cs.Programs {
+		progSpecs[key] = ps.Copy()
+	}
+
+	return &CollectionSpec{
+		Maps:     mapSpecs,
+		Programs: progSpecs,
+	}
+}
+
+// Collection is a collection of Programs and Maps associated
+// with their symbols
+type Collection struct {
+	// Programs holds the loaded programs, keyed by name.
+	Programs map[string]*Program
+	// Maps holds the created maps, keyed by name.
+	Maps map[string]*Map
+}
+
+// NewCollection creates a Collection from a specification.
+//
+// Only maps referenced by at least one of the programs are initialized.
+func NewCollection(spec *CollectionSpec) (*Collection, error) {
+	return NewCollectionWithOptions(spec, CollectionOptions{})
+}
+
+// NewCollectionWithOptions creates a Collection from a specification.
+//
+// Every entry of spec.Maps is created with NewMap. Each program is loaded
+// from a copy of its spec in which instructions referencing one of those
+// maps by name have been rewritten to the map's file descriptor, using
+// opts.Programs as the program options.
+//
+// If any step fails, all maps and programs created so far are closed and a
+// nil Collection is returned together with the error.
+func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (_ *Collection, err error) {
+	var (
+		maps  = make(map[string]*Map)
+		progs = make(map[string]*Program)
+	)
+
+	// Nothing created here is reachable by the caller once an error is
+	// returned, so release it instead of leaking it.
+	defer func() {
+		if err == nil {
+			return
+		}
+		for _, prog := range progs {
+			prog.Close()
+		}
+		for _, m := range maps {
+			m.Close()
+		}
+	}()
+
+	for mapName, mapSpec := range spec.Maps {
+		m, mapErr := NewMap(mapSpec)
+		if mapErr != nil {
+			return nil, errors.Wrapf(mapErr, "map %s", mapName)
+		}
+		maps[mapName] = m
+	}
+
+	for progName, origProgSpec := range spec.Programs {
+		// Work on a copy so the caller's spec is left untouched.
+		progSpec := origProgSpec.Copy()
+
+		// Rewrite any reference to a valid map.
+		for i := range progSpec.Instructions {
+			var (
+				ins = &progSpec.Instructions[i]
+				m   = maps[ins.Reference]
+			)
+
+			if ins.Reference == "" || m == nil {
+				continue
+			}
+
+			if ins.Src == asm.R1 {
+				// Don't overwrite maps already rewritten, users can
+				// rewrite programs in the spec themselves
+				continue
+			}
+
+			if rewriteErr := ins.RewriteMapPtr(m.FD()); rewriteErr != nil {
+				return nil, errors.Wrapf(rewriteErr, "program %s: map %s", progName, ins.Reference)
+			}
+		}
+
+		prog, progErr := NewProgramWithOptions(progSpec, opts.Programs)
+		if progErr != nil {
+			return nil, errors.Wrapf(progErr, "program %s", progName)
+		}
+		progs[progName] = prog
+	}
+
+	return &Collection{
+		Programs: progs,
+		Maps:     maps,
+	}, nil
+}
+
+// LoadCollection parses an object file and converts it to a collection.
+//
+// The file is parsed with LoadCollectionSpec and the resulting spec is
+// passed to NewCollection.
+func LoadCollection(file string) (*Collection, error) {
+	collSpec, loadErr := LoadCollectionSpec(file)
+	if loadErr != nil {
+		return nil, loadErr
+	}
+	return NewCollection(collSpec)
+}
+
+// Close frees all maps and programs associated with the collection.
+//
+// Programs are closed first, then maps. The collection mustn't be used
+// afterwards.
+func (coll *Collection) Close() {
+	for name := range coll.Programs {
+		coll.Programs[name].Close()
+	}
+	for name := range coll.Maps {
+		coll.Maps[name].Close()
+	}
+}
+
+// DetachMap removes the named map from the Collection.
+//
+// This means that a later call to Close() will not affect this map.
+//
+// Returns nil if no map of that name exists.
+func (coll *Collection) DetachMap(name string) *Map {
+	detached, found := coll.Maps[name]
+	if !found {
+		return nil
+	}
+	delete(coll.Maps, name)
+	return detached
+}
+
+// DetachProgram removes the named program from the Collection.
+//
+// This means that a later call to Close() will not affect this program.
+//
+// Returns nil if no program of that name exists.
+func (coll *Collection) DetachProgram(name string) *Program {
+	detached, found := coll.Programs[name]
+	if !found {
+		return nil
+	}
+	delete(coll.Programs, name)
+	return detached
+}

+ 17 - 0
vendor/github.com/cilium/ebpf/doc.go

@@ -0,0 +1,17 @@
+// Package ebpf is a toolkit for working with eBPF programs.
+//
+// eBPF programs are small snippets of code which are executed directly
+// in a VM in the Linux kernel, which makes them very fast and flexible.
+// Many Linux subsystems now accept eBPF programs. This makes it possible
+// to implement highly application specific logic inside the kernel,
+// without having to modify the actual kernel itself.
+//
+// This package is designed for long-running processes which
+// want to use eBPF to implement part of their application logic. It has no
+// run-time dependencies outside of the library and the Linux kernel itself.
+// eBPF code should be compiled ahead of time using clang, and shipped with
+// your application as any other resource.
+//
+// This package doesn't include code required to attach eBPF to Linux
+// subsystems, since this varies per subsystem.
+package ebpf

+ 392 - 0
vendor/github.com/cilium/ebpf/elf_reader.go

@@ -0,0 +1,392 @@
+package ebpf
+
+import (
+	"bytes"
+	"debug/elf"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	"github.com/cilium/ebpf/asm"
+
+	"github.com/pkg/errors"
+)
+
+// elfCode bundles a parsed ELF file with its symbol table and an index of
+// symbol names, grouped by section and keyed by symbol value.
+type elfCode struct {
+	*elf.File
+	symbols           []elf.Symbol
+	symbolsPerSection map[elf.SectionIndex]map[uint64]string
+}
+
+// LoadCollectionSpec parses an ELF file into a CollectionSpec.
+//
+// Errors from opening the file are returned as is; parse errors are
+// annotated with the file name.
+func LoadCollectionSpec(file string) (*CollectionSpec, error) {
+	handle, err := os.Open(file)
+	if err != nil {
+		return nil, err
+	}
+	defer handle.Close()
+
+	spec, err := LoadCollectionSpecFromReader(handle)
+	if err != nil {
+		return spec, errors.Wrapf(err, "file %s", file)
+	}
+	return spec, nil
+}
+
+// LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec.
+//
+// Sections are classified by name prefix ("license", "version", "maps"),
+// by type (SHT_REL relocation sections) or as program sections (non-empty
+// executable SHT_PROGBITS). A license section is required; a version
+// section is optional. Program sections whose type cannot be derived from
+// their name are treated as libraries and linked into every program.
+func LoadCollectionSpecFromReader(code io.ReaderAt) (*CollectionSpec, error) {
+	f, err := elf.NewFile(code)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	symbols, err := f.Symbols()
+	if err != nil {
+		return nil, errors.Wrap(err, "load symbols")
+	}
+
+	ec := &elfCode{f, symbols, symbolsPerSection(symbols)}
+
+	// Sort the sections into buckets, keyed by section index where the
+	// index is needed later to look up symbols or relocations.
+	var licenseSection, versionSection *elf.Section
+	progSections := make(map[elf.SectionIndex]*elf.Section)
+	relSections := make(map[elf.SectionIndex]*elf.Section)
+	mapSections := make(map[elf.SectionIndex]*elf.Section)
+	for i, sec := range ec.Sections {
+		switch {
+		case strings.HasPrefix(sec.Name, "license"):
+			licenseSection = sec
+		case strings.HasPrefix(sec.Name, "version"):
+			versionSection = sec
+		case strings.HasPrefix(sec.Name, "maps"):
+			mapSections[elf.SectionIndex(i)] = sec
+		case sec.Type == elf.SHT_REL:
+			// sec.Info holds the index of the section the relocations apply to.
+			if int(sec.Info) >= len(ec.Sections) {
+				return nil, errors.Errorf("found relocation section %v for missing section %v", i, sec.Info)
+			}
+
+			// Store relocations under the section index of the target
+			idx := elf.SectionIndex(sec.Info)
+			if relSections[idx] != nil {
+				return nil, errors.Errorf("section %d has multiple relocation sections", idx)
+			}
+			relSections[idx] = sec
+		case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0:
+			progSections[elf.SectionIndex(i)] = sec
+		}
+	}
+
+	license, err := loadLicense(licenseSection)
+	if err != nil {
+		return nil, errors.Wrap(err, "load license")
+	}
+
+	version, err := loadVersion(versionSection, ec.ByteOrder)
+	if err != nil {
+		return nil, errors.Wrap(err, "load version")
+	}
+
+	maps, err := ec.loadMaps(mapSections)
+	if err != nil {
+		return nil, errors.Wrap(err, "load maps")
+	}
+
+	progs, libs, err := ec.loadPrograms(progSections, relSections, license, version)
+	if err != nil {
+		return nil, errors.Wrap(err, "load programs")
+	}
+
+	// Link the library sections into every program.
+	if len(libs) > 0 {
+		for name, prog := range progs {
+			prog.Instructions, err = link(prog.Instructions, libs...)
+			if err != nil {
+				return nil, errors.Wrapf(err, "program %s", name)
+			}
+		}
+	}
+
+	return &CollectionSpec{maps, progs}, nil
+}
+
+// loadLicense returns the contents of the license section with trailing
+// NUL bytes removed. A nil section is an error.
+func loadLicense(sec *elf.Section) (string, error) {
+	if sec == nil {
+		return "", errors.Errorf("missing license section")
+	}
+
+	raw, err := sec.Data()
+	if err != nil {
+		return "", errors.Wrapf(err, "section %s", sec.Name)
+	}
+
+	license := bytes.TrimRight(raw, "\000")
+	return string(license), nil
+}
+
+// loadVersion decodes a uint32 from the start of the version section using
+// byte order bo. A nil section yields version 0 and no error.
+func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) {
+	if sec == nil {
+		return 0, nil
+	}
+
+	var kver uint32
+	if err := binary.Read(sec.Open(), bo, &kver); err != nil {
+		return kver, errors.Wrapf(err, "section %s", sec.Name)
+	}
+	return kver, nil
+}
+
+// loadPrograms decodes every program section into instructions.
+//
+// Sections whose name maps to a known program type become ProgramSpecs,
+// keyed by the symbol found at offset 0 of the section. All other sections
+// are returned as library instruction streams to be linked later.
+func (ec *elfCode) loadPrograms(progSections, relSections map[elf.SectionIndex]*elf.Section, license string, version uint32) (map[string]*ProgramSpec, []asm.Instructions, error) {
+	specs := make(map[string]*ProgramSpec)
+	var libraries []asm.Instructions
+
+	for secIdx, sec := range progSections {
+		labels := ec.symbolsPerSection[secIdx]
+		if len(labels) == 0 {
+			return nil, nil, errors.Errorf("section %v: missing symbols", sec.Name)
+		}
+
+		// The symbol at offset 0 names the program.
+		entry := labels[0]
+		if entry == "" {
+			return nil, nil, errors.Errorf("section %v: no label at start", sec.Name)
+		}
+
+		relocs, err := ec.loadRelocations(relSections[secIdx])
+		if err != nil {
+			return nil, nil, errors.Wrapf(err, "program %s: can't load relocations", entry)
+		}
+
+		code, err := ec.loadInstructions(sec, labels, relocs)
+		if err != nil {
+			return nil, nil, errors.Wrapf(err, "program %s: can't unmarshal instructions", entry)
+		}
+
+		progType, attachType := getProgType(sec.Name)
+		if progType == UnspecifiedProgram {
+			// There is no single name we can use for "library" sections,
+			// since they may contain multiple functions. Their labels are
+			// decoded later on, and sections are linked that way.
+			libraries = append(libraries, code)
+			continue
+		}
+
+		specs[entry] = &ProgramSpec{
+			Name:          entry,
+			Type:          progType,
+			AttachType:    attachType,
+			License:       license,
+			KernelVersion: version,
+			Instructions:  code,
+		}
+	}
+	return specs, libraries, nil
+}
+
+// loadInstructions decodes the instructions of section until EOF.
+//
+// Each instruction is annotated with the symbol and the relocation target
+// recorded for its byte offset, if any.
+func (ec *elfCode) loadInstructions(section *elf.Section, symbols, relocations map[uint64]string) (asm.Instructions, error) {
+	reader := section.Open()
+
+	var (
+		decoded asm.Instructions
+		current asm.Instruction
+		pos     uint64
+	)
+	for {
+		size, err := current.Unmarshal(reader, ec.ByteOrder)
+		switch {
+		case err == io.EOF:
+			return decoded, nil
+		case err != nil:
+			return nil, errors.Wrapf(err, "offset %d", pos)
+		}
+
+		current.Symbol = symbols[pos]
+		current.Reference = relocations[pos]
+		decoded = append(decoded, current)
+
+		pos += size
+	}
+}
+
+// loadMaps decodes the map definitions found in the given sections.
+//
+// Every section is split into len(symbols) descriptors of equal size, and
+// each descriptor must start at the value of a symbol, which names the map.
+// A descriptor consists of type, key size, value size, max entries and
+// flags; any bytes following those fields must be zero.
+func (ec *elfCode) loadMaps(mapSections map[elf.SectionIndex]*elf.Section) (map[string]*MapSpec, error) {
+	var (
+		maps = make(map[string]*MapSpec)
+		b    = make([]byte, 1)
+	)
+	for idx, sec := range mapSections {
+		syms := ec.symbolsPerSection[idx]
+		if len(syms) == 0 {
+			return nil, errors.Errorf("section %v: no symbols", sec.Name)
+		}
+
+		if sec.Size%uint64(len(syms)) != 0 {
+			return nil, errors.Errorf("section %v: map descriptors are not of equal size", sec.Name)
+		}
+
+		var (
+			r    = sec.Open()
+			size = sec.Size / uint64(len(syms))
+		)
+		for i, offset := 0, uint64(0); i < len(syms); i, offset = i+1, offset+size {
+			mapSym := syms[offset]
+			if mapSym == "" {
+				// Report the symbols that do exist through the error
+				// rather than printing them: library code must not
+				// write to the stdout of the program using it.
+				return nil, errors.Errorf("section %s: missing symbol for map at offset %d (known symbols: %s)", sec.Name, offset, fmt.Sprint(syms))
+			}
+
+			if maps[mapSym] != nil {
+				return nil, errors.Errorf("section %v: map %v already exists", sec.Name, mapSym)
+			}
+
+			// Restrict reads to this descriptor so that a short one
+			// can't consume bytes of the next.
+			lr := io.LimitReader(r, int64(size))
+
+			var spec MapSpec
+			switch {
+			case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil:
+				return nil, errors.Errorf("map %v: missing type", mapSym)
+			case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil:
+				return nil, errors.Errorf("map %v: missing key size", mapSym)
+			case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil:
+				return nil, errors.Errorf("map %v: missing value size", mapSym)
+			case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil:
+				return nil, errors.Errorf("map %v: missing max entries", mapSym)
+			case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil:
+				return nil, errors.Errorf("map %v: missing flags", mapSym)
+			}
+
+			// Drain the rest of the descriptor, rejecting fields we
+			// don't understand unless they are zero.
+			for {
+				_, err := lr.Read(b)
+				if err == io.EOF {
+					break
+				}
+				if err != nil {
+					return nil, err
+				}
+				if b[0] != 0 {
+					return nil, errors.Errorf("map %v: unknown and non-zero fields in definition", mapSym)
+				}
+			}
+
+			maps[mapSym] = &spec
+		}
+	}
+	return maps, nil
+}
+
+// getProgType derives the program type and attach type from section name v
+// by prefix matching against the tables below.
+//
+// Returns UnspecifiedProgram and AttachNone if no program type prefix
+// matches. Both tables are Go maps, whose iteration order is unspecified,
+// so the result is only well defined as long as at most one key of each
+// table is a prefix of v.
+func getProgType(v string) (ProgramType, AttachType) {
+	types := map[string]ProgramType{
+		// From https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c#n3568
+		"socket":         SocketFilter,
+		"seccomp":        SocketFilter,
+		"kprobe/":        Kprobe,
+		"kretprobe/":     Kprobe,
+		"tracepoint/":    TracePoint,
+		"xdp":            XDP,
+		"perf_event":     PerfEvent,
+		"sockops":        SockOps,
+		"sk_skb":         SkSKB,
+		"sk_msg":         SkMsg,
+		"lirc_mode2":     LircMode2,
+		"flow_dissector": FlowDissector,
+
+		"cgroup_skb/":       CGroupSKB,
+		"cgroup/dev":        CGroupDevice,
+		"cgroup/skb":        CGroupSKB,
+		"cgroup/sock":       CGroupSock,
+		"cgroup/post_bind":  CGroupSock,
+		"cgroup/bind":       CGroupSockAddr,
+		"cgroup/connect":    CGroupSockAddr,
+		"cgroup/sendmsg":    CGroupSockAddr,
+		"cgroup/recvmsg":    CGroupSockAddr,
+		"cgroup/sysctl":     CGroupSysctl,
+		"cgroup/getsockopt": CGroupSockopt,
+		"cgroup/setsockopt": CGroupSockopt,
+		"classifier":        SchedCLS,
+		"action":            SchedACT,
+	}
+	attachTypes := map[string]AttachType{
+		"cgroup_skb/ingress":    AttachCGroupInetIngress,
+		"cgroup_skb/egress":     AttachCGroupInetEgress,
+		"cgroup/sock":           AttachCGroupInetSockCreate,
+		"cgroup/post_bind4":     AttachCGroupInet4PostBind,
+		"cgroup/post_bind6":     AttachCGroupInet6PostBind,
+		"cgroup/dev":            AttachCGroupDevice,
+		"sockops":               AttachCGroupSockOps,
+		"sk_skb/stream_parser":  AttachSkSKBStreamParser,
+		"sk_skb/stream_verdict": AttachSkSKBStreamVerdict,
+		"sk_msg":                AttachSkSKBStreamVerdict,
+		"lirc_mode2":            AttachLircMode2,
+		"flow_dissector":        AttachFlowDissector,
+		"cgroup/bind4":          AttachCGroupInet4Bind,
+		"cgroup/bind6":          AttachCGroupInet6Bind,
+		"cgroup/connect4":       AttachCGroupInet4Connect,
+		"cgroup/connect6":       AttachCGroupInet6Connect,
+		"cgroup/sendmsg4":       AttachCGroupUDP4Sendmsg,
+		"cgroup/sendmsg6":       AttachCGroupUDP6Sendmsg,
+		"cgroup/recvmsg4":       AttachCGroupUDP4Recvmsg,
+		"cgroup/recvmsg6":       AttachCGroupUDP6Recvmsg,
+		"cgroup/sysctl":         AttachCGroupSysctl,
+		"cgroup/getsockopt":     AttachCGroupGetsockopt,
+		"cgroup/setsockopt":     AttachCGroupSetsockopt,
+	}
+	// The attach type defaults to AttachNone when no prefix matches.
+	attachType := AttachNone
+	for k, t := range attachTypes {
+		if strings.HasPrefix(v, k) {
+			attachType = t
+		}
+	}
+
+	// The attach type is only reported together with a known program type.
+	for k, t := range types {
+		if strings.HasPrefix(v, k) {
+			return t, attachType
+		}
+	}
+	return UnspecifiedProgram, AttachNone
+}
+
+// loadRelocations decodes a relocation section into a map from the offset
+// being relocated to the name of the symbol it refers to.
+//
+// A nil section yields an empty map. Entries are decoded as elf.Rel64 and
+// must therefore be at least 16 bytes long.
+func (ec *elfCode) loadRelocations(sec *elf.Section) (map[uint64]string, error) {
+	rels := make(map[uint64]string)
+	if sec == nil {
+		return rels, nil
+	}
+
+	if sec.Entsize < 16 {
+		return nil, errors.New("rels are less than 16 bytes")
+	}
+
+	r := sec.Open()
+	for off := uint64(0); off < sec.Size; off += sec.Entsize {
+		ent := io.LimitReader(r, int64(sec.Entsize))
+
+		var rel elf.Rel64
+		if binary.Read(ent, ec.ByteOrder, &rel) != nil {
+			return nil, errors.Errorf("can't parse relocation at offset %v", off)
+		}
+
+		// ec.symbols lacks the null symbol, so symbol number x lives at
+		// index x-1. Validate before subtracting: number 0 would wrap
+		// around, and where int is 32 bits wide the wrapped value turns
+		// into -1, slips past a signed bounds check and panics below.
+		sym := elf.R_SYM64(rel.Info)
+		if sym == 0 || uint64(sym) > uint64(len(ec.symbols)) {
+			return nil, errors.Errorf("relocation at offset %d: symbol %v doesnt exist", off, int64(sym)-1)
+		}
+
+		rels[rel.Off] = ec.symbols[sym-1].Name
+	}
+	return rels, nil
+}
+
+// symbolsPerSection groups the names of symbols by the section they belong
+// to, keyed by symbol value within each section.
+//
+// Only named symbols of type STT_NOTYPE, STT_OBJECT or STT_FUNC are
+// included.
+func symbolsPerSection(symbols []elf.Symbol) map[elf.SectionIndex]map[uint64]string {
+	bySection := make(map[elf.SectionIndex]map[uint64]string)
+	for _, sym := range symbols {
+		// STT_NOTYPE is accepted because older versions of LLVM don't
+		// tag symbols correctly.
+		kind := elf.ST_TYPE(sym.Info)
+		if kind != elf.STT_NOTYPE && kind != elf.STT_OBJECT && kind != elf.STT_FUNC {
+			continue
+		}
+
+		if sym.Name == "" {
+			continue
+		}
+
+		names := bySection[sym.Section]
+		if names == nil {
+			names = make(map[uint64]string)
+			bySection[sym.Section] = names
+		}
+		names[sym.Value] = sym.Name
+	}
+	return bySection
+}

+ 8 - 0
vendor/github.com/cilium/ebpf/go.mod

@@ -0,0 +1,8 @@
+module github.com/cilium/ebpf
+
+go 1.12
+
+require (
+	github.com/pkg/errors v0.8.1
+	golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7
+)

+ 64 - 0
vendor/github.com/cilium/ebpf/internal/cpu.go

@@ -0,0 +1,64 @@
+package internal
+
+import (
+	"fmt"
+	"os"
+	"sync"
+
+	"github.com/pkg/errors"
+)
+
+// sysCPU caches the result of the first PossibleCPUs call.
+var sysCPU struct {
+	once sync.Once
+	err  error
+	num  int
+}
+
+// PossibleCPUs returns the max number of CPUs a system may possibly have
+// Logical CPU numbers must be of the form 0-n
+//
+// The sysfs file is read on the first call only; later calls return the
+// cached result, including a cached error.
+func PossibleCPUs() (int, error) {
+	sysCPU.once.Do(func() {
+		count, err := parseCPUs("/sys/devices/system/cpu/possible")
+		sysCPU.num = count
+		sysCPU.err = err
+	})
+
+	return sysCPU.num, sysCPU.err
+}
+
+// onlineCPU caches the result of the first OnlineCPUs call.
+var onlineCPU struct {
+	once sync.Once
+	err  error
+	num  int
+}
+
+// OnlineCPUs returns the number of currently online CPUs
+// Logical CPU numbers must be of the form 0-n
+//
+// The sysfs file is read on the first call only; later calls return the
+// cached result, including a cached error.
+func OnlineCPUs() (int, error) {
+	onlineCPU.once.Do(func() {
+		count, err := parseCPUs("/sys/devices/system/cpu/online")
+		onlineCPU.num = count
+		onlineCPU.err = err
+	})
+
+	return onlineCPU.num, onlineCPU.err
+}
+
+// parseCPUs parses the number of cpus from sysfs,
+// in the format of "/sys/devices/system/cpu/{possible,online,..}.
+// Logical CPU numbers must be of the form 0-n
+//
+// The file must contain either a single number ("0") or a range starting
+// at zero ("0-n"). Anything else is reported as an error.
+func parseCPUs(path string) (int, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return 0, err
+	}
+	defer file.Close()
+
+	var low, high int
+	n, scanErr := fmt.Fscanf(file, "%d-%d", &low, &high)
+	if n < 1 || low != 0 {
+		// Wrapping a nil error yields nil, which would report success
+		// with zero CPUs, so only wrap when there is a scan error.
+		if scanErr != nil {
+			return 0, errors.Wrapf(scanErr, "%s has unknown format", path)
+		}
+		return 0, errors.Errorf("%s has unknown format", path)
+	}
+	if n == 1 {
+		// A lone number means a single CPU; the failed match of the
+		// "-n" part is expected.
+		high = low
+	}
+
+	// cpus is 0 indexed
+	return high + 1, nil
+}

+ 24 - 0
vendor/github.com/cilium/ebpf/internal/endian.go

@@ -0,0 +1,24 @@
+package internal
+
+import (
+	"encoding/binary"
+	"unsafe"
+)
+
+// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
+// depending on the host's endianness.
+var NativeEndian binary.ByteOrder
+
+// init picks NativeEndian according to isBigEndian.
+func init() {
+	NativeEndian = binary.LittleEndian
+	if isBigEndian() {
+		NativeEndian = binary.BigEndian
+	}
+}
+
+// isBigEndian reports whether the lowest-addressed byte of the integer 1 is
+// zero, i.e. whether the host stores the most significant byte first.
+func isBigEndian() (ret bool) {
+	probe := int(0x1)
+	raw := (*[int(unsafe.Sizeof(probe))]byte)(unsafe.Pointer(&probe))
+	ret = raw[0] == 0
+	return ret
+}

+ 85 - 0
vendor/github.com/cilium/ebpf/internal/feature.go

@@ -0,0 +1,85 @@
+package internal
+
+import (
+	"fmt"
+	"sync"
+
+	"github.com/pkg/errors"
+)
+
+// UnsupportedFeatureError is returned by FeatureTest() functions.
+type UnsupportedFeatureError struct {
+	// The minimum Linux mainline version required for this feature.
+	// Used for the error string, and for sanity checking during testing.
+	MinimumVersion Version
+
+	// The name of the feature that isn't supported.
+	Name string
+}
+
+// Error names the unsupported feature and the minimum version it requires.
+func (ufe *UnsupportedFeatureError) Error() string {
+	feature, minimum := ufe.Name, ufe.MinimumVersion
+	return fmt.Sprintf("%s not supported (requires >= %s)", feature, minimum)
+}
+
+// FeatureTest wraps a function so that it is run at most once.
+//
+// name should identify the tested feature, while version must be in the
+// form Major.Minor[.Patch]. If version can't be parsed, the returned
+// function always reports the parse error and fn is never called.
+//
+// The returned function yields nil if fn reported true, and a descriptive
+// UnsupportedFeatureError otherwise.
+func FeatureTest(name, version string, fn func() bool) func() error {
+	minVersion, parseErr := NewVersion(version)
+	if parseErr != nil {
+		return func() error { return parseErr }
+	}
+
+	var (
+		probe  sync.Once
+		cached error
+	)
+
+	runProbe := func() {
+		if fn() {
+			return
+		}
+		cached = &UnsupportedFeatureError{
+			MinimumVersion: minVersion,
+			Name:           name,
+		}
+	}
+
+	return func() error {
+		probe.Do(runProbe)
+		return cached
+	}
+}
+
+// A Version in the form Major.Minor.Patch.
+type Version [3]uint16
+
+// NewVersion creates a version from a string like "Major.Minor.Patch".
+//
+// Patch is optional and defaults to 0. An error is returned if fewer than
+// two components can be parsed.
+func NewVersion(ver string) (Version, error) {
+	var parsed Version
+	fields, _ := fmt.Sscanf(ver, "%d.%d.%d", &parsed[0], &parsed[1], &parsed[2])
+	if fields < 2 {
+		return Version{}, errors.Errorf("invalid version: %s", ver)
+	}
+	return parsed, nil
+}
+
+// String formats the version as "vMajor.Minor", followed by ".Patch" when
+// the patch level is not zero.
+func (v Version) String() string {
+	major, minor, patch := v[0], v[1], v[2]
+	if patch != 0 {
+		return fmt.Sprintf("v%d.%d.%d", major, minor, patch)
+	}
+	return fmt.Sprintf("v%d.%d", major, minor)
+}
+
+// Less returns true if the version is less than another version.
+//
+// Components are compared in order Major, Minor, Patch; the first one that
+// differs decides.
+func (v Version) Less(other Version) bool {
+	for i := range v {
+		switch {
+		case v[i] < other[i]:
+			return true
+		case v[i] > other[i]:
+			return false
+		}
+	}
+	return false
+}

+ 127 - 0
vendor/github.com/cilium/ebpf/internal/unix/types_linux.go

@@ -0,0 +1,127 @@
+// +build linux
+
+package unix
+
+import (
+	"syscall"
+
+	linux "golang.org/x/sys/unix"
+)
+
+const (
+	ENOENT                   = linux.ENOENT
+	EAGAIN                   = linux.EAGAIN
+	ENOSPC                   = linux.ENOSPC
+	EINVAL                   = linux.EINVAL
+	EPOLLIN                  = linux.EPOLLIN
+	BPF_OBJ_NAME_LEN         = linux.BPF_OBJ_NAME_LEN
+	BPF_TAG_SIZE             = linux.BPF_TAG_SIZE
+	SYS_BPF                  = linux.SYS_BPF
+	F_DUPFD_CLOEXEC          = linux.F_DUPFD_CLOEXEC
+	EPOLL_CTL_ADD            = linux.EPOLL_CTL_ADD
+	EPOLL_CLOEXEC            = linux.EPOLL_CLOEXEC
+	O_CLOEXEC                = linux.O_CLOEXEC
+	O_NONBLOCK               = linux.O_NONBLOCK
+	PROT_READ                = linux.PROT_READ
+	PROT_WRITE               = linux.PROT_WRITE
+	MAP_SHARED               = linux.MAP_SHARED
+	PERF_TYPE_SOFTWARE       = linux.PERF_TYPE_SOFTWARE
+	PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
+	PerfBitWatermark         = linux.PerfBitWatermark
+	PERF_SAMPLE_RAW          = linux.PERF_SAMPLE_RAW
+	PERF_FLAG_FD_CLOEXEC     = linux.PERF_FLAG_FD_CLOEXEC
+	RLIM_INFINITY            = linux.RLIM_INFINITY
+)
+
+// Statfs_t is an alias for Statfs_t of golang.org/x/sys/unix
+type Statfs_t = linux.Statfs_t
+
+// Rlimit is an alias for Rlimit of golang.org/x/sys/unix
+type Rlimit = linux.Rlimit
+
+// Setrlimit forwards to Setrlimit of golang.org/x/sys/unix
+func Setrlimit(resource int, rlim *Rlimit) (err error) {
+	return linux.Setrlimit(resource, rlim)
+}
+
+// Syscall forwards to Syscall of golang.org/x/sys/unix
+func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
+	return linux.Syscall(trap, a1, a2, a3)
+}
+
+// FcntlInt forwards to FcntlInt of golang.org/x/sys/unix
+func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
+	return linux.FcntlInt(fd, cmd, arg)
+}
+
+// Statfs forwards to Statfs of golang.org/x/sys/unix
+func Statfs(path string, buf *Statfs_t) (err error) {
+	return linux.Statfs(path, buf)
+}
+
+// Close forwards to Close of golang.org/x/sys/unix
+func Close(fd int) (err error) {
+	return linux.Close(fd)
+}
+
+// EpollEvent is an alias for EpollEvent of golang.org/x/sys/unix
+type EpollEvent = linux.EpollEvent
+
+// EpollWait forwards to EpollWait of golang.org/x/sys/unix
+func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
+	return linux.EpollWait(epfd, events, msec)
+}
+
+// EpollCtl forwards to EpollCtl of golang.org/x/sys/unix
+func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
+	return linux.EpollCtl(epfd, op, fd, event)
+}
+
+// Eventfd forwards to Eventfd of golang.org/x/sys/unix
+func Eventfd(initval uint, flags int) (fd int, err error) {
+	return linux.Eventfd(initval, flags)
+}
+
+// Write forwards to Write of golang.org/x/sys/unix
+func Write(fd int, p []byte) (n int, err error) {
+	return linux.Write(fd, p)
+}
+
+// EpollCreate1 forwards to EpollCreate1 of golang.org/x/sys/unix
+func EpollCreate1(flag int) (fd int, err error) {
+	return linux.EpollCreate1(flag)
+}
+
+// PerfEventMmapPage is a distinct type (not an alias) defined from PerfEventMmapPage of golang.org/x/sys/unix
+type PerfEventMmapPage linux.PerfEventMmapPage
+
+// SetNonblock forwards to SetNonblock of golang.org/x/sys/unix
+func SetNonblock(fd int, nonblocking bool) (err error) {
+	return linux.SetNonblock(fd, nonblocking)
+}
+
+// Mmap forwards to Mmap of golang.org/x/sys/unix
+func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
+	return linux.Mmap(fd, offset, length, prot, flags)
+}
+
+// Munmap forwards to Munmap of golang.org/x/sys/unix
+func Munmap(b []byte) (err error) {
+	return linux.Munmap(b)
+}
+
+// PerfEventAttr is an alias for PerfEventAttr of golang.org/x/sys/unix
+type PerfEventAttr = linux.PerfEventAttr
+
+// PerfEventOpen forwards to PerfEventOpen of golang.org/x/sys/unix
+func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
+	return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
+}
+
+// Utsname is an alias for Utsname of golang.org/x/sys/unix
+type Utsname = linux.Utsname
+
+// Uname forwards to Uname of golang.org/x/sys/unix
+func Uname(buf *Utsname) (err error) {
+	return linux.Uname(buf)
+}

+ 193 - 0
vendor/github.com/cilium/ebpf/internal/unix/types_other.go

@@ -0,0 +1,193 @@
+// +build !linux
+
+package unix
+
+import (
+	"fmt"
+	"runtime"
+	"syscall"
+)
+
+// errNonLinux is the error returned by the stub functions in this file. Its
+// message names the runtime.GOOS/runtime.GOARCH pair of the running binary.
+var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
+
+// Constants made available to the rest of the module when this file is built
+// (the build tag above restricts it to non-Linux targets).
+//
+// The errno values are taken from package syscall; every other value is a
+// hard-coded literal.
+// NOTE(review): the literals presumably mirror the Linux definitions so that
+// callers compile unchanged — confirm against the Linux headers.
+const (
+	ENOENT                   = syscall.ENOENT
+	EAGAIN                   = syscall.EAGAIN
+	ENOSPC                   = syscall.ENOSPC
+	EINVAL                   = syscall.EINVAL
+	BPF_OBJ_NAME_LEN         = 0x10
+	BPF_TAG_SIZE             = 0x8
+	SYS_BPF                  = 321
+	F_DUPFD_CLOEXEC          = 0x406
+	EPOLLIN                  = 0x1
+	EPOLL_CTL_ADD            = 0x1
+	EPOLL_CLOEXEC            = 0x80000
+	O_CLOEXEC                = 0x80000
+	O_NONBLOCK               = 0x800
+	PROT_READ                = 0x1
+	PROT_WRITE               = 0x2
+	MAP_SHARED               = 0x1
+	PERF_TYPE_SOFTWARE       = 0x1
+	PERF_COUNT_SW_BPF_OUTPUT = 0xa
+	PerfBitWatermark         = 0x4000
+	PERF_SAMPLE_RAW          = 0x400
+	PERF_FLAG_FD_CLOEXEC     = 0x8
+)
+
+// Statfs_t is the non-Linux stand-in for the buffer type accepted by Statfs.
+// Statfs in this file always fails, so the fields are never filled in here;
+// the declaration exists so that code referring to the type still compiles.
+type Statfs_t struct {
+	Type    int64
+	Bsize   int64
+	Blocks  uint64
+	Bfree   uint64
+	Bavail  uint64
+	Files   uint64
+	Ffree   uint64
+	Fsid    [2]int32
+	Namelen int64
+	Frsize  int64
+	Flags   int64
+	Spare   [4]int64
+}
+
+// Rlimit is the non-Linux stand-in for the argument type of Setrlimit.
+// Setrlimit in this file always fails, so the values are never applied.
+type Rlimit struct {
+	Cur uint64
+	Max uint64
+}
+
+// Setrlimit is the non-Linux stub: it ignores its arguments and always
+// returns errNonLinux.
+func Setrlimit(resource int, rlim *Rlimit) (err error) {
+	return errNonLinux
+}
+
+// Syscall is the non-Linux stub: it ignores its arguments, performs no
+// system call and always reports failure.
+//
+// The error result is typed syscall.Errno, so errNonLinux cannot be returned
+// here; the fixed value syscall.Errno(1) is used instead.
+func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
+	return 0, 0, syscall.Errno(1)
+}
+
+// FcntlInt is the non-Linux stub: it ignores its arguments and always
+// returns -1 together with errNonLinux.
+func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
+	return -1, errNonLinux
+}
+
+// Statfs is the non-Linux stub: it ignores its arguments, leaves buf
+// untouched and always returns errNonLinux.
+func Statfs(path string, buf *Statfs_t) error {
+	return errNonLinux
+}
+
+// Close is the non-Linux stub: it does not close fd and always returns
+// errNonLinux.
+func Close(fd int) (err error) {
+	return errNonLinux
+}
+
+// EpollEvent is the non-Linux stand-in for the event type used by EpollWait
+// and EpollCtl. Both functions always fail in this file, so the fields are
+// never filled in here.
+type EpollEvent struct {
+	Events uint32
+	Fd     int32
+	Pad    int32
+}
+
+// EpollWait is the non-Linux stub: it ignores its arguments and always
+// returns 0 events together with errNonLinux.
+func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
+	return 0, errNonLinux
+}
+
+// EpollCtl is the non-Linux stub: it ignores its arguments and always
+// returns errNonLinux.
+func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
+	return errNonLinux
+}
+
+// Eventfd is the non-Linux stub: it ignores its arguments and always returns
+// errNonLinux. The accompanying fd value 0 is only a placeholder and must
+// not be used.
+func Eventfd(initval uint, flags int) (fd int, err error) {
+	return 0, errNonLinux
+}
+
+// Write is the non-Linux stub: it writes nothing and always returns 0
+// together with errNonLinux.
+func Write(fd int, p []byte) (n int, err error) {
+	return 0, errNonLinux
+}
+
+// EpollCreate1 is the non-Linux stub: it ignores flag and always returns
+// errNonLinux. The accompanying fd value 0 is only a placeholder and must
+// not be used.
+func EpollCreate1(flag int) (fd int, err error) {
+	return 0, errNonLinux
+}
+
+// PerfEventMmapPage is the non-Linux stand-in declaration of the type of the
+// same name. Nothing in this file reads or writes it; it exists so that code
+// referring to the type still compiles.
+// NOTE(review): the field list presumably mirrors the Linux declaration —
+// confirm before relying on its layout.
+type PerfEventMmapPage struct {
+	Version        uint32
+	Compat_version uint32
+	Lock           uint32
+	Index          uint32
+	Offset         int64
+	Time_enabled   uint64
+	Time_running   uint64
+	Capabilities   uint64
+	Pmc_width      uint16
+	Time_shift     uint16
+	Time_mult      uint32
+	Time_offset    uint64
+	Time_zero      uint64
+	Size           uint32
+
+	Data_head   uint64
+	Data_tail   uint64
+	Data_offset uint64
+	Data_size   uint64
+	Aux_head    uint64
+	Aux_tail    uint64
+	Aux_offset  uint64
+	Aux_size    uint64
+}
+
+// SetNonblock is the non-Linux stub: it leaves fd unchanged and always
+// returns errNonLinux.
+func SetNonblock(fd int, nonblocking bool) (err error) {
+	return errNonLinux
+}
+
+// Mmap is the non-Linux stub: it maps nothing and always returns
+// errNonLinux. The accompanying slice is empty but non-nil.
+func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
+	return []byte{}, errNonLinux
+}
+
+// Munmap is the non-Linux stub: it ignores b and always returns errNonLinux.
+func Munmap(b []byte) (err error) {
+	return errNonLinux
+}
+
+// PerfEventAttr is the non-Linux stand-in for the attribute type accepted by
+// PerfEventOpen. PerfEventOpen always fails in this file, so the values are
+// never handed to a kernel.
+// NOTE(review): the field list presumably mirrors the Linux declaration —
+// confirm before relying on its layout.
+type PerfEventAttr struct {
+	Type               uint32
+	Size               uint32
+	Config             uint64
+	Sample             uint64
+	Sample_type        uint64
+	Read_format        uint64
+	Bits               uint64
+	Wakeup             uint32
+	Bp_type            uint32
+	Ext1               uint64
+	Ext2               uint64
+	Branch_sample_type uint64
+	Sample_regs_user   uint64
+	Sample_stack_user  uint32
+	Clockid            int32
+	Sample_regs_intr   uint64
+	Aux_watermark      uint32
+	Sample_max_stack   uint16
+}
+
+// PerfEventOpen is the non-Linux stub: it ignores its arguments and always
+// returns errNonLinux. The accompanying fd value 0 is only a placeholder and
+// must not be used.
+func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
+	return 0, errNonLinux
+}
+
+// Utsname is the non-Linux stand-in for the buffer type accepted by Uname.
+// Only the Release field is declared; Uname in this file always fails, so it
+// is never filled in here.
+type Utsname struct {
+	Release [65]byte
+}
+
+// Uname is the non-Linux stub: it leaves buf untouched and always returns
+// errNonLinux.
+func Uname(buf *Utsname) (err error) {
+	return errNonLinux
+}

+ 58 - 0
vendor/github.com/cilium/ebpf/linker.go

@@ -0,0 +1,58 @@
+package ebpf
+
+import (
+	"github.com/cilium/ebpf/asm"
+)
+
+// link resolves bpf-to-bpf calls.
+//
+// The sections are offered to linkSection one after another, in argument
+// order, each time together with the instructions accumulated so far. A
+// section is only merged when the program references one of the symbols it
+// defines.
+//
+// Sections must not require linking themselves.
+//
+// Returns the (possibly extended) instructions, or nil and the first error
+// reported by linkSection.
+func link(insns asm.Instructions, sections ...asm.Instructions) (asm.Instructions, error) {
+	linked := insns
+	for i := range sections {
+		merged, err := linkSection(linked, sections[i])
+		if err != nil {
+			return nil, err
+		}
+		linked = merged
+	}
+	return linked, nil
+}
+
+// linkSection appends section to insns if insns contains an unresolved call
+// to a symbol that section defines; otherwise insns is returned unchanged.
+//
+// An instruction counts as an unresolved call when it carries a Reference,
+// is a Call jump with Src set to asm.R1, and its Constant is still -1.
+func linkSection(insns, section asm.Instructions) (asm.Instructions, error) {
+	// Symbols defined by section, as reported by SymbolOffsets.
+	defined, err := section.SymbolOffsets()
+	if err != nil {
+		return nil, err
+	}
+
+	for i := range insns {
+		candidate := insns[i]
+
+		switch {
+		case candidate.Reference == "":
+			// Doesn't refer to any symbol.
+			continue
+		case candidate.OpCode.JumpOp() != asm.Call, candidate.Src != asm.R1:
+			// Not a call of the kind that gets linked here.
+			continue
+		case candidate.Constant != -1:
+			// This is already a valid call, no need to link again.
+			continue
+		}
+
+		if _, found := defined[candidate.Reference]; found {
+			// At least one function in the section is called from insns,
+			// so merge the two. The rewrite of the call's Constant happens
+			// in asm.Instruction.Marshal.
+			return append(insns, section...), nil
+		}
+		// Symbol isn't available in this section; keep looking.
+	}
+
+	// None of the functions in the section are called. Do nothing.
+	return insns, nil
+}

+ 604 - 0
vendor/github.com/cilium/ebpf/map.go

@@ -0,0 +1,604 @@
+package ebpf
+
+import (
+	"fmt"
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+
+	"github.com/pkg/errors"
+)
+
+// MapSpec defines a Map.
+//
+// Type, KeySize, ValueSize, MaxEntries and Flags are the values copied into
+// the map creation attributes by createMap. createMap overrides some of them
+// for ArrayOfMaps, HashOfMaps and PerfEventArray on a private copy of the
+// spec.
+type MapSpec struct {
+	// Name is passed to the kernel as a debug aid. Must only contain
+	// alpha numeric and '_' characters.
+	Name       string
+	Type       MapType
+	KeySize    uint32
+	ValueSize  uint32
+	MaxEntries uint32
+	Flags      uint32
+	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
+	InnerMap *MapSpec
+}
+
+// String renders the map type followed by the key size, value size, maximum
+// number of entries and flags of the spec. Name and InnerMap are not part of
+// the output.
+func (ms *MapSpec) String() string {
+	return fmt.Sprintf(
+		"%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)",
+		ms.Type,
+		ms.KeySize,
+		ms.ValueSize,
+		ms.MaxEntries,
+		ms.Flags,
+	)
+}
+
+// Copy returns a deep copy of the spec: the InnerMap chain is copied
+// recursively, so the result shares no MapSpec with the receiver.
+//
+// Copying a nil spec returns nil.
+func (ms *MapSpec) Copy() *MapSpec {
+	if ms == nil {
+		return nil
+	}
+
+	dup := new(MapSpec)
+	*dup = *ms
+	// Copy on a nil InnerMap returns nil, so no extra check is needed.
+	dup.InnerMap = ms.InnerMap.Copy()
+	return dup
+}
+
+// Map represents a Map file descriptor.
+//
+// It is not safe to close a map which is used by other goroutines.
+//
+// Methods which take interface{} arguments by default encode
+// them using binary.Read/Write in the machine's native endianness.
+//
+// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
+// if you require custom encoding.
+type Map struct {
+	name string
+	fd   *bpfFD
+	abi  MapABI
+	// Per CPU maps return values larger than the size in the spec.
+	// For those map types this is the value size rounded up to a multiple
+	// of 8, times the number of possible CPUs; for every other type it
+	// equals abi.ValueSize.
+	fullValueSize int
+}
+
+// NewMapFromFD creates a map from a raw fd.
+//
+// The name and ABI of the map are obtained from the fd itself. Negative fds
+// are rejected with an error.
+//
+// You should not use fd after calling this function.
+func NewMapFromFD(fd int) (*Map, error) {
+	if fd < 0 {
+		return nil, errors.New("invalid fd")
+	}
+
+	wrapped := newBPFFD(uint32(fd))
+	name, abi, err := newMapABIFromFd(wrapped)
+	if err == nil {
+		return newMap(wrapped, name, abi)
+	}
+
+	// NOTE(review): forget presumably drops the wrapper without closing the
+	// caller's descriptor — confirm against bpfFD.
+	wrapped.forget()
+	return nil, err
+}
+
+// NewMap creates a new Map.
+//
+// For ArrayOfMaps and HashOfMaps, spec.InnerMap is required: a temporary
+// map is created from it to serve as the template and is closed again
+// before NewMap returns.
+//
+// Creating a map for the first time will perform feature detection
+// by creating small, temporary maps.
+func NewMap(spec *MapSpec) (*Map, error) {
+	nested := spec.Type == ArrayOfMaps || spec.Type == HashOfMaps
+	if !nested {
+		return createMap(spec, nil)
+	}
+
+	if spec.InnerMap == nil {
+		return nil, errors.Errorf("%s requires InnerMap", spec.Type)
+	}
+
+	inner, err := createMap(spec.InnerMap, nil)
+	if err != nil {
+		return nil, err
+	}
+	// The template is only needed while the outer map is being created.
+	defer inner.Close()
+
+	return createMap(spec, inner.fd)
+}
+
+// createMap validates and normalises a private copy of spec and then creates
+// the map via bpfMapCreate.
+//
+// inner, when non-nil, supplies the fd stored in the creation attributes as
+// the inner map. The caller's spec is never modified.
+func createMap(spec *MapSpec, inner *bpfFD) (*Map, error) {
+	// The adjustments below must not leak back to the caller.
+	spec = spec.Copy()
+
+	switch spec.Type {
+	case ArrayOfMaps, HashOfMaps:
+		if err := haveNestedMaps(); err != nil {
+			return nil, err
+		}
+
+		// An unset value size is accepted and forced to four below.
+		if size := spec.ValueSize; size != 0 && size != 4 {
+			return nil, errors.Errorf("ValueSize must be zero or four for map of map")
+		}
+		spec.ValueSize = 4
+
+	case PerfEventArray:
+		if spec.KeySize != 0 {
+			return nil, errors.Errorf("KeySize must be zero for perf event array")
+		}
+		if spec.ValueSize != 0 {
+			return nil, errors.Errorf("ValueSize must be zero for perf event array")
+		}
+		if spec.MaxEntries == 0 {
+			// Default to the number reported by internal.OnlineCPUs.
+			cpus, err := internal.OnlineCPUs()
+			if err != nil {
+				return nil, errors.Wrap(err, "perf event array")
+			}
+			spec.MaxEntries = uint32(cpus)
+		}
+
+		spec.KeySize, spec.ValueSize = 4, 4
+	}
+
+	attr := bpfMapCreateAttr{
+		mapType:    spec.Type,
+		keySize:    spec.KeySize,
+		valueSize:  spec.ValueSize,
+		maxEntries: spec.MaxEntries,
+		flags:      spec.Flags,
+	}
+
+	if inner != nil {
+		innerFd, err := inner.value()
+		if err != nil {
+			return nil, errors.Wrap(err, "map create")
+		}
+		attr.innerMapFd = innerFd
+	}
+
+	// The name is validated even when it ends up not being sent.
+	objName, err := newBPFObjName(spec.Name)
+	if err != nil {
+		return nil, errors.Wrap(err, "map create")
+	}
+
+	// Only attach the name when haveObjName reports no error.
+	if haveObjName() == nil {
+		attr.mapName = objName
+	}
+
+	created, err := bpfMapCreate(&attr)
+	if err != nil {
+		return nil, errors.Wrap(err, "map create")
+	}
+
+	return newMap(created, spec.Name, newMapABIFromSpec(spec))
+}
+
+// newMap wraps fd in a Map carrying the given name and a copy of abi.
+//
+// For map types with per-CPU values the full value size is the value size
+// rounded up to a multiple of 8, multiplied by the number of possible CPUs;
+// otherwise it is the plain value size. abi must not be nil.
+func newMap(fd *bpfFD, name string, abi *MapABI) (*Map, error) {
+	m := &Map{
+		name:          name,
+		fd:            fd,
+		abi:           *abi,
+		fullValueSize: int(abi.ValueSize),
+	}
+
+	if abi.Type.hasPerCPUValue() {
+		cpus, err := internal.PossibleCPUs()
+		if err != nil {
+			return nil, err
+		}
+		m.fullValueSize = align(int(abi.ValueSize), 8) * cpus
+	}
+
+	return m, nil
+}
+
+// String renders the map type and fd, with the name in parentheses after the
+// type when the map has one.
+func (m *Map) String() string {
+	if m.name == "" {
+		return fmt.Sprintf("%s#%v", m.abi.Type, m.fd)
+	}
+	return fmt.Sprintf("%s(%s)#%v", m.abi.Type, m.name, m.fd)
+}
+
+// ABI gets the ABI of the Map.
+//
+// The result is returned by value, so changing it does not affect the Map.
+func (m *Map) ABI() MapABI {
+	return m.abi
+}
+
+// Lookup retrieves a value from a Map.
+//
+// valueOut may be an unsafe.Pointer, in which case no decoding takes place
+// after the lookup. For per-CPU map types valueOut is decoded by
+// unmarshalPerCPUValue. For **Map and **Program the looked-up value is
+// turned into a new Map or Program: Close() is called on the previous
+// *valueOut (which may be nil) before it is replaced. Passing *Map, Map,
+// *Program or Program is an error. Anything else is decoded by
+// unmarshalBytes.
+//
+// Returns an error if the key doesn't exist, see IsNotExist.
+func (m *Map) Lookup(key, valueOut interface{}) error {
+	ptr, raw := makeBuffer(valueOut, m.fullValueSize)
+
+	if err := m.lookup(key, ptr); err != nil {
+		return err
+	}
+
+	// makeBuffer hands back no byte slice when valueOut is an
+	// unsafe.Pointer, so there is nothing left to decode.
+	if raw == nil {
+		return nil
+	}
+
+	if m.abi.Type.hasPerCPUValue() {
+		return unmarshalPerCPUValue(valueOut, int(m.abi.ValueSize), raw)
+	}
+
+	switch out := valueOut.(type) {
+	case **Map:
+		nested, err := unmarshalMap(raw)
+		if err != nil {
+			return err
+		}
+
+		// Release whatever the caller's pointer referred to so far.
+		(*out).Close()
+		*out = nested
+		return nil
+	case *Map:
+		return errors.Errorf("can't unmarshal into %T, need %T", out, (**Map)(nil))
+	case Map:
+		return errors.Errorf("can't unmarshal into %T, need %T", out, (**Map)(nil))
+
+	case **Program:
+		prog, err := unmarshalProgram(raw)
+		if err != nil {
+			return err
+		}
+
+		// Release whatever the caller's pointer referred to so far.
+		(*out).Close()
+		*out = prog
+		return nil
+	case *Program:
+		return errors.Errorf("can't unmarshal into %T, need %T", out, (**Program)(nil))
+	case Program:
+		return errors.Errorf("can't unmarshal into %T, need %T", out, (**Program)(nil))
+	}
+
+	return unmarshalBytes(valueOut, raw)
+}
+
+// LookupBytes gets a value from Map.
+//
+// The returned slice is m.fullValueSize bytes long.
+//
+// Returns a nil value if a key doesn't exist.
+func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
+	out := make([]byte, m.fullValueSize)
+
+	err := m.lookup(key, newPtr(unsafe.Pointer(&out[0])))
+	if IsNotExist(err) {
+		// A missing key is reported as (nil, nil) rather than as an error.
+		return nil, nil
+	}
+
+	return out, err
+}
+
+// lookup marshals key to the map's key size and asks bpfMapLookupElem to
+// store the value at valueOut. Failures are annotated with a message that
+// says which step went wrong.
+func (m *Map) lookup(key interface{}, valueOut syscallPtr) error {
+	keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
+	if err != nil {
+		return errors.WithMessage(err, "can't marshal key")
+	}
+
+	// WithMessage passes a nil error through as nil.
+	return errors.WithMessage(bpfMapLookupElem(m.fd, keyPtr, valueOut), "lookup failed")
+}
+
+// MapUpdateFlags controls the behaviour of the Map.Update call.
+//
+// The exact semantics depend on the specific MapType.
+type MapUpdateFlags uint64
+
+// The values are passed to bpfMapUpdateElem unchanged by Map.Update.
+const (
+	// UpdateAny creates a new element or update an existing one.
+	UpdateAny MapUpdateFlags = 0
+	// UpdateNoExist creates a new element.
+	UpdateNoExist MapUpdateFlags = 1
+	// UpdateExist updates an existing element.
+	UpdateExist MapUpdateFlags = 2
+)
+
+// Put replaces or creates a value in map.
+//
+// It is equivalent to calling Update with UpdateAny, and returns whatever
+// error Update returns.
+func (m *Map) Put(key, value interface{}) error {
+	return m.Update(key, value, UpdateAny)
+}
+
+// Update changes the value of a key.
+//
+// key is marshalled to the map's key size. value is marshalled to the map's
+// value size, or — for per-CPU map types — by marshalPerCPUValue, which
+// expects a slice of values. flags is handed to the kernel unchanged.
+func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
+	keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
+	if err != nil {
+		return errors.WithMessage(err, "can't marshal key")
+	}
+
+	// Pick the encoder that matches the map's value layout.
+	encodeValue := marshalPtr
+	if m.abi.Type.hasPerCPUValue() {
+		encodeValue = marshalPerCPUValue
+	}
+
+	valuePtr, err := encodeValue(value, int(m.abi.ValueSize))
+	if err != nil {
+		return errors.WithMessage(err, "can't marshal value")
+	}
+
+	return bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags))
+}
+
+// Delete removes a value.
+//
+// key is marshalled to the map's key size before the element is deleted.
+//
+// Returns an error if the key does not exist, see IsNotExist.
+func (m *Map) Delete(key interface{}) error {
+	keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
+	if err != nil {
+		return errors.WithMessage(err, "can't marshal key")
+	}
+
+	// WithMessage passes a nil error through as nil.
+	return errors.WithMessage(bpfMapDeleteElem(m.fd, keyPtr), "can't delete key")
+}
+
+// NextKey finds the key following an initial key.
+//
+// The result is decoded into nextKeyOut. When nextKeyOut is an
+// unsafe.Pointer no decoding takes place after the lookup.
+//
+// See NextKeyBytes for details.
+func (m *Map) NextKey(key, nextKeyOut interface{}) error {
+	ptr, raw := makeBuffer(nextKeyOut, int(m.abi.KeySize))
+
+	if err := m.nextKey(key, ptr); err != nil {
+		return err
+	}
+
+	// makeBuffer hands back no byte slice for unsafe.Pointer destinations.
+	if raw == nil {
+		return nil
+	}
+
+	return errors.WithMessage(unmarshalBytes(nextKeyOut, raw), "can't unmarshal next key")
+}
+
+// NextKeyBytes returns the key following an initial key as a byte slice.
+//
+// Passing nil will return the first key.
+//
+// A nil slice and a nil error are returned when the lookup fails with an
+// error for which IsNotExist is true.
+//
+// Use Iterate if you want to traverse all entries in the map.
+func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
+	out := make([]byte, m.abi.KeySize)
+
+	err := m.nextKey(key, newPtr(unsafe.Pointer(&out[0])))
+	if IsNotExist(err) {
+		return nil, nil
+	}
+
+	return out, err
+}
+
+// nextKey asks bpfMapGetNextKey for the key that follows key and has it
+// stored at nextKeyOut. A nil key is passed on as the zero syscallPtr.
+func (m *Map) nextKey(key interface{}, nextKeyOut syscallPtr) error {
+	var keyPtr syscallPtr
+
+	if key != nil {
+		ptr, err := marshalPtr(key, int(m.abi.KeySize))
+		if err != nil {
+			return errors.WithMessage(err, "can't marshal key")
+		}
+		keyPtr = ptr
+	}
+
+	// WithMessage passes a nil error through as nil.
+	return errors.WithMessage(bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut), "can't get next key")
+}
+
+// Iterate traverses a map.
+//
+// Each call returns a new, independent MapIterator positioned before the
+// first key.
+//
+// It's safe to create multiple iterators at the same time.
+//
+// It's not possible to guarantee that all keys in a map will be
+// returned if there are concurrent modifications to the map.
+func (m *Map) Iterate() *MapIterator {
+	return newMapIterator(m)
+}
+
+// Close closes the Map's file descriptor.
+//
+// Calling Close on a nil Map is a no-op that returns nil.
+func (m *Map) Close() error {
+	if m == nil {
+		// This makes it easier to clean up when iterating maps
+		// of maps / programs.
+		return nil
+	}
+
+	return m.fd.close()
+}
+
+// FD gets the file descriptor of the Map.
+//
+// If the descriptor can't be obtained, -1 is returned.
+//
+// Calling this function is invalid after Close has been called.
+func (m *Map) FD() int {
+	if raw, err := m.fd.value(); err == nil {
+		return int(raw)
+	}
+
+	// Best effort: -1 is the number most likely to be an
+	// invalid file descriptor.
+	return -1
+}
+
+// Clone creates a duplicate of the Map.
+//
+// The duplicate gets its own file descriptor, and keeps the name and ABI of
+// the original.
+//
+// Closing the duplicate does not affect the original, and vice versa.
+// Changes made to the map are reflected by both instances however.
+//
+// Cloning a nil Map returns nil.
+func (m *Map) Clone() (*Map, error) {
+	if m == nil {
+		return nil, nil
+	}
+
+	copied, err := m.fd.dup()
+	if err != nil {
+		return nil, errors.Wrap(err, "can't clone map")
+	}
+
+	return newMap(copied, m.name, &m.abi)
+}
+
+// Pin persists the map past the lifetime of the process that created it.
+//
+// The map's file descriptor is pinned at fileName via bpfPinObject, whose
+// error is returned unchanged.
+//
+// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
+func (m *Map) Pin(fileName string) error {
+	return bpfPinObject(fileName, m.fd)
+}
+
+// LoadPinnedMap loads a Map from a BPF file.
+//
+// The name and ABI of the map are obtained from the loaded fd. If that
+// fails the fd is closed again before the error is returned.
+//
+// The function is not compatible with nested maps.
+// Use LoadPinnedMapExplicit in these situations.
+func LoadPinnedMap(fileName string) (*Map, error) {
+	pinned, err := bpfGetObject(fileName)
+	if err != nil {
+		return nil, err
+	}
+
+	name, abi, err := newMapABIFromFd(pinned)
+	if err == nil {
+		return newMap(pinned, name, abi)
+	}
+
+	// Don't leak the descriptor; the original error is what matters.
+	_ = pinned.close()
+	return nil, err
+}
+
+// LoadPinnedMapExplicit loads a map with explicit parameters.
+//
+// The ABI is taken from abi as given instead of being obtained from the
+// loaded fd, and the resulting Map has an empty name. abi must not be nil.
+func LoadPinnedMapExplicit(fileName string, abi *MapABI) (*Map, error) {
+	pinned, err := bpfGetObject(fileName)
+	if err != nil {
+		return nil, err
+	}
+
+	return newMap(pinned, "", abi)
+}
+
+// unmarshalMap turns the 4-byte, native-endian map id in buf into a Map.
+//
+// The id is resolved to an fd, from which the name and ABI are obtained. If
+// that fails the fd is closed again before the error is returned.
+func unmarshalMap(buf []byte) (*Map, error) {
+	if len(buf) != 4 {
+		return nil, errors.New("map id requires 4 byte value")
+	}
+
+	// Looking up an entry in a nested map or prog array returns an id,
+	// not an fd.
+	mapFd, err := bpfGetMapFDByID(internal.NativeEndian.Uint32(buf))
+	if err != nil {
+		return nil, err
+	}
+
+	name, abi, err := newMapABIFromFd(mapFd)
+	if err == nil {
+		return newMap(mapFd, name, abi)
+	}
+
+	// Don't leak the descriptor; the original error is what matters.
+	_ = mapFd.close()
+	return nil, err
+}
+
+// MarshalBinary implements BinaryMarshaler.
+//
+// The encoding is the map's file descriptor as 4 bytes in native byte
+// order. An error is returned if the descriptor can't be obtained.
+func (m *Map) MarshalBinary() ([]byte, error) {
+	raw, err := m.fd.value()
+	if err != nil {
+		return nil, err
+	}
+
+	var encoded [4]byte
+	internal.NativeEndian.PutUint32(encoded[:], raw)
+	return encoded[:], nil
+}
+
+// MapIterator iterates a Map.
+//
+// See Map.Iterate.
+type MapIterator struct {
+	// target is the map being traversed.
+	target            *Map
+	// prevKey is the key handed to NextKeyBytes on the next step. It is nil
+	// until the first key has been read and refers to prevBytes afterwards.
+	prevKey           interface{}
+	// prevBytes is the iterator's private copy of the most recent key.
+	prevBytes         []byte
+	// count is the number of steps taken so far; Next gives up with
+	// errIterationAborted once it reaches maxEntries.
+	count, maxEntries uint32
+	// done is set once NextKeyBytes reports that there are no more keys.
+	done              bool
+	// err is the first error encountered, see Err.
+	err               error
+}
+
+// newMapIterator returns an iterator over target. The step limit is the
+// map's MaxEntries, and the key buffer is sized to the map's KeySize.
+func newMapIterator(target *Map) *MapIterator {
+	it := new(MapIterator)
+	it.target = target
+	it.maxEntries = target.abi.MaxEntries
+	it.prevBytes = make([]byte, int(target.abi.KeySize))
+	return it
+}
+
+// errIterationAborted is the error MapIterator.Next records when it gives up
+// after taking as many steps as the map has entries. Use IsIterationAborted
+// to test for it.
+var errIterationAborted = errors.New("iteration aborted")
+
+// Next decodes the next key and value.
+//
+// Iterating a hash map from which keys are being deleted is not
+// safe. You may see the same key multiple times. Iteration may
+// also abort with an error, see IsIterationAborted.
+//
+// Returns false if there are no more entries. You must check
+// the result of Err afterwards.
+//
+// See Map.Lookup for further caveats around valueOut.
+func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
+	// Once the iterator has failed or finished it stays that way.
+	if mi.err != nil || mi.done {
+		return false
+	}
+
+	// The step count is bounded by maxEntries; count persists across calls.
+	for ; mi.count < mi.maxEntries; mi.count++ {
+		var nextBytes []byte
+		nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey)
+		if mi.err != nil {
+			return false
+		}
+
+		// NextKeyBytes returns nil without an error when there is no
+		// further key.
+		if nextBytes == nil {
+			mi.done = true
+			return false
+		}
+
+		// The user can get access to nextBytes since unmarshalBytes
+		// does not copy when unmarshaling into a []byte.
+		// Make a copy to prevent accidental corruption of
+		// iterator state.
+		copy(mi.prevBytes, nextBytes)
+		mi.prevKey = mi.prevBytes
+
+		mi.err = mi.target.Lookup(nextBytes, valueOut)
+		if IsNotExist(mi.err) {
+			// Even though the key should be valid, we couldn't look up
+			// its value. If we're iterating a hash map this is probably
+			// because a concurrent delete removed the value before we
+			// could get it. This means that the next call to NextKeyBytes
+			// is very likely to restart iteration.
+			// If we're iterating one of the fd maps like
+			// ProgramArray it means that a given slot doesn't have
+			// a valid fd associated. It's OK to continue to the next slot.
+			continue
+		}
+		if mi.err != nil {
+			return false
+		}
+
+		mi.err = unmarshalBytes(keyOut, nextBytes)
+		return mi.err == nil
+	}
+
+	// More steps than the map has entries: give up instead of looping on.
+	mi.err = errIterationAborted
+	return false
+}
+
+// Err returns any encountered error.
+//
+// The method must be called after Next returns false.
+func (mi *MapIterator) Err() error {
+	return mi.err
+}
+
+// IsNotExist returns true if the error indicates that a
+// key doesn't exist.
+//
+// The check is made on the cause of err as reported by errors.Cause, so
+// errors annotated by this package are recognised too.
+func IsNotExist(err error) bool {
+	return errors.Cause(err) == unix.ENOENT
+}
+
+// IsIterationAborted returns true if the iteration was aborted.
+//
+// This occurs when keys are deleted from a hash map during iteration.
+//
+// The check is made on the cause of err as reported by errors.Cause.
+func IsIterationAborted(err error) bool {
+	return errors.Cause(err) == errIterationAborted
+}

+ 192 - 0
vendor/github.com/cilium/ebpf/marshalers.go

@@ -0,0 +1,192 @@
+package ebpf
+
+import (
+	"bytes"
+	"encoding"
+	"encoding/binary"
+	"reflect"
+	"runtime"
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal"
+
+	"github.com/pkg/errors"
+)
+
+// marshalPtr converts data into a pointer suitable for a syscall argument.
+//
+// An unsafe.Pointer is passed through without looking at length. Anything
+// else is encoded by marshalBytes, which requires the encoding to be exactly
+// length bytes long.
+//
+// When the encoding is empty (length is zero) the zero syscallPtr is
+// returned instead of taking the address of a non-existent first byte,
+// which would panic.
+func marshalPtr(data interface{}, length int) (syscallPtr, error) {
+	if ptr, ok := data.(unsafe.Pointer); ok {
+		return newPtr(ptr), nil
+	}
+
+	buf, err := marshalBytes(data, length)
+	if err != nil {
+		return syscallPtr{}, err
+	}
+
+	if len(buf) == 0 {
+		// Nothing to point at: &buf[0] would be out of range.
+		return syscallPtr{}, nil
+	}
+
+	return newPtr(unsafe.Pointer(&buf[0])), nil
+}
+
+// marshalBytes encodes data and checks that the result is exactly length
+// bytes long.
+//
+// encoding.BinaryMarshaler values encode themselves, strings and byte
+// slices are used as they are (a []byte is returned without copying), and
+// unsafe.Pointer is rejected. Everything else goes through binary.Write in
+// native byte order.
+func marshalBytes(data interface{}, length int) (buf []byte, err error) {
+	switch v := data.(type) {
+	case encoding.BinaryMarshaler:
+		buf, err = v.MarshalBinary()
+	case string:
+		buf = []byte(v)
+	case []byte:
+		buf = v
+	case unsafe.Pointer:
+		err = errors.New("can't marshal from unsafe.Pointer")
+	default:
+		var encoded bytes.Buffer
+		// Wrapf passes a nil error through as nil.
+		err = errors.Wrapf(binary.Write(&encoded, internal.NativeEndian, v), "encoding %T", v)
+		buf = encoded.Bytes()
+	}
+
+	switch {
+	case err != nil:
+		return nil, err
+	case len(buf) != length:
+		return nil, errors.Errorf("%T doesn't marshal to %d bytes", data, length)
+	}
+	return buf, nil
+}
+
+// makeBuffer provides the memory a syscall writes its result into.
+//
+// If dst is an unsafe.Pointer it is used directly and the returned slice is
+// nil, which callers take to mean that nothing has to be decoded.
+// Otherwise a fresh buffer of length bytes is allocated and returned
+// together with a pointer to it.
+//
+// For a zero length the buffer is empty, so the zero syscallPtr is returned
+// instead of taking the address of a non-existent first byte, which would
+// panic. The returned slice is still non-nil in that case.
+func makeBuffer(dst interface{}, length int) (syscallPtr, []byte) {
+	if ptr, ok := dst.(unsafe.Pointer); ok {
+		return newPtr(ptr), nil
+	}
+
+	buf := make([]byte, length)
+	if len(buf) == 0 {
+		// Nothing to point at: &buf[0] would be out of range.
+		return syscallPtr{}, buf
+	}
+
+	return newPtr(unsafe.Pointer(&buf[0])), buf
+}
+
+// unmarshalBytes decodes buf into data.
+//
+// Supported destinations, checked in this order:
+//   - unsafe.Pointer: len(buf) bytes are copied to the pointed-to memory
+//   - encoding.BinaryUnmarshaler: decodes itself
+//   - *string: receives a copy of buf as a string
+//   - *[]byte: is set to buf itself, without copying
+//   - anything else: decoded by binary.Read in native byte order
+//
+// Passing a string or []byte by value is an error.
+func unmarshalBytes(data interface{}, buf []byte) error {
+	switch value := data.(type) {
+	case unsafe.Pointer:
+		// Build a []byte view of the destination memory so that the
+		// built-in copy can be used.
+		sh := &reflect.SliceHeader{
+			Data: uintptr(value),
+			Len:  len(buf),
+			Cap:  len(buf),
+		}
+
+		dst := *(*[]byte)(unsafe.Pointer(sh))
+		copy(dst, buf)
+		// The slice header only holds the address as an integer, so keep
+		// value alive until the copy is done.
+		runtime.KeepAlive(value)
+		return nil
+	case encoding.BinaryUnmarshaler:
+		return value.UnmarshalBinary(buf)
+	case *string:
+		*value = string(buf)
+		return nil
+	case *[]byte:
+		// No copy: the caller's slice aliases buf from here on.
+		*value = buf
+		return nil
+	case string:
+		return errors.New("require pointer to string")
+	case []byte:
+		return errors.New("require pointer to []byte")
+	default:
+		rd := bytes.NewReader(buf)
+		err := binary.Read(rd, internal.NativeEndian, value)
+		return errors.Wrapf(err, "decoding %T", value)
+	}
+}
+
+// marshalPerCPUValue encodes a slice containing one value per
+// possible CPU into a buffer of bytes.
+//
+// Each element is encoded by marshalBytes to elemLength bytes and stored at
+// a stride of elemLength rounded up to a multiple of 8.
+//
+// Values are initialized to zero if the slice has less elements than CPUs.
+// A slice with more elements than CPUs is an error.
+//
+// slice must have a type like []elementType. Any other value, including an
+// untyped nil, is rejected with an error.
+func marshalPerCPUValue(slice interface{}, elemLength int) (syscallPtr, error) {
+	// reflect.TypeOf returns a nil Type for a nil interface value; check
+	// for it first since calling Kind on a nil Type would panic.
+	sliceType := reflect.TypeOf(slice)
+	if sliceType == nil || sliceType.Kind() != reflect.Slice {
+		return syscallPtr{}, errors.New("per-CPU value requires slice")
+	}
+
+	possibleCPUs, err := internal.PossibleCPUs()
+	if err != nil {
+		return syscallPtr{}, err
+	}
+
+	sliceValue := reflect.ValueOf(slice)
+	sliceLen := sliceValue.Len()
+	if sliceLen > possibleCPUs {
+		return syscallPtr{}, errors.Errorf("per-CPU value exceeds number of CPUs")
+	}
+
+	alignedElemLength := align(elemLength, 8)
+	buf := make([]byte, alignedElemLength*possibleCPUs)
+
+	for i := 0; i < sliceLen; i++ {
+		elem := sliceValue.Index(i).Interface()
+		elemBytes, err := marshalBytes(elem, elemLength)
+		if err != nil {
+			return syscallPtr{}, err
+		}
+
+		// Only elemLength bytes are written per slot; the padding up to
+		// the aligned length stays zero.
+		offset := i * alignedElemLength
+		copy(buf[offset:offset+elemLength], elemBytes)
+	}
+
+	return newPtr(unsafe.Pointer(&buf[0])), nil
+}
+
+// unmarshalPerCPUValue decodes a buffer into a slice containing one value per
+// possible CPU.
+//
+// buf is split into equally sized chunks, one per possible CPU, and the
+// first elemLength bytes of each chunk are decoded by unmarshalBytes. On
+// success the slice slicePtr points to is replaced by the newly built one;
+// on error it is left untouched.
+//
+// slicePtr must be a non-nil pointer with a type like *[]elementType. The
+// element type may itself be a pointer, in which case a new element is
+// allocated for every CPU. Anything else, including an untyped nil, is
+// rejected with an error.
+func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error {
+	// reflect.TypeOf returns a nil Type for a nil interface value; check
+	// for it first since calling Kind on a nil Type would panic.
+	slicePtrType := reflect.TypeOf(slicePtr)
+	if slicePtrType == nil || slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
+		return errors.Errorf("per-cpu value requires pointer to slice")
+	}
+
+	// A typed nil pointer has nothing to store the result in; the final
+	// Set would panic.
+	dst := reflect.ValueOf(slicePtr)
+	if dst.IsNil() {
+		return errors.Errorf("per-cpu value requires non-nil pointer to slice")
+	}
+
+	possibleCPUs, err := internal.PossibleCPUs()
+	if err != nil {
+		return err
+	}
+
+	sliceType := slicePtrType.Elem()
+	slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs)
+
+	sliceElemType := sliceType.Elem()
+	sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr
+	if sliceElemIsPointer {
+		sliceElemType = sliceElemType.Elem()
+	}
+
+	step := len(buf) / possibleCPUs
+	if step < elemLength {
+		return errors.Errorf("per-cpu element length is larger than available data")
+	}
+	for i := 0; i < possibleCPUs; i++ {
+		var elem interface{}
+		if sliceElemIsPointer {
+			newElem := reflect.New(sliceElemType)
+			slice.Index(i).Set(newElem)
+			elem = newElem.Interface()
+		} else {
+			elem = slice.Index(i).Addr().Interface()
+		}
+
+		// Make a copy, since unmarshal can hold on to itemBytes
+		elemBytes := make([]byte, elemLength)
+		copy(elemBytes, buf[:elemLength])
+
+		err := unmarshalBytes(elem, elemBytes)
+		if err != nil {
+			return errors.Wrapf(err, "cpu %d", i)
+		}
+
+		// Advance to the chunk of the next CPU.
+		buf = buf[step:]
+	}
+
+	dst.Elem().Set(slice)
+	return nil
+}
+
+// align rounds n up to the next multiple of alignment. A value that already
+// is a multiple is returned unchanged.
+func align(n, alignment int) int {
+	blocks := (n + alignment - 1) / alignment
+	return blocks * alignment
+}

+ 504 - 0
vendor/github.com/cilium/ebpf/prog.go

@@ -0,0 +1,504 @@
+package ebpf
+
+import (
+	"bytes"
+	"fmt"
+	"math"
+	"strings"
+	"time"
+	"unsafe"
+
+	"github.com/cilium/ebpf/asm"
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+
+	"github.com/pkg/errors"
+)
+
+const (
+	// Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN.
+	// This is currently the maximum of spare space allocated for SKB
+	// and XDP programs, and equal to XDP_PACKET_HEADROOM + NET_IP_ALIGN.
+	outputPad = 256 + 2
+)
+
+// DefaultVerifierLogSize is the default number of bytes allocated for the
+// verifier log.
+const DefaultVerifierLogSize = 64 * 1024
+
+// ProgramOptions control loading a program into the kernel.
+type ProgramOptions struct {
+	// Controls the detail emitted by the kernel verifier. Set to non-zero
+	// to enable logging. If it is zero and loading fails, the load is
+	// retried once with log level 1 to collect an error message.
+	LogLevel uint32
+	// Controls the output buffer size for the verifier. Defaults to
+	// DefaultVerifierLogSize, which is also used when the value is not
+	// positive.
+	LogSize int
+}
+
+// ProgramSpec defines a Program
+type ProgramSpec struct {
+	// Name is passed to the kernel as a debug aid. Must only contain
+	// alpha numeric and '_' characters.
+	Name string
+	// Type is the kind of program to load.
+	Type ProgramType
+	// AttachType is passed to the kernel as the expected attach type.
+	AttachType AttachType
+	// Instructions is the program body. Must not be empty.
+	Instructions asm.Instructions
+	// License is the license string handed to the kernel. Must not be empty.
+	License string
+	// KernelVersion presumably is the kernel version the program was built
+	// for — TODO confirm against the loader.
+	KernelVersion uint32
+}
+
+// Copy returns a copy of the spec. The Instructions slice of the copy is
+// separately allocated, so appending to or replacing entries in one spec
+// does not affect the other.
+//
+// Copying a nil spec returns nil.
+func (ps *ProgramSpec) Copy() *ProgramSpec {
+	if ps == nil {
+		return nil
+	}
+
+	insns := make(asm.Instructions, len(ps.Instructions))
+	copy(insns, ps.Instructions)
+
+	dup := *ps
+	dup.Instructions = insns
+	return &dup
+}
+
+// Program represents BPF program loaded into the kernel.
+//
+// It is not safe to close a Program which is used by other goroutines.
+type Program struct {
+	// Contains the output of the kernel verifier if enabled,
+	// otherwise it is empty.
+	VerifierLog string
+
+	// fd wraps the file descriptor referring to the loaded program.
+	fd *bpfFD
+	// name is the program name; it may be empty.
+	name string
+	// abi describes the program, e.g. its ProgramType.
+	abi ProgramABI
+}
+
+// NewProgram creates a new Program.
+//
+// It is equivalent to calling NewProgramWithOptions with the zero value of
+// ProgramOptions.
+//
+// Loading a program for the first time will perform
+// feature detection by loading small, temporary programs.
+func NewProgram(spec *ProgramSpec) (*Program, error) {
+	return NewProgramWithOptions(spec, ProgramOptions{})
+}
+
+// NewProgramWithOptions creates a new Program.
+//
+// The verifier log is requested from the kernel when opts.LogLevel is
+// non-zero and is stored in Program.VerifierLog on success. When loading
+// fails, the returned error carries the verifier log; if logging was not
+// requested the load is repeated once with log level 1 purely to obtain it.
+//
+// Loading a program for the first time will perform
+// feature detection by loading small, temporary programs.
+func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
+	attr, err := convertProgramSpec(spec)
+	if err != nil {
+		return nil, err
+	}
+
+	// Non-positive sizes fall back to the default.
+	logSize := DefaultVerifierLogSize
+	if opts.LogSize > 0 {
+		logSize = opts.LogSize
+	}
+
+	var logBuf []byte
+	if opts.LogLevel > 0 {
+		logBuf = make([]byte, logSize)
+		attr.logLevel = opts.LogLevel
+		attr.logSize = uint32(len(logBuf))
+		attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0]))
+	}
+
+	fd, err := bpfProgLoad(attr)
+	if err == nil {
+		prog := newProgram(fd, spec.Name, &ProgramABI{spec.Type})
+		prog.VerifierLog = convertCString(logBuf)
+		return prog, nil
+	}
+
+	// ENOSPC is taken to mean that the log did not fit into logBuf.
+	truncated := errors.Cause(err) == unix.ENOSPC
+	if opts.LogLevel == 0 {
+		// Re-run with the verifier enabled to get better error messages.
+		logBuf = make([]byte, logSize)
+		attr.logLevel = 1
+		attr.logSize = uint32(len(logBuf))
+		attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0]))
+
+		// Only the log of this second attempt is used; the error reported
+		// to the caller is the one from the first attempt.
+		_, nerr := bpfProgLoad(attr)
+		truncated = errors.Cause(nerr) == unix.ENOSPC
+	}
+
+	logs := convertCString(logBuf)
+	if truncated {
+		logs += "\n(truncated...)"
+	}
+
+	return nil, &loadError{err, logs}
+}
+
+// NewProgramFromFD creates a program from a raw fd.
+//
+// You should not use fd after calling this function.
+//
+// If the program information cannot be retrieved, an error is returned and
+// fd is left open: the wrapper's finalizer is removed so that it will not
+// close the descriptor behind the caller's back.
+//
+// Requires at least Linux 4.11.
+func NewProgramFromFD(fd int) (*Program, error) {
+	if fd < 0 {
+		return nil, errors.New("invalid fd")
+	}
+	bpfFd := newBPFFD(uint32(fd))
+
+	name, abi, err := newProgramABIFromFd(bpfFd)
+	if err != nil {
+		// Drop the finalizer without closing the descriptor.
+		bpfFd.forget()
+		return nil, err
+	}
+
+	return newProgram(bpfFd, name, abi), nil
+}
+
+// newProgram wraps an already loaded program. The ABI is copied, so abi
+// must not be nil. VerifierLog is left empty.
+func newProgram(fd *bpfFD, name string, abi *ProgramABI) *Program {
+	return &Program{
+		name: name,
+		fd:   fd,
+		abi:  *abi,
+	}
+}
+
+// convertProgramSpec translates spec into the attribute structure expected
+// by the program load command.
+//
+// It returns an error if spec has no instructions, no license, an invalid
+// name, or if the instructions cannot be marshalled. The program name is
+// only passed on if the running kernel supports object names.
+func convertProgramSpec(spec *ProgramSpec) (*bpfProgLoadAttr, error) {
+	if len(spec.Instructions) == 0 {
+		return nil, errors.New("Instructions cannot be empty")
+	}
+
+	if len(spec.License) == 0 {
+		return nil, errors.New("License cannot be empty")
+	}
+
+	buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize))
+	err := spec.Instructions.Marshal(buf, internal.NativeEndian)
+	if err != nil {
+		return nil, err
+	}
+
+	bytecode := buf.Bytes()
+	insCount := uint32(len(bytecode) / asm.InstructionSize)
+	// The kernel reads the license as a C string. A Go string converted to
+	// []byte carries no terminator, so append one explicitly instead of
+	// relying on whatever happens to follow the allocation.
+	lic := append([]byte(spec.License), 0)
+	attr := &bpfProgLoadAttr{
+		progType:           spec.Type,
+		expectedAttachType: spec.AttachType,
+		insCount:           insCount,
+		instructions:       newPtr(unsafe.Pointer(&bytecode[0])),
+		license:            newPtr(unsafe.Pointer(&lic[0])),
+		// Pass the version through rather than silently dropping it; the
+		// zero value keeps the previous behaviour.
+		kernelVersion: spec.KernelVersion,
+	}
+
+	name, err := newBPFObjName(spec.Name)
+	if err != nil {
+		return nil, err
+	}
+
+	// Kernels without support for object names reject a non-empty name.
+	if haveObjName() == nil {
+		attr.progName = name
+	}
+
+	return attr, nil
+}
+
+// String describes the Program as "Type(name)#fd", or "Type#fd" if the
+// program has no name.
+func (p *Program) String() string {
+	if p.name == "" {
+		return fmt.Sprintf("%s#%v", p.abi.Type, p.fd)
+	}
+	return fmt.Sprintf("%s(%s)#%v", p.abi.Type, p.name, p.fd)
+}
+
+// ABI gets the ABI of the Program. The value is returned by copy.
+func (p *Program) ABI() ProgramABI {
+	return p.abi
+}
+
+// FD gets the file descriptor of the Program.
+//
+// It is invalid to call this function after Close has been called; in that
+// case -1 is returned.
+func (p *Program) FD() int {
+	if fd, err := p.fd.value(); err == nil {
+		return int(fd)
+	}
+
+	// Best effort: -1 is the number most likely to be an
+	// invalid file descriptor.
+	return -1
+}
+
+// Clone creates a duplicate of the Program backed by a duplicated file
+// descriptor.
+//
+// Closing the duplicate does not affect the original, and vice versa.
+// The verifier log is not carried over to the duplicate.
+//
+// Cloning a nil Program returns nil.
+func (p *Program) Clone() (*Program, error) {
+	if p == nil {
+		return nil, nil
+	}
+
+	fd, err := p.fd.dup()
+	if err != nil {
+		return nil, errors.Wrap(err, "can't clone program")
+	}
+
+	clone := &Program{name: p.name, fd: fd, abi: p.abi}
+	return clone, nil
+}
+
+// Pin persists the Program past the lifetime of the process that created it.
+//
+// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
+//
+// Any failure is returned wrapped with the message "can't pin program".
+func (p *Program) Pin(fileName string) error {
+	return errors.Wrap(bpfPinObject(fileName, p.fd), "can't pin program")
+}
+
+// Close closes the file descriptor of the Program.
+//
+// NOTE(review): the kernel presumably only unloads the program once nothing
+// else refers to it (other descriptors, pins, attachments) — confirm.
+//
+// Closing a nil Program is a no-op.
+func (p *Program) Close() error {
+	if p == nil {
+		return nil
+	}
+
+	return p.fd.close()
+}
+
+// Test runs the Program once in the kernel with the given input and returns
+// the value returned by the eBPF program together with the output data
+// written by it. in must not be empty.
+//
+// Note: the kernel expects at least 14 bytes input for an ethernet header for
+// XDP and SKB programs.
+//
+// This function requires at least Linux 4.12.
+func (p *Program) Test(in []byte) (uint32, []byte, error) {
+	ret, out, _, err := p.testRun(in, 1)
+	return ret, out, errors.Wrap(err, "can't test program")
+}
+
+// Benchmark runs the Program with the given input for a number of times
+// and returns the time taken per iteration.
+//
+// The duration is the value reported by the kernel, converted from
+// nanoseconds. NOTE(review): the local is called total, but the kernel
+// presumably already averages over repeat — confirm.
+//
+// The returned value is the return value of the last execution of
+// the program.
+//
+// This function requires at least Linux 4.12.
+func (p *Program) Benchmark(in []byte, repeat int) (uint32, time.Duration, error) {
+	ret, _, total, err := p.testRun(in, repeat)
+	return ret, total, errors.Wrap(err, "can't benchmark program")
+}
+
+// haveProgTestRun probes whether the kernel supports BPF_PROG_TEST_RUN by
+// loading a minimal socket filter and running it once. Callers in this file
+// treat a nil result of the returned function as "supported".
+var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() bool {
+	prog, err := NewProgram(&ProgramSpec{
+		Type: SocketFilter,
+		Instructions: asm.Instructions{
+			asm.LoadImm(asm.R0, 0, asm.DWord),
+			asm.Return(),
+		},
+		License: "MIT",
+	})
+	if err != nil {
+		// This may be because we lack sufficient permissions, etc.
+		return false
+	}
+	defer prog.Close()
+
+	fd, err := prog.fd.value()
+	if err != nil {
+		return false
+	}
+
+	// Programs require at least 14 bytes input
+	in := make([]byte, 14)
+	attr := bpfProgTestRunAttr{
+		fd:         fd,
+		dataSizeIn: uint32(len(in)),
+		dataIn:     newPtr(unsafe.Pointer(&in[0])),
+	}
+
+	_, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+
+	// Check for EINVAL specifically, rather than err != nil since we
+	// otherwise misdetect due to insufficient permissions.
+	return errors.Cause(err) != unix.EINVAL
+})
+
+// testRun executes the Program repeat times with in as input, using
+// BPF_PROG_TEST_RUN.
+//
+// It returns the program's return value, the output data written by the
+// kernel and the duration reported by the kernel. in must not be empty and
+// repeat must fit into an unsigned 32 bit integer. An error is returned if
+// the kernel does not support BPF_PROG_TEST_RUN.
+//
+// The function panics if the kernel reports more output than was allocated,
+// since memory past the buffer has been overwritten at that point.
+func (p *Program) testRun(in []byte, repeat int) (uint32, []byte, time.Duration, error) {
+	// Where int is 32 bits wide, uint(repeat) maps negative values into the
+	// valid uint32 range and the bound check below never fires, so reject
+	// them explicitly.
+	if repeat < 0 {
+		return 0, nil, 0, fmt.Errorf("repeat is negative")
+	}
+
+	if uint(repeat) > math.MaxUint32 {
+		return 0, nil, 0, fmt.Errorf("repeat is too high")
+	}
+
+	if len(in) == 0 {
+		return 0, nil, 0, fmt.Errorf("missing input")
+	}
+
+	if uint(len(in)) > math.MaxUint32 {
+		return 0, nil, 0, fmt.Errorf("input is too long")
+	}
+
+	if err := haveProgTestRun(); err != nil {
+		return 0, nil, 0, err
+	}
+
+	// Older kernels ignore the dataSizeOut argument when copying to user space.
+	// Combined with things like bpf_xdp_adjust_head() we don't really know what the final
+	// size will be. Hence we allocate an output buffer which we hope will always be large
+	// enough, and panic if the kernel wrote past the end of the allocation.
+	// See https://patchwork.ozlabs.org/cover/1006822/
+	out := make([]byte, len(in)+outputPad)
+
+	fd, err := p.fd.value()
+	if err != nil {
+		return 0, nil, 0, err
+	}
+
+	attr := bpfProgTestRunAttr{
+		fd:          fd,
+		dataSizeIn:  uint32(len(in)),
+		dataSizeOut: uint32(len(out)),
+		dataIn:      newPtr(unsafe.Pointer(&in[0])),
+		dataOut:     newPtr(unsafe.Pointer(&out[0])),
+		repeat:      uint32(repeat),
+	}
+
+	_, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	if err != nil {
+		return 0, nil, 0, errors.Wrap(err, "can't run test")
+	}
+
+	if int(attr.dataSizeOut) > cap(out) {
+		// Houston, we have a problem. The program created more data than we allocated,
+		// and the kernel wrote past the end of our buffer.
+		panic("kernel wrote past end of output buffer")
+	}
+	// The kernel updated dataSizeOut to the amount of data it produced.
+	out = out[:int(attr.dataSizeOut)]
+
+	total := time.Duration(attr.duration) * time.Nanosecond
+	return attr.retval, out, total, nil
+}
+
+// unmarshalProgram turns a 4 byte, native endian program id into a Program
+// by asking the kernel for a file descriptor for that id. The descriptor is
+// closed again if the program information cannot be retrieved.
+func unmarshalProgram(buf []byte) (*Program, error) {
+	if len(buf) != 4 {
+		return nil, errors.New("program id requires 4 byte value")
+	}
+
+	// Looking up an entry in a nested map or prog array returns an id,
+	// not an fd.
+	id := internal.NativeEndian.Uint32(buf)
+	fd, err := bpfGetProgramFDByID(id)
+	if err != nil {
+		return nil, err
+	}
+
+	name, abi, err := newProgramABIFromFd(fd)
+	if err != nil {
+		// The original error is more useful than a failure to close.
+		_ = fd.close()
+		return nil, err
+	}
+
+	return newProgram(fd, name, abi), nil
+}
+
+// MarshalBinary implements BinaryMarshaler.
+//
+// The result is the Program's file descriptor encoded as 4 bytes in native
+// byte order. An error is returned if the Program has been closed.
+func (p *Program) MarshalBinary() ([]byte, error) {
+	fd, err := p.fd.value()
+	if err != nil {
+		return nil, err
+	}
+
+	var raw [4]byte
+	internal.NativeEndian.PutUint32(raw[:], fd)
+	return raw[:], nil
+}
+
+// Attach a Program to a container object fd.
+//
+// fd must not be negative and the Program must not have been closed.
+func (p *Program) Attach(fd int, typ AttachType, flags AttachFlags) error {
+	if fd < 0 {
+		return errors.New("invalid fd")
+	}
+
+	progFd, err := p.fd.value()
+	if err != nil {
+		return err
+	}
+
+	return bpfProgAlter(_ProgAttach, &bpfProgAlterAttr{
+		targetFd:    uint32(fd),
+		attachBpfFd: progFd,
+		attachType:  uint32(typ),
+		attachFlags: uint32(flags),
+	})
+}
+
+// Detach a Program from a container object fd.
+//
+// fd must not be negative and the Program must not have been closed.
+func (p *Program) Detach(fd int, typ AttachType, flags AttachFlags) error {
+	if fd < 0 {
+		return errors.New("invalid fd")
+	}
+
+	progFd, err := p.fd.value()
+	if err != nil {
+		return err
+	}
+
+	return bpfProgAlter(_ProgDetach, &bpfProgAlterAttr{
+		targetFd:    uint32(fd),
+		attachBpfFd: progFd,
+		attachType:  uint32(typ),
+		attachFlags: uint32(flags),
+	})
+}
+
+// LoadPinnedProgram loads a Program from a BPF file.
+//
+// The descriptor obtained for fileName is closed again if the program
+// information cannot be retrieved from the kernel.
+//
+// Requires at least Linux 4.11.
+func LoadPinnedProgram(fileName string) (*Program, error) {
+	fd, err := bpfGetObject(fileName)
+	if err != nil {
+		return nil, err
+	}
+
+	name, abi, err := newProgramABIFromFd(fd)
+	if err != nil {
+		// The original error is more useful than a failure to close.
+		_ = fd.close()
+		return nil, errors.Wrapf(err, "can't get ABI for %s", fileName)
+	}
+
+	return newProgram(fd, name, abi), nil
+}
+
+// SanitizeName replaces every character of name that is not allowed in a
+// BPF object name with replacement.
+//
+// Use this to automatically generate valid names for maps and
+// programs at run time.
+//
+// Passing a negative value for replacement will delete characters
+// instead of replacing them.
+func SanitizeName(name string, replacement rune) string {
+	sanitize := func(r rune) rune {
+		if !invalidBPFObjNameChar(r) {
+			return r
+		}
+		return replacement
+	}
+	return strings.Map(sanitize, name)
+}
+
+// loadError is returned when loading a program fails. It pairs the
+// underlying error with the verifier log, which may be empty.
+type loadError struct {
+	cause       error
+	verifierLog string
+}
+
+// Error implements the error interface. The verifier log is appended to the
+// message if there is one.
+func (le *loadError) Error() string {
+	msg := fmt.Sprintf("failed to load program: %s", le.cause)
+	if le.verifierLog != "" {
+		msg += ": " + le.verifierLog
+	}
+	return msg
+}
+
+// Cause returns the underlying error, which lets errors.Cause from
+// github.com/pkg/errors see through a loadError.
+func (le *loadError) Cause() error {
+	return le.cause
+}
+
+// IsNotSupported returns true if an error occurred because
+// the kernel does not have support for a specific feature.
+//
+// Only the root cause of err, as determined by errors.Cause, is inspected.
+func IsNotSupported(err error) bool {
+	switch errors.Cause(err).(type) {
+	case *internal.UnsupportedFeatureError:
+		return true
+	default:
+		return false
+	}
+}

+ 14 - 0
vendor/github.com/cilium/ebpf/ptr_32_be.go

@@ -0,0 +1,14 @@
+// +build armbe mips mips64p32
+
+package ebpf
+
+import (
+	"unsafe"
+)
+
+// syscallPtr wraps an unsafe.Pointer to be 64bit to
+// conform to the syscall specification.
+//
+// This is the variant for the 32 bit platforms named in the build
+// constraint of this file; here the padding comes before the pointer.
+// The field order is part of the syscall ABI and must not be changed.
+type syscallPtr struct {
+	pad uint32
+	ptr unsafe.Pointer
+}

+ 14 - 0
vendor/github.com/cilium/ebpf/ptr_32_le.go

@@ -0,0 +1,14 @@
+// +build 386 amd64p32 arm mipsle mips64p32le
+
+package ebpf
+
+import (
+	"unsafe"
+)
+
+// syscallPtr wraps an unsafe.Pointer to be 64bit to
+// conform to the syscall specification.
+//
+// This is the variant for the 32 bit platforms named in the build
+// constraint of this file; here the padding comes after the pointer.
+// The field order is part of the syscall ABI and must not be changed.
+type syscallPtr struct {
+	ptr unsafe.Pointer
+	pad uint32
+}

+ 14 - 0
vendor/github.com/cilium/ebpf/ptr_64.go

@@ -0,0 +1,14 @@
+// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le
+// +build !armbe,!mips,!mips64p32
+
+package ebpf
+
+import (
+	"unsafe"
+)
+
+// syscallPtr wraps an unsafe.Pointer to be 64bit to
+// conform to the syscall specification.
+//
+// This is the variant for all platforms not covered by the padded 32 bit
+// variants, as selected by the build constraints of this file; no padding
+// is added.
+type syscallPtr struct {
+	ptr unsafe.Pointer
+}

+ 20 - 0
vendor/github.com/cilium/ebpf/readme.md

@@ -0,0 +1,20 @@
+eBPF
+-------
+[![](https://godoc.org/github.com/cilium/ebpf?status.svg)](https://godoc.org/github.com/cilium/ebpf)
+
+eBPF is a pure Go library that provides utilities for loading, compiling, and debugging eBPF programs. It has minimal external dependencies and is intended to be used in long running processes.
+
+[ebpf/asm](https://godoc.org/github.com/cilium/ebpf/asm) contains a basic assembler.
+
+The library is maintained by [Cloudflare](https://www.cloudflare.com) and [Cilium](https://www.cilium.io). Feel free to [join](https://cilium.herokuapp.com/) the [libbpf-go](https://cilium.slack.com/messages/libbpf-go) channel on Slack.
+
+## Current status
+
+The package is production ready, but **the API is explicitly unstable
+right now**. Expect to update your code if you want to follow along.
+
+## Useful resources
+
+* [Cilium eBPF documentation](https://cilium.readthedocs.io/en/latest/bpf/#bpf-guide) (recommended)
+* [Linux documentation on BPF](http://elixir.free-electrons.com/linux/latest/source/Documentation/networking/filter.txt)
+* [eBPF features by Linux version](https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md)

+ 447 - 0
vendor/github.com/cilium/ebpf/syscalls.go

@@ -0,0 +1,447 @@
+package ebpf
+
+import (
+	"bytes"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+
+	"github.com/pkg/errors"
+)
+
+var errClosedFd = errors.New("use of closed file descriptor")
+
+// bpfFD wraps a file descriptor. A negative raw value marks the descriptor
+// as closed.
+type bpfFD struct {
+	raw int64
+}
+
+// newBPFFD wraps value and registers a finalizer which closes the
+// descriptor once the wrapper is garbage collected.
+func newBPFFD(value uint32) *bpfFD {
+	fd := &bpfFD{int64(value)}
+	runtime.SetFinalizer(fd, (*bpfFD).close)
+	return fd
+}
+
+// String returns the raw descriptor in decimal notation. A closed
+// descriptor is rendered as -1.
+func (fd *bpfFD) String() string {
+	return strconv.FormatInt(fd.raw, 10)
+}
+
+// value returns the wrapped descriptor, or errClosedFd if it has been
+// closed already.
+func (fd *bpfFD) value() (uint32, error) {
+	if raw := fd.raw; raw >= 0 {
+		return uint32(raw), nil
+	}
+	return 0, errClosedFd
+}
+
+// close closes the wrapped descriptor and removes the finalizer. Closing an
+// already closed descriptor is a no-op which returns nil.
+func (fd *bpfFD) close() error {
+	if fd.raw < 0 {
+		return nil
+	}
+
+	// Mark the wrapper as closed before closing, so that a later call
+	// cannot close the same number a second time.
+	value := int(fd.raw)
+	fd.raw = -1
+
+	fd.forget()
+	return unix.Close(value)
+}
+
+// forget removes the finalizer, so that the descriptor is not closed when
+// the wrapper is garbage collected. The descriptor itself is left alone.
+func (fd *bpfFD) forget() {
+	runtime.SetFinalizer(fd, nil)
+}
+
+// dup duplicates the descriptor using F_DUPFD_CLOEXEC and returns a new,
+// independent wrapper. It returns errClosedFd if fd has been closed.
+func (fd *bpfFD) dup() (*bpfFD, error) {
+	if fd.raw < 0 {
+		return nil, errClosedFd
+	}
+
+	dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 0)
+	if err != nil {
+		return nil, errors.Wrap(err, "can't dup fd")
+	}
+
+	return newBPFFD(uint32(dup)), nil
+}
+
+// bpfObjName is a null-terminated string made up of
+// 'A-Za-z0-9_' characters.
+type bpfObjName [unix.BPF_OBJ_NAME_LEN]byte
+
+// newBPFObjName converts name into a bpfObjName.
+//
+// An error is returned if name contains a character which is not allowed in
+// object names. The result is truncated if name is too long: at most
+// unix.BPF_OBJ_NAME_LEN-1 bytes are copied, so the last byte always remains
+// a terminating zero.
+func newBPFObjName(name string) (bpfObjName, error) {
+	idx := strings.IndexFunc(name, invalidBPFObjNameChar)
+	if idx != -1 {
+		// idx is a byte offset. Decode the whole character starting there:
+		// name[idx] is only its first byte, which renders as garbage for
+		// anything outside of ASCII.
+		char := []rune(name[idx:])[0]
+		return bpfObjName{}, errors.Errorf("invalid character '%c' in name '%s'", char, name)
+	}
+
+	var result bpfObjName
+	copy(result[:unix.BPF_OBJ_NAME_LEN-1], name)
+	return result, nil
+}
+
+// invalidBPFObjNameChar reports whether char may NOT appear in a BPF object
+// name. Only ASCII letters, ASCII digits and the underscore are valid.
+func invalidBPFObjNameChar(char rune) bool {
+	switch {
+	case 'A' <= char && char <= 'Z',
+		'a' <= char && char <= 'z',
+		'0' <= char && char <= '9',
+		char == '_':
+		return false
+	}
+	return true
+}
+
+// bpfMapCreateAttr is the argument of the map create command. The struct is
+// handed to the kernel as raw memory, so field order and sizes must not be
+// changed.
+type bpfMapCreateAttr struct {
+	mapType    MapType
+	keySize    uint32
+	valueSize  uint32
+	maxEntries uint32
+	flags      uint32
+	innerMapFd uint32     // since 4.12 56f668dfe00d
+	numaNode   uint32     // since 4.14 96eabe7a40aa
+	mapName    bpfObjName // since 4.15 ad5b177bd73f
+}
+
+// bpfMapOpAttr is the argument shared by the map lookup, update, delete and
+// get-next-key commands. The struct is handed to the kernel as raw memory,
+// so field order and sizes must not be changed.
+type bpfMapOpAttr struct {
+	mapFd   uint32
+	padding uint32 // presumably aligns key to 8 bytes — TODO confirm
+	key     syscallPtr
+	value   syscallPtr // doubles as the next key output for get-next-key
+	flags   uint64
+}
+
+// bpfMapInfo receives the map information written by the kernel, see
+// bpfGetMapInfoByFD. The kernel fills it as raw memory, so field order and
+// sizes must not be changed.
+type bpfMapInfo struct {
+	mapType    uint32
+	id         uint32
+	keySize    uint32
+	valueSize  uint32
+	maxEntries uint32
+	flags      uint32
+	mapName    bpfObjName // since 4.15 ad5b177bd73f
+}
+
+// bpfPinObjAttr is the argument of the object pin and object get commands.
+// The struct is handed to the kernel as raw memory, so field order and
+// sizes must not be changed.
+type bpfPinObjAttr struct {
+	fileName syscallPtr
+	fd       uint32
+	padding  uint32
+}
+
+// bpfProgLoadAttr is the argument of the program load command. The struct
+// is handed to the kernel as raw memory, so field order and sizes must not
+// be changed.
+type bpfProgLoadAttr struct {
+	progType           ProgramType
+	insCount           uint32
+	instructions       syscallPtr
+	license            syscallPtr
+	logLevel           uint32
+	logSize            uint32
+	logBuf             syscallPtr
+	kernelVersion      uint32     // since 4.1  2541517c32be
+	progFlags          uint32     // since 4.11 e07b98d9bffe
+	progName           bpfObjName // since 4.15 067cae47771c
+	progIfIndex        uint32     // since 4.15 1f6f4cb7ba21
+	expectedAttachType AttachType // since 4.17 5e43f899b03a
+}
+
+// bpfProgInfo receives the program information written by the kernel, see
+// bpfGetProgInfoByFD. The kernel fills it as raw memory, so field order and
+// sizes must not be changed.
+type bpfProgInfo struct {
+	progType     uint32
+	id           uint32
+	tag          [unix.BPF_TAG_SIZE]byte
+	jitedLen     uint32
+	xlatedLen    uint32
+	jited        syscallPtr
+	xlated       syscallPtr
+	loadTime     uint64 // since 4.15 cb4d2b3f03d8
+	createdByUID uint32
+	nrMapIDs     uint32
+	mapIds       syscallPtr
+	name         bpfObjName
+}
+
+// bpfProgTestRunAttr is the argument of the program test run command.
+// retval, dataSizeOut and duration are read back after the call. The struct
+// is handed to the kernel as raw memory, so field order and sizes must not
+// be changed.
+type bpfProgTestRunAttr struct {
+	fd          uint32
+	retval      uint32
+	dataSizeIn  uint32
+	dataSizeOut uint32
+	dataIn      syscallPtr
+	dataOut     syscallPtr
+	repeat      uint32
+	duration    uint32
+}
+
+// bpfProgAlterAttr is the argument of the program attach and detach
+// commands. The struct is handed to the kernel as raw memory, so field
+// order and sizes must not be changed.
+type bpfProgAlterAttr struct {
+	targetFd    uint32
+	attachBpfFd uint32
+	attachType  uint32
+	attachFlags uint32
+}
+
+// bpfObjGetInfoByFDAttr is the argument of the get-info-by-fd command. The
+// struct is handed to the kernel as raw memory, so field order and sizes
+// must not be changed.
+type bpfObjGetInfoByFDAttr struct {
+	fd      uint32
+	infoLen uint32
+	info    syscallPtr // May be either bpfMapInfo or bpfProgInfo
+}
+
+// bpfGetFDByIDAttr is the argument of the commands which turn a map or
+// program id into a file descriptor. The struct is handed to the kernel as
+// raw memory, so field order and sizes must not be changed.
+type bpfGetFDByIDAttr struct {
+	id   uint32
+	next uint32
+}
+
+// newPtr wraps ptr in the platform specific syscallPtr; any padding is left
+// zeroed.
+func newPtr(ptr unsafe.Pointer) syscallPtr {
+	return syscallPtr{ptr: ptr}
+}
+
+// bpfProgLoad loads a program into the kernel and returns a descriptor for
+// it. The call is retried for as long as the kernel answers with EAGAIN.
+func bpfProgLoad(attr *bpfProgLoadAttr) (*bpfFD, error) {
+	for {
+		fd, err := bpfCall(_ProgLoad, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
+		// As of ~4.20 the verifier can be interrupted by a signal,
+		// and returns EAGAIN in that case.
+		if err == unix.EAGAIN {
+			continue
+		}
+
+		if err != nil {
+			return nil, err
+		}
+
+		return newBPFFD(uint32(fd)), nil
+	}
+}
+
+// bpfProgAlter issues cmd with attr as its argument and discards the
+// result value. It is used for the attach and detach commands.
+func bpfProgAlter(cmd int, attr *bpfProgAlterAttr) error {
+	_, err := bpfCall(cmd, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
+	return err
+}
+
+// bpfMapCreate creates a map as described by attr and returns a descriptor
+// for it.
+func bpfMapCreate(attr *bpfMapCreateAttr) (*bpfFD, error) {
+	fd, err := bpfCall(_MapCreate, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
+	if err != nil {
+		return nil, err
+	}
+
+	return newBPFFD(uint32(fd)), nil
+}
+
+// haveNestedMaps probes whether the kernel supports maps of maps, by
+// creating a small array and then an array of maps which uses it as the
+// inner map. Both maps are closed again before the probe returns.
+var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() bool {
+	inner, err := bpfMapCreate(&bpfMapCreateAttr{
+		mapType:    Array,
+		keySize:    4,
+		valueSize:  4,
+		maxEntries: 1,
+	})
+	if err != nil {
+		return false
+	}
+	defer inner.close()
+
+	// inner has just been created, so value cannot report a closed fd.
+	innerFd, _ := inner.value()
+	nested, err := bpfMapCreate(&bpfMapCreateAttr{
+		mapType:    ArrayOfMaps,
+		keySize:    4,
+		valueSize:  4,
+		maxEntries: 1,
+		innerMapFd: innerFd,
+	})
+	if err != nil {
+		return false
+	}
+
+	_ = nested.close()
+	return true
+})
+
+// bpfMapLookupElem asks the kernel to look up key in the map m and to store
+// the result in the memory valueOut points to. It returns errClosedFd if m
+// has been closed.
+func bpfMapLookupElem(m *bpfFD, key, valueOut syscallPtr) error {
+	fd, err := m.value()
+	if err != nil {
+		return err
+	}
+
+	attr := bpfMapOpAttr{
+		mapFd: fd,
+		key:   key,
+		value: valueOut,
+	}
+	_, err = bpfCall(_MapLookupElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return err
+}
+
+// bpfMapUpdateElem asks the kernel to store a value under key in the map m.
+// Despite its name, valueOut points to the value to be written. flags is
+// passed to the kernel unchanged. It returns errClosedFd if m has been
+// closed.
+func bpfMapUpdateElem(m *bpfFD, key, valueOut syscallPtr, flags uint64) error {
+	fd, err := m.value()
+	if err != nil {
+		return err
+	}
+
+	attr := bpfMapOpAttr{
+		mapFd: fd,
+		key:   key,
+		value: valueOut,
+		flags: flags,
+	}
+	_, err = bpfCall(_MapUpdateElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return err
+}
+
+// bpfMapDeleteElem asks the kernel to delete key from the map m. It returns
+// errClosedFd if m has been closed.
+func bpfMapDeleteElem(m *bpfFD, key syscallPtr) error {
+	fd, err := m.value()
+	if err != nil {
+		return err
+	}
+
+	attr := bpfMapOpAttr{
+		mapFd: fd,
+		key:   key,
+	}
+	_, err = bpfCall(_MapDeleteElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return err
+}
+
+// bpfMapGetNextKey asks the kernel for the key following key in the map m
+// and to store it in the memory nextKeyOut points to. It returns
+// errClosedFd if m has been closed.
+func bpfMapGetNextKey(m *bpfFD, key, nextKeyOut syscallPtr) error {
+	fd, err := m.value()
+	if err != nil {
+		return err
+	}
+
+	attr := bpfMapOpAttr{
+		mapFd: fd,
+		key:   key,
+		// The output pointer travels in the value slot for this command.
+		value: nextKeyOut,
+	}
+	_, err = bpfCall(_MapGetNextKey, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return err
+}
+
+const bpfFSType = 0xcafe4a11
+
+// bpfPinObject pins the object fd refers to at fileName.
+//
+// The directory containing fileName must be on a bpf filesystem, otherwise
+// an error is returned before the kernel is asked to pin anything. It
+// returns errClosedFd if fd has been closed.
+func bpfPinObject(fileName string, fd *bpfFD) error {
+	dirName := filepath.Dir(fileName)
+	var statfs unix.Statfs_t
+	if err := unix.Statfs(dirName, &statfs); err != nil {
+		return err
+	}
+
+	// Where statfs.Type is a signed 32 bit integer, bpfFSType is a negative
+	// number and a direct conversion to uint64 would sign extend it, making
+	// the comparison fail on a real bpffs. Go through uint32 in that case.
+	fsType := uint64(statfs.Type)
+	if unsafe.Sizeof(statfs.Type) == 4 {
+		fsType = uint64(uint32(statfs.Type))
+	}
+	if fsType != bpfFSType {
+		return errors.Errorf("%s is not on a bpf filesystem", fileName)
+	}
+
+	value, err := fd.value()
+	if err != nil {
+		return err
+	}
+
+	// The kernel reads the path as a C string. A Go string converted to
+	// []byte carries no terminator, so append one explicitly.
+	path := append([]byte(fileName), 0)
+	attr := bpfPinObjAttr{
+		fileName: newPtr(unsafe.Pointer(&path[0])),
+		fd:       value,
+	}
+	_, err = bpfCall(_ObjPin, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return errors.Wrapf(err, "pin object %s", fileName)
+}
+
+// bpfGetObject opens the object pinned at fileName and returns a descriptor
+// for it.
+func bpfGetObject(fileName string) (*bpfFD, error) {
+	// The kernel reads the path as a C string. A Go string converted to
+	// []byte carries no terminator, so append one explicitly. This also
+	// keeps an empty fileName from panicking on the index below; the kernel
+	// rejects the empty path instead.
+	path := append([]byte(fileName), 0)
+	attr := bpfPinObjAttr{
+		fileName: newPtr(unsafe.Pointer(&path[0])),
+	}
+	ptr, err := bpfCall(_ObjGet, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	if err != nil {
+		return nil, errors.Wrapf(err, "get object %s", fileName)
+	}
+	return newBPFFD(uint32(ptr)), nil
+}
+
+// bpfGetObjectInfoByFD asks the kernel to write information about the
+// object fd refers to into the size bytes of memory info points to. It
+// returns errClosedFd if fd has been closed; errors from the kernel are
+// wrapped with the descriptor number.
+func bpfGetObjectInfoByFD(fd *bpfFD, info unsafe.Pointer, size uintptr) error {
+	value, err := fd.value()
+	if err != nil {
+		return err
+	}
+
+	// available from 4.13
+	attr := bpfObjGetInfoByFDAttr{
+		fd:      value,
+		infoLen: uint32(size),
+		info:    newPtr(info),
+	}
+	_, err = bpfCall(_ObjGetInfoByFD, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return errors.Wrapf(err, "fd %d", value)
+}
+
+// bpfGetProgInfoByFD retrieves information about the program fd refers to.
+// The returned pointer is never nil, but its contents are only meaningful
+// if the error is nil.
+func bpfGetProgInfoByFD(fd *bpfFD) (*bpfProgInfo, error) {
+	var info bpfProgInfo
+	err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
+	return &info, errors.Wrap(err, "can't get program info")
+}
+
+// bpfGetMapInfoByFD retrieves information about the map fd refers to.
+// The returned pointer is never nil, but its contents are only meaningful
+// if the error is nil.
+func bpfGetMapInfoByFD(fd *bpfFD) (*bpfMapInfo, error) {
+	var info bpfMapInfo
+	err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
+	return &info, errors.Wrap(err, "can't get map info")
+}
+
+// haveObjName probes whether the kernel accepts names for maps and
+// programs, by creating a small named array map. Callers in this file treat
+// a nil result of the returned function as "supported".
+var haveObjName = internal.FeatureTest("object names", "4.15", func() bool {
+	name, err := newBPFObjName("feature_test")
+	if err != nil {
+		// This really is a fatal error, but it should be caught
+		// by the unit tests not working.
+		return false
+	}
+
+	attr := bpfMapCreateAttr{
+		mapType:    Array,
+		keySize:    4,
+		valueSize:  4,
+		maxEntries: 1,
+		mapName:    name,
+	}
+
+	fd, err := bpfMapCreate(&attr)
+	if err != nil {
+		return false
+	}
+
+	// The map only exists for the probe.
+	_ = fd.close()
+	return true
+})
+
+// bpfGetMapFDByID returns a new descriptor for the map with the given id.
+func bpfGetMapFDByID(id uint32) (*bpfFD, error) {
+	// available from 4.13
+	attr := bpfGetFDByIDAttr{
+		id: id,
+	}
+	ptr, err := bpfCall(_MapGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	if err != nil {
+		return nil, errors.Wrapf(err, "can't get fd for map id %d", id)
+	}
+	return newBPFFD(uint32(ptr)), nil
+}
+
+// bpfGetProgramFDByID returns a new descriptor for the program with the
+// given id.
+func bpfGetProgramFDByID(id uint32) (*bpfFD, error) {
+	// available from 4.13
+	attr := bpfGetFDByIDAttr{
+		id: id,
+	}
+	ptr, err := bpfCall(_ProgGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	if err != nil {
+		return nil, errors.Wrapf(err, "can't get fd for program id %d", id)
+	}
+	return newBPFFD(uint32(ptr)), nil
+}
+
+// bpfCall invokes the bpf syscall with the given command. attr points to
+// the command specific argument and size is its length in bytes.
+//
+// It returns the first result value of the syscall. The error is nil if the
+// syscall reported errno 0, and the errno value otherwise.
+func bpfCall(cmd int, attr unsafe.Pointer, size uintptr) (uintptr, error) {
+	r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
+	// attr has been passed as an integer above; keep the memory it points
+	// to reachable until the syscall has returned.
+	runtime.KeepAlive(attr)
+
+	// Return a nil error interface rather than a zero errno on success.
+	var err error
+	if errNo != 0 {
+		err = errNo
+	}
+
+	return r1, err
+}
+
+// convertCString returns the bytes of in up to, but not including, the
+// first zero byte as a string. The result is empty if in contains no zero
+// byte at all, which includes a nil or empty slice.
+func convertCString(in []byte) string {
+	if end := bytes.IndexByte(in, 0); end >= 0 {
+		return string(in[:end])
+	}
+	return ""
+}

+ 189 - 0
vendor/github.com/cilium/ebpf/types.go

@@ -0,0 +1,189 @@
+package ebpf
+
+//go:generate stringer -output types_string.go -type=MapType,ProgramType
+
+// MapType indicates the type map structure
+// that will be initialized in the kernel.
+type MapType uint32
+
+// All the various map types that can be created.
+//
+// The values are assigned positionally via iota, so entries must not be
+// reordered or removed.
+const (
+	UnspecifiedMap MapType = iota
+	// Hash is a hash map
+	Hash
+	// Array is an array map
+	Array
+	// ProgramArray - A program array map is a special kind of array map whose map
+	// values contain only file descriptors referring to other eBPF
+	// programs.  Thus, both the key_size and value_size must be
+	// exactly four bytes.  This map is used in conjunction with the
+	// TailCall helper.
+	ProgramArray
+	// PerfEventArray - A perf event array is used in conjunction with PerfEventRead
+	// and PerfEventOutput calls, to read the raw bpf_perf_data from the registers.
+	PerfEventArray
+	// PerCPUHash - This data structure is useful for people who have high performance
+	// network needs and can reconcile adds at the end of some cycle, so that
+	// hashes can be lock free without the use of XAdd, which can be costly.
+	PerCPUHash
+	// PerCPUArray - This data structure is useful for people who have high performance
+	// network needs and can reconcile adds at the end of some cycle, so that
+	// arrays can be lock free without the use of XAdd, which can be costly.
+	// Each CPU gets its own copy of the array, the contents of all of which can be
+	// reconciled later.
+	PerCPUArray
+	// StackTrace - This holds whole user and kernel stack traces, it can be retrieved with
+	// GetStackID
+	StackTrace
+	// CGroupArray - This is a very niche structure used to help SKBInCGroup determine
+	// if an skb is from a socket belonging to a specific cgroup
+	CGroupArray
+	// LRUHash - This allows you to create a small hash structure that will purge the
+	// least recently used items rather than throw an error when you run out of memory
+	LRUHash
+	// LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs,
+	// it has more to do with including the CPU id with the LRU calculation so that if a
+	// particular CPU is using a value over-and-over again, then it will be saved, but if
+	// a value is being retrieved a lot but sparsely across CPUs it is not as important, basically
+	// giving weight to CPU locality over overall usage.
+	LRUCPUHash
+	// LPMTrie - This is an implementation of Longest-Prefix-Match Trie structure. It is useful
+	// for storing things like IP addresses which can be bit masked allowing for keys of differing
+	// values to refer to the same reference based on their masks. See wikipedia for more details.
+	LPMTrie
+	// ArrayOfMaps - Each item in the array is another map. The inner map mustn't be a map of maps
+	// itself.
+	ArrayOfMaps
+	// HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps
+	// itself.
+	HashOfMaps
+)
+
+// hasPerCPUValue returns true if the Map stores a value per CPU, which is
+// the case for PerCPUHash and PerCPUArray.
+func (mt MapType) hasPerCPUValue() bool {
+	switch mt {
+	case PerCPUHash, PerCPUArray:
+		return true
+	default:
+		return false
+	}
+}
+
+// Commands understood by the bpf syscall, passed as the cmd argument of
+// bpfCall.
+//
+// The values are assigned positionally via iota, so entries must not be
+// reordered or removed.
+const (
+	_MapCreate = iota
+	_MapLookupElem
+	_MapUpdateElem
+	_MapDeleteElem
+	_MapGetNextKey
+	_ProgLoad
+	_ObjPin
+	_ObjGet
+	_ProgAttach
+	_ProgDetach
+	_ProgTestRun
+	_ProgGetNextID
+	_MapGetNextID
+	_ProgGetFDByID
+	_MapGetFDByID
+	_ObjGetInfoByFD
+)
+
+// NOTE(review): presumably the flags accepted when updating a map element
+// (create or update, create only, update only) — confirm against the
+// callers, which are not part of this file.
+//
+// The values are assigned positionally via iota, so entries must not be
+// reordered or removed.
+const (
+	_Any = iota
+	_NoExist
+	_Exist
+)
+
+// ProgramType of the eBPF program
+type ProgramType uint32
+
+// eBPF program types
+const (
+	// Unrecognized program type
+	UnspecifiedProgram ProgramType = iota
+	// SocketFilter socket or seccomp filter
+	SocketFilter
+	// Kprobe program
+	Kprobe
+	// SchedCLS traffic control shaper
+	SchedCLS
+	// SchedACT routing control shaper
+	SchedACT
+	// TracePoint program
+	TracePoint
+	// XDP program
+	XDP
+	// PerfEvent program
+	PerfEvent
+	// CGroupSKB program
+	CGroupSKB
+	// CGroupSock program
+	CGroupSock
+	// LWTIn program
+	LWTIn
+	// LWTOut program
+	LWTOut
+	// LWTXmit program
+	LWTXmit
+	// SockOps program
+	SockOps
+	// SkSKB program
+	SkSKB
+	// CGroupDevice program
+	CGroupDevice
+	// SkMsg program
+	SkMsg
+	// RawTracepoint program
+	RawTracepoint
+	// CGroupSockAddr program
+	CGroupSockAddr
+	// LWTSeg6Local program
+	LWTSeg6Local
+	// LircMode2 program
+	LircMode2
+	// SkReuseport program
+	SkReuseport
+	// FlowDissector program
+	FlowDissector
+	// CGroupSysctl program
+	CGroupSysctl
+	// RawTracepointWritable program
+	RawTracepointWritable
+	// CGroupSockopt program
+	CGroupSockopt
+)
+
+// AttachType of the eBPF program, needed to differentiate allowed context accesses in
+// some newer program types like CGroupSockAddr. Should be set to AttachNone if not required.
+// Will cause invalid argument (EINVAL) at program load time if set incorrectly.
+type AttachType uint32
+
+// AttachNone is an alias for AttachCGroupInetIngress for readability reasons
+const AttachNone AttachType = 0
+
+// Attach types which can be used as ProgramSpec.AttachType and when
+// attaching or detaching a Program.
+//
+// The values are assigned positionally via iota, so entries must not be
+// reordered or removed. NOTE(review): presumably they mirror the kernel's
+// enum bpf_attach_type — confirm before adding entries.
+const (
+	AttachCGroupInetIngress AttachType = iota
+	AttachCGroupInetEgress
+	AttachCGroupInetSockCreate
+	AttachCGroupSockOps
+	AttachSkSKBStreamParser
+	AttachSkSKBStreamVerdict
+	AttachCGroupDevice
+	AttachSkMsgVerdict
+	AttachCGroupInet4Bind
+	AttachCGroupInet6Bind
+	AttachCGroupInet4Connect
+	AttachCGroupInet6Connect
+	AttachCGroupInet4PostBind
+	AttachCGroupInet6PostBind
+	AttachCGroupUDP4Sendmsg
+	AttachCGroupUDP6Sendmsg
+	AttachLircMode2
+	AttachFlowDissector
+	AttachCGroupSysctl
+	AttachCGroupUDP4Recvmsg
+	AttachCGroupUDP6Recvmsg
+	AttachCGroupGetsockopt
+	AttachCGroupSetsockopt
+)
+
+// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command
+type AttachFlags uint32

+ 78 - 0
vendor/github.com/cilium/ebpf/types_string.go

@@ -0,0 +1,78 @@
+// Code generated by "stringer -output types_string.go -type=MapType,ProgramType"; DO NOT EDIT.
+
+package ebpf
+
+import "strconv"
+
+// Compile-time guard generated by stringer: each index expression below is
+// only in range for the one-element array when the named constant still has
+// the value the name table was generated for.
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[UnspecifiedMap-0]
+	_ = x[Hash-1]
+	_ = x[Array-2]
+	_ = x[ProgramArray-3]
+	_ = x[PerfEventArray-4]
+	_ = x[PerCPUHash-5]
+	_ = x[PerCPUArray-6]
+	_ = x[StackTrace-7]
+	_ = x[CGroupArray-8]
+	_ = x[LRUHash-9]
+	_ = x[LRUCPUHash-10]
+	_ = x[LPMTrie-11]
+	_ = x[ArrayOfMaps-12]
+	_ = x[HashOfMaps-13]
+}
+
+// _MapType_name is the concatenation of all MapType names; _MapType_index
+// holds the start offset of each name plus a final end offset.
+const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMaps"
+
+var _MapType_index = [...]uint8{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136}
+
+// String returns the name of the map type, or "MapType(N)" for a value that
+// is outside the generated name table.
+func (i MapType) String() string {
+	if i >= MapType(len(_MapType_index)-1) {
+		return "MapType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _MapType_name[_MapType_index[i]:_MapType_index[i+1]]
+}
+
+// Compile-time guard generated by stringer: each index expression below is
+// only in range for the one-element array when the named constant still has
+// the value the name table was generated for.
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[UnspecifiedProgram-0]
+	_ = x[SocketFilter-1]
+	_ = x[Kprobe-2]
+	_ = x[SchedCLS-3]
+	_ = x[SchedACT-4]
+	_ = x[TracePoint-5]
+	_ = x[XDP-6]
+	_ = x[PerfEvent-7]
+	_ = x[CGroupSKB-8]
+	_ = x[CGroupSock-9]
+	_ = x[LWTIn-10]
+	_ = x[LWTOut-11]
+	_ = x[LWTXmit-12]
+	_ = x[SockOps-13]
+	_ = x[SkSKB-14]
+	_ = x[CGroupDevice-15]
+	_ = x[SkMsg-16]
+	_ = x[RawTracepoint-17]
+	_ = x[CGroupSockAddr-18]
+	_ = x[LWTSeg6Local-19]
+	_ = x[LircMode2-20]
+	_ = x[SkReuseport-21]
+	_ = x[FlowDissector-22]
+	_ = x[CGroupSysctl-23]
+	_ = x[RawTracepointWritable-24]
+	_ = x[CGroupSockopt-25]
+}
+
+// _ProgramType_name is the concatenation of all ProgramType names;
+// _ProgramType_index holds the start offset of each name plus a final end offset.
+const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockopt"
+
+var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258}
+
+// String returns the name of the program type, or "ProgramType(N)" for a
+// value that is outside the generated name table.
+func (i ProgramType) String() string {
+	if i >= ProgramType(len(_ProgramType_index)-1) {
+		return "ProgramType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _ProgramType_name[_ProgramType_index[i]:_ProgramType_index[i+1]]
+}

+ 83 - 0
vendor/github.com/containerd/cgroups/v2/cpu.go

@@ -0,0 +1,83 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"math"
+	"strconv"
+	"strings"
+)
+
+// CPUMax is the textual value written to the cgroup v2 "cpu.max" file,
+// formatted as "<quota> <period>" where quota is a number or the literal "max".
+type CPUMax string
+
+// NewCPUMax builds a CPUMax from an optional quota and an optional period.
+//
+// A nil quota is rendered as "max" (no limit). A nil period falls back to
+// 100000, the default period documented for cpu.max in the cgroup v2 admin
+// guide, instead of dereferencing the nil pointer and panicking.
+func NewCPUMax(quota *int64, period *uint64) CPUMax {
+	// Default cpu.max period, used when the caller does not specify one.
+	const defaultPeriod uint64 = 100000
+
+	max := "max"
+	if quota != nil {
+		max = strconv.FormatInt(*quota, 10)
+	}
+	p := defaultPeriod
+	if period != nil {
+		p = *period
+	}
+	return CPUMax(strings.Join([]string{max, strconv.FormatUint(p, 10)}, " "))
+}
+
+// CPU holds the cpu and cpuset settings of a cgroup. Values maps the fields
+// to files as follows: Weight to "cpu.weight", Max to "cpu.max", Cpus to
+// "cpuset.cpus" and Mems to "cpuset.mems". A nil Weight or an empty string
+// field is treated as unset and not written.
+type CPU struct {
+	Weight *uint64
+	Max    CPUMax
+	Cpus   string
+	Mems   string
+}
+
+// extractQuotaAndPeriod splits a "<quota> <period>" value into its numeric parts.
+//
+// A quota of "max" is reported as math.MaxInt64. A field that is missing
+// yields 0, so an empty value or one without a period (e.g. "max") no longer
+// panics with an index-out-of-range error. Parse errors are ignored, as before.
+func (c CPUMax) extractQuotaAndPeriod() (int64, uint64) {
+	var (
+		quota  int64
+		period uint64
+	)
+	// Fields tolerates repeated or surrounding whitespace and returns an empty
+	// slice for an empty value, which the length checks below handle.
+	values := strings.Fields(string(c))
+	if len(values) > 0 {
+		if values[0] == "max" {
+			quota = math.MaxInt64
+		} else {
+			quota, _ = strconv.ParseInt(values[0], 10, 64)
+		}
+	}
+	if len(values) > 1 {
+		period, _ = strconv.ParseUint(values[1], 10, 64)
+	}
+	return quota, period
+}
+
+// Values lists the cgroup files to write for every CPU/cpuset setting that is
+// set, in the order cpu.weight, cpu.max, cpuset.cpus, cpuset.mems. A nil
+// Weight and empty Max/Cpus/Mems are skipped.
+func (r *CPU) Values() (o []Value) {
+	if w := r.Weight; w != nil {
+		o = append(o, Value{filename: "cpu.weight", value: *w})
+	}
+	if limit := r.Max; limit != "" {
+		o = append(o, Value{filename: "cpu.max", value: limit})
+	}
+	// The two cpuset files are plain strings and handled uniformly.
+	for _, set := range [...][2]string{
+		{"cpuset.cpus", r.Cpus},
+		{"cpuset.mems", r.Mems},
+	} {
+		if set[1] != "" {
+			o = append(o, Value{filename: set[0], value: set[1]})
+		}
+	}
+	return o
+}

+ 199 - 0
vendor/github.com/containerd/cgroups/v2/devicefilter.go

@@ -0,0 +1,199 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+// Devicefilter contains eBPF device filter program
+//
+// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
+//
+// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
+// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
+//
+// This particular Go implementation based on runc version
+// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
+package v2
+
+import (
+	"fmt"
+	"math"
+
+	"github.com/cilium/ebpf/asm"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+)
+
+const (
+	// license string format is same as kernel MODULE_LICENSE macro
+	license = "Apache"
+)
+
+// DeviceFilter compiles the given device cgroup rules into an eBPF device
+// filter program and returns it together with the program's license string.
+//
+// The rules are fed to the program builder from the last element to the first.
+func DeviceFilter(devices []specs.LinuxDeviceCgroup) (asm.Instructions, string, error) {
+	prog := new(program)
+	prog.init()
+	for n := range devices {
+		rule := devices[len(devices)-1-n]
+		if err := prog.appendDevice(rule); err != nil {
+			return nil, "", err
+		}
+	}
+	insts, err := prog.finalize()
+	return insts, license, err
+}
+
+// program accumulates the instructions of a device filter.
+//
+// insts is the instruction stream built so far, hasWildCard is set once a
+// rule without any type/access/major/minor constraint has been appended, and
+// blockID is the index of the next rule block (finalize sets it to -1 when it
+// emits the trailing default block).
+type program struct {
+	insts       asm.Instructions
+	hasWildCard bool
+	blockID     int
+}
+
+// init emits the program prologue: it loads the fields of the
+// bpf_cgroup_dev_ctx pointed to by R1 into R2 (device type), R3 (access),
+// R4 (major) and R5 (minor), which the per-rule blocks appended later compare
+// against.
+func (p *program) init() {
+	// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
+	/*
+		u32 access_type
+		u32 major
+		u32 minor
+	*/
+	// R2 <- type (lower 16 bit of u32 access_type at R1[0])
+	p.insts = append(p.insts,
+		asm.LoadMem(asm.R2, asm.R1, 0, asm.Half))
+
+	// R3 <- access (upper 16 bit of u32 access_type at R1[0])
+	p.insts = append(p.insts,
+		asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
+		// RSh: bitwise shift right
+		asm.RSh.Imm32(asm.R3, 16))
+
+	// R4 <- major (u32 major at R1[4])
+	p.insts = append(p.insts,
+		asm.LoadMem(asm.R4, asm.R1, 4, asm.Word))
+
+	// R5 <- minor (u32 minor at R1[8])
+	p.insts = append(p.insts,
+		asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
+}
+
+// appendDevice appends the instruction block for one device rule.
+//
+// It needs to be called from the last element of OCI linux.resources.devices to the head element.
+//
+// Each block compares the request held in R2-R5 against the constraints the
+// rule specifies, jumps to the next block on a mismatch, and otherwise
+// returns the rule's verdict. A rule with no constraint at all is a wildcard:
+// once one has been appended, later calls are no-ops.
+//
+// Following the OCI runtime spec, an unset Type is treated like "a" and a nil
+// Major or Minor means "any". It returns an error if the program is already
+// finalized or the rule has an invalid type, major, minor or access character.
+func (p *program) appendDevice(dev specs.LinuxDeviceCgroup) error {
+	if p.blockID < 0 {
+		return errors.New("the program is finalized")
+	}
+	if p.hasWildCard {
+		// All entries after wildcard entry are ignored
+		return nil
+	}
+
+	bpfType := int32(-1)
+	hasType := true
+	switch dev.Type {
+	case "c":
+		bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
+	case "b":
+		bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
+	case "a", "":
+		// The OCI runtime spec maps an unset type to "a" (all devices).
+		hasType = false
+	default:
+		return errors.Errorf("invalid DeviceType %q", dev.Type)
+	}
+
+	// Major and Minor are optional pointers in the OCI spec; nil means "any"
+	// and is represented by -1 so that it is never dereferenced.
+	major, minor := int64(-1), int64(-1)
+	if dev.Major != nil {
+		major = *dev.Major
+	}
+	if dev.Minor != nil {
+		minor = *dev.Minor
+	}
+	if major > math.MaxUint32 {
+		return errors.Errorf("invalid major %d", major)
+	}
+	if minor > math.MaxUint32 {
+		return errors.Errorf("invalid minor %d", minor)
+	}
+	hasMajor := major >= 0
+	hasMinor := minor >= 0
+
+	bpfAccess := int32(0)
+	for _, r := range dev.Access {
+		switch r {
+		case 'r':
+			bpfAccess |= unix.BPF_DEVCG_ACC_READ
+		case 'w':
+			bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
+		case 'm':
+			bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
+		default:
+			return errors.Errorf("unknown device access %v", r)
+		}
+	}
+	// If the access is rwm, skip the check.
+	hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
+
+	blockSym := fmt.Sprintf("block-%d", p.blockID)
+	nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
+	prevBlockLastIdx := len(p.insts) - 1
+	if hasType {
+		p.insts = append(p.insts,
+			// if (R2 != bpfType) goto next
+			asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
+		)
+	}
+	if hasAccess {
+		// NOTE(review): this matches as soon as the requested access shares
+		// any bit with the rule's mask, so an allow rule for "r" also matches
+		// a request for "rw". Confirm whether a subset check is intended.
+		p.insts = append(p.insts,
+			// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
+			asm.Mov.Reg32(asm.R1, asm.R3),
+			asm.And.Imm32(asm.R1, bpfAccess),
+			asm.JEq.Imm(asm.R1, 0, nextBlockSym),
+		)
+	}
+	if hasMajor {
+		p.insts = append(p.insts,
+			// if (R4 != major) goto next
+			asm.JNE.Imm(asm.R4, int32(major), nextBlockSym),
+		)
+	}
+	if hasMinor {
+		p.insts = append(p.insts,
+			// if (R5 != minor) goto next
+			asm.JNE.Imm(asm.R5, int32(minor), nextBlockSym),
+		)
+	}
+	if !hasType && !hasAccess && !hasMajor && !hasMinor {
+		p.hasWildCard = true
+	}
+	p.insts = append(p.insts, acceptBlock(dev.Allow)...)
+	// set blockSym to the first instruction we added in this iteration
+	p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
+	p.blockID++
+	return nil
+}
+
+// finalize completes the program and returns its instructions.
+//
+// Unless a wildcard rule already terminated the program with its own return,
+// a final block that returns 0 is appended as the target of the last rule's
+// jumps, and the program is marked as finalized.
+func (p *program) finalize() (asm.Instructions, error) {
+	if !p.hasWildCard {
+		lastSym := fmt.Sprintf("block-%d", p.blockID)
+		// R0 <- 0
+		fallthroughBlock := asm.Instructions{
+			asm.Mov.Imm32(asm.R0, 0).Sym(lastSym),
+			asm.Return(),
+		}
+		p.insts = append(p.insts, fallthroughBlock...)
+		p.blockID = -1
+	}
+	// With a wildcard, acceptBlock with asm.Return() is already inserted.
+	return p.insts, nil
+}
+
+func acceptBlock(accept bool) asm.Instructions {
+	v := int32(0)
+	if accept {
+		v = 1
+	}
+	return []asm.Instruction{
+		// R0 <- v
+		asm.Mov.Imm32(asm.R0, v),
+		asm.Return(),
+	}
+}

+ 83 - 0
vendor/github.com/containerd/cgroups/v2/ebpf.go

@@ -0,0 +1,83 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/asm"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+)
+
+// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
+//
+// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
+//
+// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
+//
+// insts and license describe the program to load; dirFD is an open file
+// descriptor of the cgroup directory to attach to. On success the returned
+// function detaches the program from dirFD and releases the loaded program.
+// On failure a no-op function is returned together with the error.
+func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
+	nilCloser := func() error {
+		return nil
+	}
+	spec := &ebpf.ProgramSpec{
+		Type:         ebpf.CGroupDevice,
+		Instructions: insts,
+		License:      license,
+	}
+	prog, err := ebpf.NewProgram(spec)
+	if err != nil {
+		return nilCloser, err
+	}
+	if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
+		// The program was loaded but is not attached anywhere; release it so
+		// its file descriptor does not leak. The attach error is the one
+		// worth reporting, so a close error is deliberately ignored.
+		_ = prog.Close()
+		return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
+	}
+	closer := func() error {
+		if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
+			return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
+		}
+		// Once detached the program is no longer needed; release its descriptor.
+		return prog.Close()
+	}
+	return closer, nil
+}
+
+func isRWM(cgroupPermissions string) bool {
+	r := false
+	w := false
+	m := false
+	for _, rn := range cgroupPermissions {
+		switch rn {
+		case 'r':
+			r = true
+		case 'w':
+			w = true
+		case 'm':
+			m = true
+		}
+	}
+	return r && w && m
+}
+
+// canSkipEBPFError reports whether every rule in devices is a deny rule with
+// full "rwm" access; it returns true for an empty list.
+//
+// the logic is from runc
+// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/fs/devices_v2.go#L44
+func canSkipEBPFError(devices []specs.LinuxDeviceCgroup) bool {
+	for i := range devices {
+		denyAll := !devices[i].Allow && isRWM(devices[i].Access)
+		if !denyAll {
+			return false
+		}
+	}
+	return true
+}

+ 50 - 0
vendor/github.com/containerd/cgroups/v2/errors.go

@@ -0,0 +1,50 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"errors"
+	"os"
+)
+
+// Sentinel errors of the cgroups v2 package. Each message states the
+// condition it stands for: an invalid pid, a missing cgroup mountpoint or
+// mount destination, a file or value with an invalid format, a controller
+// (freezer, memory, pids, cpu) that is not supported on the system, a deleted
+// cgroup, and an invalid group path.
+var (
+	ErrInvalidPid               = errors.New("cgroups: pid must be greater than 0")
+	ErrMountPointNotExist       = errors.New("cgroups: cgroup mountpoint does not exist")
+	ErrInvalidFormat            = errors.New("cgroups: parsing file with invalid format failed")
+	ErrFreezerNotSupported      = errors.New("cgroups: freezer cgroup (v2) not supported on this system")
+	ErrMemoryNotSupported       = errors.New("cgroups: memory cgroup (v2) not supported on this system")
+	ErrPidsNotSupported         = errors.New("cgroups: pids cgroup (v2) not supported on this system")
+	ErrCPUNotSupported          = errors.New("cgroups: cpu cgroup (v2) not supported on this system")
+	ErrCgroupDeleted            = errors.New("cgroups: cgroup deleted")
+	ErrNoCgroupMountDestination = errors.New("cgroups: cannot find cgroup mount destination")
+	ErrInvalidGroupPath         = errors.New("cgroups: invalid group path")
+)
+
+// ErrorHandler is a function that handles and acts on errors
+type ErrorHandler func(err error) error
+
+// IgnoreNotExist filters out "does not exist" errors: it returns nil when
+// os.IsNotExist reports true for err, and err unchanged otherwise.
+func IgnoreNotExist(err error) error {
+	if !os.IsNotExist(err) {
+		return err
+	}
+	return nil
+}
+
+// errPassthrough returns err unchanged. Its signature matches ErrorHandler.
+func errPassthrough(err error) error {
+	return err
+}

+ 37 - 0
vendor/github.com/containerd/cgroups/v2/hugetlb.go

@@ -0,0 +1,37 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import "strings"
+
+// HugeTlb is a list of huge page limits, one entry per page size.
+type HugeTlb []HugeTlbEntry
+
+// HugeTlbEntry is a single huge page limit. HugePageSize becomes the middle
+// component of the "hugetlb.<HugePageSize>.max" file name and Limit is the
+// value written to that file.
+type HugeTlbEntry struct {
+	HugePageSize string
+	Limit        uint64
+}
+
+// Values returns one "hugetlb.<HugePageSize>.max" entry per list element,
+// carrying that element's Limit, in list order.
+func (r *HugeTlb) Values() (o []Value) {
+	entries := *r
+	for idx := range entries {
+		name := strings.Join([]string{"hugetlb", entries[idx].HugePageSize, "max"}, ".")
+		o = append(o, Value{filename: name, value: entries[idx].Limit})
+	}
+	return o
+}

+ 64 - 0
vendor/github.com/containerd/cgroups/v2/io.go

@@ -0,0 +1,64 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import "fmt"
+
+// IOType is the key of one limit in an io.max entry.
+type IOType string
+
+// Keys usable as the Type of an Entry.
+const (
+	ReadBPS   IOType = "rbps"
+	WriteBPS  IOType = "wbps"
+	ReadIOPS  IOType = "riops"
+	WriteIOPS IOType = "wiops"
+)
+
+// BFQ holds the weight written to "io.bfq.weight"; IO.Values treats a zero
+// Weight as unset.
+type BFQ struct {
+	Weight uint16
+}
+
+// Entry is one limit written to "io.max": the device is identified by
+// Major:Minor, Type selects the limit and Rate is its value.
+type Entry struct {
+	Type  IOType
+	Major int64
+	Minor int64
+	Rate  uint64
+}
+
+// String formats the entry as "<Major>:<Minor> <Type>=<Rate>".
+func (e Entry) String() string {
+	return fmt.Sprintf("%d:%d %s=%d", e.Major, e.Minor, e.Type, e.Rate)
+}
+
+// IO holds the io controller settings of a cgroup: an optional BFQ weight
+// and a list of io.max entries.
+type IO struct {
+	BFQ BFQ
+	Max []Entry
+}
+
+// Values returns the "io.bfq.weight" entry when the weight is non-zero,
+// followed by one "io.max" entry per element of Max, in order.
+func (i *IO) Values() (o []Value) {
+	if weight := i.BFQ.Weight; weight != 0 {
+		o = append(o, Value{filename: "io.bfq.weight", value: weight})
+	}
+	for idx := range i.Max {
+		o = append(o, Value{filename: "io.max", value: i.Max[idx].String()})
+	}
+	return o
+}

+ 739 - 0
vendor/github.com/containerd/cgroups/v2/manager.go

@@ -0,0 +1,739 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"bufio"
+	"fmt"
+	"io/ioutil"
+	"math"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"golang.org/x/sys/unix"
+
+	"github.com/containerd/cgroups/v2/stats"
+	"github.com/godbus/dbus/v5"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+
+	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
+)
+
+// File names and default locations used by the manager: subtreeControl and
+// controllersFile are the per-cgroup control files, defaultCgroup2Path and
+// defaultSlice are used to build the path of systemd-managed groups.
+const (
+	subtreeControl     = "cgroup.subtree_control"
+	controllersFile    = "cgroup.controllers"
+	defaultCgroup2Path = "/sys/fs/cgroup"
+	defaultSlice       = "system.slice"
+)
+
+// NOTE(review): canDelegate and once are not used in the part of the file
+// reviewed here; presumably once guards a one-time computation of
+// canDelegate - confirm against their users.
+var (
+	canDelegate bool
+	once        sync.Once
+)
+
+// cgValuer is implemented by settings that can be turned into a list of
+// cgroup file writes.
+type cgValuer interface {
+	Values() []Value
+}
+
+// Event carries the counters read from a cgroup's "memory.events" file
+// (keys low, high, max, oom and oom_kill).
+type Event struct {
+	Low     uint64
+	High    uint64
+	Max     uint64
+	OOM     uint64
+	OOMKill uint64
+}
+
+// Resources for a cgroups v2 unified hierarchy
+//
+// A nil controller field means that controller is neither enabled nor written.
+type Resources struct {
+	CPU     *CPU
+	Memory  *Memory
+	Pids    *Pids
+	IO      *IO
+	RDMA    *RDMA
+	HugeTlb *HugeTlb
+	// When len(Devices) is zero, devices are not controlled
+	Devices []specs.LinuxDeviceCgroup
+}
+
+// Values collects the file/value pairs of every non-nil controller setting,
+// in the order CPU, Memory, Pids, IO, RDMA, HugeTlb, ready to be written to
+// the unified hierarchy. Devices are not part of the result.
+func (r *Resources) Values() (o []Value) {
+	if cpu := r.CPU; cpu != nil {
+		o = append(o, cpu.Values()...)
+	}
+	if mem := r.Memory; mem != nil {
+		o = append(o, mem.Values()...)
+	}
+	if pids := r.Pids; pids != nil {
+		o = append(o, pids.Values()...)
+	}
+	if io := r.IO; io != nil {
+		o = append(o, io.Values()...)
+	}
+	if rdma := r.RDMA; rdma != nil {
+		o = append(o, rdma.Values()...)
+	}
+	if huge := r.HugeTlb; huge != nil {
+		o = append(o, huge.Values()...)
+	}
+	return o
+}
+
+// EnabledControllers names the controllers whose settings are non-nil.
+// A non-nil CPU contributes both "cpu" and "cpuset"; the result is nil when
+// nothing is set.
+func (r *Resources) EnabledControllers() (c []string) {
+	if r.CPU != nil {
+		c = append(c, "cpu", "cpuset")
+	}
+	// The remaining controllers map one-to-one onto a name.
+	for _, ctl := range []struct {
+		set  bool
+		name string
+	}{
+		{r.Memory != nil, "memory"},
+		{r.Pids != nil, "pids"},
+		{r.IO != nil, "io"},
+		{r.RDMA != nil, "rdma"},
+		{r.HugeTlb != nil, "hugetlb"},
+	} {
+		if ctl.set {
+			c = append(c, ctl.name)
+		}
+	}
+	return c
+}
+
+// Value is a single cgroup setting: the name of the file inside the cgroup
+// directory and the value to store in it.
+type Value struct {
+	filename string
+	value    interface{}
+}
+
+// write stores the value in the file c.filename below the absolute cgroup
+// directory path, using perm as the file mode.
+//
+// Integers are written in base 10; strings, byte slices and CPUMax are
+// written as they are. Any other value type yields ErrInvalidFormat.
+func (c *Value) write(path string, perm os.FileMode) error {
+	var text string
+	switch v := c.value.(type) {
+	case uint64:
+		text = strconv.FormatUint(v, 10)
+	case uint16:
+		text = strconv.FormatUint(uint64(v), 10)
+	case int64:
+		text = strconv.FormatInt(v, 10)
+	case []byte:
+		text = string(v)
+	case string:
+		text = v
+	case CPUMax:
+		text = string(v)
+	default:
+		return ErrInvalidFormat
+	}
+	target := filepath.Join(path, c.filename)
+	return ioutil.WriteFile(target, []byte(text), perm)
+}
+
+// writeValues writes each value, in order, below the cgroup directory path
+// with the default file permission, stopping at the first error.
+func writeValues(path string, values []Value) error {
+	for idx := range values {
+		v := values[idx]
+		err := v.write(path, defaultFilePerm)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// NewManager creates the cgroup directory mountpoint/group, enables the
+// controllers required by resources in its ancestors and applies resources
+// to it.
+//
+// group must pass VerifyGroupPath. resources may be nil, in which case the
+// group is created without enabling controllers or writing any setting. If
+// enabling controllers or applying resources fails, the directory is removed
+// again on a best-effort basis and the error is returned.
+func NewManager(mountpoint string, group string, resources *Resources) (*Manager, error) {
+	if err := VerifyGroupPath(group); err != nil {
+		return nil, err
+	}
+	path := filepath.Join(mountpoint, group)
+	if err := os.MkdirAll(path, defaultDirPerm); err != nil {
+		return nil, err
+	}
+	m := Manager{
+		unifiedMountpoint: mountpoint,
+		path:              path,
+	}
+	// setResources tolerates nil resources, so do the same here rather than
+	// panicking on resources.EnabledControllers().
+	if resources != nil {
+		if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil {
+			// clean up cgroup dir on failure
+			os.Remove(path)
+			return nil, err
+		}
+	}
+	if err := setResources(path, resources); err != nil {
+		os.Remove(path)
+		return nil, err
+	}
+	return &m, nil
+}
+
+// LoadManager returns a Manager for the group below mountpoint without
+// creating or modifying anything on disk. It fails only when group does not
+// pass VerifyGroupPath.
+func LoadManager(mountpoint string, group string) (*Manager, error) {
+	err := VerifyGroupPath(group)
+	if err != nil {
+		return nil, err
+	}
+	m := &Manager{
+		unifiedMountpoint: mountpoint,
+		path:              filepath.Join(mountpoint, group),
+	}
+	return m, nil
+}
+
+// Manager operates on one cgroup of a unified (v2) hierarchy.
+// unifiedMountpoint is the hierarchy's mount point and path is the cgroup's
+// directory (the mount point joined with the group).
+type Manager struct {
+	unifiedMountpoint string
+	path              string
+}
+
+// setResources applies resources to the cgroup directory path: first the
+// plain file values, then the device rules. Nil resources are a no-op.
+func setResources(path string, resources *Resources) error {
+	if resources == nil {
+		return nil
+	}
+	err := writeValues(path, resources.Values())
+	if err != nil {
+		return err
+	}
+	return setDevices(path, resources.Devices)
+}
+
+// RootControllers returns the whitespace-separated controller names listed
+// in the cgroup.controllers file at the root of the unified mount point.
+func (c *Manager) RootControllers() ([]string, error) {
+	rootFile := filepath.Join(c.unifiedMountpoint, controllersFile)
+	data, err := ioutil.ReadFile(rootFile)
+	if err == nil {
+		return strings.Fields(string(data)), nil
+	}
+	return nil, err
+}
+
+// Controllers returns the whitespace-separated controller names listed in
+// this cgroup's own cgroup.controllers file.
+func (c *Manager) Controllers() ([]string, error) {
+	groupFile := filepath.Join(c.path, controllersFile)
+	data, err := ioutil.ReadFile(groupFile)
+	if err == nil {
+		return strings.Fields(string(data)), nil
+	}
+	return nil, err
+}
+
+// ControllerToggle selects whether controllers are enabled or disabled when
+// written to cgroup.subtree_control.
+type ControllerToggle int
+
+// Enable (1) prefixes controller names with "+", Disable (2) with "-".
+// The zero value is neither of them.
+const (
+	Enable ControllerToggle = iota + 1
+	Disable
+)
+
+// toggleFunc returns a new slice in which every controller name is preceded
+// by prefix; the input slice is left untouched.
+func toggleFunc(controllers []string, prefix string) []string {
+	prefixed := make([]string, 0, len(controllers))
+	for _, name := range controllers {
+		prefixed = append(prefixed, prefix+name)
+	}
+	return prefixed
+}
+
+// ToggleControllers enables or disables controllers in the
+// cgroup.subtree_control file of every ancestor of this cgroup, starting at
+// the unified mount point. Failures do not stop the walk; the error of the
+// last failing write, if any, is returned.
+func (c *Manager) ToggleControllers(controllers []string, t ControllerToggle) error {
+	// when c.path is like /foo/bar/baz, the following files need to be written:
+	// * /sys/fs/cgroup/cgroup.subtree_control
+	// * /sys/fs/cgroup/foo/cgroup.subtree_control
+	// * /sys/fs/cgroup/foo/bar/cgroup.subtree_control
+	// Note that /sys/fs/cgroup/foo/bar/baz/cgroup.subtree_control does not need to be written.
+	var lastErr error
+	parts := strings.Split(c.path, "/")
+	for n := range parts {
+		dir := strings.Join(parts[:n], "/")
+		if dir == c.path || !strings.HasPrefix(dir, c.unifiedMountpoint) {
+			continue
+		}
+		target := filepath.Join(dir, subtreeControl)
+		err := c.writeSubtreeControl(target, controllers, t)
+		if err == nil {
+			continue
+		}
+		// When running as rootless, the user may face EPERM on parent groups, but it is negligible when the
+		// controller is already written.
+		// So we only return the last error.
+		lastErr = errors.Wrapf(err, "failed to write subtree controllers %+v to %q", controllers, target)
+	}
+	return lastErr
+}
+
+// writeSubtreeControl opens filePath write-only and writes the controller
+// names, separated by spaces, in a single write. Names are prefixed with "+"
+// for Enable and "-" for Disable; any other toggle writes them unprefixed.
+func (c *Manager) writeSubtreeControl(filePath string, controllers []string, t ControllerToggle) error {
+	f, err := os.OpenFile(filePath, os.O_WRONLY, 0)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	if t == Enable {
+		controllers = toggleFunc(controllers, "+")
+	} else if t == Disable {
+		controllers = toggleFunc(controllers, "-")
+	}
+	line := strings.Join(controllers, " ")
+	_, err = f.WriteString(line)
+	return err
+}
+
+// NewChild creates the cgroup name below this cgroup, applies resources to
+// it and returns a Manager for it. name must not start with "/". If applying
+// resources fails, the new directory is removed again on a best-effort basis.
+func (c *Manager) NewChild(name string, resources *Resources) (*Manager, error) {
+	if strings.HasPrefix(name, "/") {
+		return nil, errors.New("name must be relative")
+	}
+	childPath := filepath.Join(c.path, name)
+	if mkErr := os.MkdirAll(childPath, defaultDirPerm); mkErr != nil {
+		return nil, mkErr
+	}
+	child := &Manager{
+		unifiedMountpoint: c.unifiedMountpoint,
+		path:              childPath,
+	}
+	if setErr := setResources(childPath, resources); setErr != nil {
+		// clean up cgroup dir on failure
+		os.Remove(childPath)
+		return nil, setErr
+	}
+	return child, nil
+}
+
+// AddProc adds the process pid to this cgroup by writing it to the
+// cgroupProcs file.
+func (c *Manager) AddProc(pid uint64) error {
+	entry := []Value{{filename: cgroupProcs, value: pid}}
+	return writeValues(c.path, entry)
+}
+
+// Delete removes this cgroup by passing its directory to remove.
+func (c *Manager) Delete() error {
+	return remove(c.path)
+}
+
+// Procs returns the pids parsed from the cgroupProcs file of this cgroup.
+// When recursive is true, the cgroupProcs files of all descendant
+// directories are included as well. The walk stops at the first error, which
+// is returned together with the pids collected so far.
+func (c *Manager) Procs(recursive bool) ([]uint64, error) {
+	var processes []uint64
+	err := filepath.Walk(c.path, func(p string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		// Non-recursive: descend into the cgroup's own directory only and
+		// skip every sub-directory.
+		if !recursive && info.IsDir() {
+			if p == c.path {
+				return nil
+			}
+			return filepath.SkipDir
+		}
+		// Only files named cgroupProcs are of interest.
+		_, name := filepath.Split(p)
+		if name != cgroupProcs {
+			return nil
+		}
+		procs, err := parseCgroupProcsFile(p)
+		if err != nil {
+			return err
+		}
+		processes = append(processes, procs...)
+		return nil
+	})
+	return processes, err
+}
+
+// singleValueFiles are the files Stat reads as a single value each.
+var singleValueFiles = []string{
+	"pids.current",
+	"pids.max",
+}
+
+// Stat collects the metrics of this cgroup.
+//
+// It reads "cpu.stat" and "memory.stat" for the controllers listed in the
+// cgroup's cgroup.controllers file, plus the files in singleValueFiles, into
+// one key/value map; files that do not exist are skipped. The pids, CPU and
+// memory metrics are filled from that map, with missing keys reported as 0.
+// Memory usage and limits, io, rdma and hugetlb metrics are read through
+// their own helpers. An error is returned when the controllers cannot be
+// listed or a stats file fails for a reason other than not existing.
+func (c *Manager) Stat() (*stats.Metrics, error) {
+	controllers, err := c.Controllers()
+	if err != nil {
+		return nil, err
+	}
+	// out accumulates the keys of all stat files; cpu.stat and memory.stat
+	// share the same map.
+	out := make(map[string]interface{})
+	for _, controller := range controllers {
+		switch controller {
+		case "cpu", "memory":
+			filename := fmt.Sprintf("%s.stat", controller)
+			if err := readKVStatsFile(c.path, filename, out); err != nil {
+				if os.IsNotExist(err) {
+					continue
+				}
+				return nil, err
+			}
+		}
+	}
+	for _, name := range singleValueFiles {
+		if err := readSingleFile(c.path, name, out); err != nil {
+			if os.IsNotExist(err) {
+				continue
+			}
+			return nil, err
+		}
+	}
+	var metrics stats.Metrics
+
+	// getPidValue additionally maps the literal "max" to math.MaxUint64.
+	metrics.Pids = &stats.PidsStat{
+		Current: getPidValue("pids.current", out),
+		Limit:   getPidValue("pids.max", out),
+	}
+	metrics.CPU = &stats.CPUStat{
+		UsageUsec:     getUint64Value("usage_usec", out),
+		UserUsec:      getUint64Value("user_usec", out),
+		SystemUsec:    getUint64Value("system_usec", out),
+		NrPeriods:     getUint64Value("nr_periods", out),
+		NrThrottled:   getUint64Value("nr_throttled", out),
+		ThrottledUsec: getUint64Value("throttled_usec", out),
+	}
+	metrics.Memory = &stats.MemoryStat{
+		Anon:                  getUint64Value("anon", out),
+		File:                  getUint64Value("file", out),
+		KernelStack:           getUint64Value("kernel_stack", out),
+		Slab:                  getUint64Value("slab", out),
+		Sock:                  getUint64Value("sock", out),
+		Shmem:                 getUint64Value("shmem", out),
+		FileMapped:            getUint64Value("file_mapped", out),
+		FileDirty:             getUint64Value("file_dirty", out),
+		FileWriteback:         getUint64Value("file_writeback", out),
+		AnonThp:               getUint64Value("anon_thp", out),
+		InactiveAnon:          getUint64Value("inactive_anon", out),
+		ActiveAnon:            getUint64Value("active_anon", out),
+		InactiveFile:          getUint64Value("inactive_file", out),
+		ActiveFile:            getUint64Value("active_file", out),
+		Unevictable:           getUint64Value("unevictable", out),
+		SlabReclaimable:       getUint64Value("slab_reclaimable", out),
+		SlabUnreclaimable:     getUint64Value("slab_unreclaimable", out),
+		Pgfault:               getUint64Value("pgfault", out),
+		Pgmajfault:            getUint64Value("pgmajfault", out),
+		WorkingsetRefault:     getUint64Value("workingset_refault", out),
+		WorkingsetActivate:    getUint64Value("workingset_activate", out),
+		WorkingsetNodereclaim: getUint64Value("workingset_nodereclaim", out),
+		Pgrefill:              getUint64Value("pgrefill", out),
+		Pgscan:                getUint64Value("pgscan", out),
+		Pgsteal:               getUint64Value("pgsteal", out),
+		Pgactivate:            getUint64Value("pgactivate", out),
+		Pgdeactivate:          getUint64Value("pgdeactivate", out),
+		Pglazyfree:            getUint64Value("pglazyfree", out),
+		Pglazyfreed:           getUint64Value("pglazyfreed", out),
+		ThpFaultAlloc:         getUint64Value("thp_fault_alloc", out),
+		ThpCollapseAlloc:      getUint64Value("thp_collapse_alloc", out),
+		Usage:                 getStatFileContentUint64(filepath.Join(c.path, "memory.current")),
+		UsageLimit:            getStatFileContentUint64(filepath.Join(c.path, "memory.max")),
+		SwapUsage:             getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")),
+		SwapLimit:             getStatFileContentUint64(filepath.Join(c.path, "memory.swap.max")),
+	}
+
+	metrics.Io = &stats.IOStat{Usage: readIoStats(c.path)}
+	metrics.Rdma = &stats.RdmaStat{
+		Current: rdmaStats(filepath.Join(c.path, "rdma.current")),
+		Limit:   rdmaStats(filepath.Join(c.path, "rdma.max")),
+	}
+	metrics.Hugetlb = readHugeTlbStats(c.path)
+
+	return &metrics, nil
+}
+
+func getUint64Value(key string, out map[string]interface{}) uint64 {
+	v, ok := out[key]
+	if !ok {
+		return 0
+	}
+	switch t := v.(type) {
+	case uint64:
+		return t
+	}
+	return 0
+}
+
+func getPidValue(key string, out map[string]interface{}) uint64 {
+	v, ok := out[key]
+	if !ok {
+		return 0
+	}
+	switch t := v.(type) {
+	case uint64:
+		return t
+	case string:
+		if t == "max" {
+			return math.MaxUint64
+		}
+	}
+	return 0
+}
+
+// readSingleFile reads path/file and stores its whitespace-trimmed content
+// in out under the key file: as the number returned by parseUint when the
+// content parses, otherwise as the raw string. Only a failure to read the
+// file is reported as an error.
+func readSingleFile(path string, file string, out map[string]interface{}) error {
+	raw, err := ioutil.ReadFile(filepath.Join(path, file))
+	if err != nil {
+		return err
+	}
+	text := strings.TrimSpace(string(raw))
+	if n, parseErr := parseUint(text, 10, 64); parseErr == nil {
+		out[file] = n
+	} else {
+		// if we cannot parse as a uint, keep the string
+		out[file] = text
+	}
+	return nil
+}
+
+// readKVStatsFile reads path/file line by line, parses every line with
+// parseKV and stores the resulting value in out under the parsed name.
+//
+// out must be a non-nil map. An error is returned when the file cannot be
+// opened, a line cannot be parsed, or reading the file fails part-way.
+func readKVStatsFile(path string, file string, out map[string]interface{}) error {
+	f, err := os.Open(filepath.Join(path, file))
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		name, value, err := parseKV(s.Text())
+		if err != nil {
+			return errors.Wrapf(err, "error while parsing %s (line=%q)", filepath.Join(path, file), s.Text())
+		}
+		out[name] = value
+	}
+	// Scan returns false both at EOF and on a read error; Err only reports
+	// the latter once scanning has stopped, so it is checked after the loop.
+	return s.Err()
+}
+
+// Freeze puts this cgroup into the Frozen state and waits until that state
+// is reported back.
+func (c *Manager) Freeze() error {
+	return c.freeze(c.path, Frozen)
+}
+
+// Thaw puts this cgroup into the Thawed state and waits until that state is
+// reported back.
+func (c *Manager) Thaw() error {
+	return c.freeze(c.path, Thawed)
+}
+
+// freeze writes the values of state below path and re-reads the state with
+// fetchState, repeating both steps every millisecond until the requested
+// state is observed. It returns early on a write or read error; there is no
+// timeout, so it does not return while the state never matches.
+func (c *Manager) freeze(path string, state State) error {
+	values := state.Values()
+	for {
+		if err := writeValues(path, values); err != nil {
+			return err
+		}
+		current, err := fetchState(path)
+		if err != nil {
+			return err
+		}
+		if current == state {
+			return nil
+		}
+		time.Sleep(1 * time.Millisecond)
+	}
+}
+
+// MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor
+//
+// The watch is registered for modifications (IN_MODIFY) of this cgroup's
+// memory.events file. The caller owns the returned file descriptor and is
+// responsible for closing it. On failure both descriptors are 0 and the
+// error wraps the underlying syscall error.
+func (c *Manager) MemoryEventFD() (int, uint32, error) {
+	fpath := filepath.Join(c.path, "memory.events")
+	fd, err := syscall.InotifyInit()
+	if err != nil {
+		return 0, 0, errors.Wrap(err, "Failed to create inotify fd")
+	}
+	wd, err := syscall.InotifyAddWatch(fd, fpath, unix.IN_MODIFY)
+	if err != nil {
+		syscall.Close(fd)
+		return 0, 0, errors.Wrapf(err, "Failed to add inotify watch for %q", fpath)
+	}
+	// Defensive: a negative descriptor cannot be represented as uint32.
+	if wd < 0 {
+		syscall.Close(fd)
+		return 0, 0, errors.Errorf("Failed to add inotify watch for %q", fpath)
+	}
+
+	return fd, uint32(wd), nil
+}
+
+// EventChan starts a goroutine running waitForEvents and returns the channel
+// on which it delivers memory events together with the channel on which it
+// reports the error that made it stop.
+func (c *Manager) EventChan() (<-chan Event, <-chan error) {
+	ec := make(chan Event)
+	// waitForEvents sends at most one error before returning; a buffer of one
+	// lets it exit even when the caller never reads the error channel.
+	errCh := make(chan error, 1)
+	go c.waitForEvents(ec, errCh)
+
+	// Return errCh itself: handing back nil would make every receive block
+	// forever and hide all failures from the caller.
+	return ec, errCh
+}
+
+func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
+	fd, wd, err := c.MemoryEventFD()
+
+	defer syscall.InotifyRmWatch(fd, wd)
+	defer syscall.Close(fd)
+
+	if err != nil {
+		errCh <- err
+		return
+	}
+
+	for {
+		buffer := make([]byte, syscall.SizeofInotifyEvent*10)
+		bytesRead, err := syscall.Read(fd, buffer)
+		if err != nil {
+			errCh <- err
+			return
+		}
+		var out map[string]interface{}
+		if bytesRead >= syscall.SizeofInotifyEvent {
+			if err := readKVStatsFile(c.path, "memory.events", out); err != nil {
+				e := Event{
+					High:    out["high"].(uint64),
+					Low:     out["low"].(uint64),
+					Max:     out["max"].(uint64),
+					OOM:     out["oom"].(uint64),
+					OOMKill: out["oom_kill"].(uint64),
+				}
+				ec <- e
+			} else {
+				errCh <- err
+				return
+			}
+		}
+	}
+}
+
+// setDevices builds a device filter program from devices and attaches it to
+// the cgroup directory at path. It does nothing when devices is empty.
+//
+// A failure to load or attach the filter is ignored when
+// canSkipEBPFError(devices) reports true; every other failure is returned.
+func setDevices(path string, devices []specs.LinuxDeviceCgroup) error {
+	if len(devices) == 0 {
+		return nil
+	}
+	insts, license, err := DeviceFilter(devices)
+	if err != nil {
+		return err
+	}
+	dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY, 0600)
+	if err != nil {
+		// keep the cause (ENOENT, EACCES, ...) in the returned error
+		return errors.Wrapf(err, "cannot get dir FD for %s", path)
+	}
+	defer unix.Close(dirFD)
+	if _, err := LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
+		if !canSkipEBPFError(devices) {
+			return err
+		}
+	}
+	return nil
+}
+
+// NewSystemd asks systemd over D-Bus to start a transient unit named group
+// and returns a Manager for filepath.Join(defaultCgroup2Path, slice, group).
+//
+// An empty slice selects defaultSlice. pid is added to the unit unless it is
+// -1. resources may be nil; the memory, CPU and pids limits it carries are
+// translated into unit properties, and unset (nil or zero) limits are left
+// out. A unit that already exists is not treated as an error. On failure the
+// returned Manager is empty and must not be used.
+func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, error) {
+	if slice == "" {
+		slice = defaultSlice
+	}
+	if resources == nil {
+		// no limits requested
+		resources = &Resources{}
+	}
+	path := filepath.Join(defaultCgroup2Path, slice, group)
+	conn, err := systemdDbus.New()
+	if err != nil {
+		return &Manager{}, err
+	}
+	defer conn.Close()
+
+	properties := []systemdDbus.Property{
+		systemdDbus.PropDescription(fmt.Sprintf("cgroup %s", group)),
+		newSystemdProperty("DefaultDependencies", false),
+		newSystemdProperty("MemoryAccounting", true),
+		newSystemdProperty("CPUAccounting", true),
+		newSystemdProperty("IOAccounting", true),
+	}
+
+	// if we create a slice, the parent is defined via a Wants=
+	if strings.HasSuffix(group, ".slice") {
+		properties = append(properties, systemdDbus.PropWants(defaultSlice))
+	} else {
+		// otherwise, we use Slice=
+		properties = append(properties, systemdDbus.PropSlice(defaultSlice))
+	}
+
+	// only add pid if its valid, -1 is used w/ general slice creation.
+	if pid != -1 {
+		properties = append(properties, newSystemdProperty("PIDs", []uint32{uint32(pid)}))
+	}
+
+	// Memory.Max and CPU.Weight are pointers that may be nil even when the
+	// enclosing struct is set, so each is checked before it is dereferenced.
+	if resources.Memory != nil && resources.Memory.Max != nil && *resources.Memory.Max != 0 {
+		properties = append(properties,
+			newSystemdProperty("MemoryMax", uint64(*resources.Memory.Max)))
+	}
+
+	if resources.CPU != nil && resources.CPU.Weight != nil && *resources.CPU.Weight != 0 {
+		properties = append(properties,
+			newSystemdProperty("CPUWeight", *resources.CPU.Weight))
+	}
+
+	if resources.CPU != nil && resources.CPU.Max != "" {
+		quota, period := resources.CPU.Max.extractQuotaAndPeriod()
+		// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
+		// corresponds to USEC_INFINITY in systemd
+		// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
+		// always setting a property value ensures we can apply a quota and remove it later
+		cpuQuotaPerSecUSec := uint64(math.MaxUint64)
+		if quota > 0 {
+			// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
+			// (integer percentage of CPU) internally.  This means that if a fractional percent of
+			// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
+			// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
+			cpuQuotaPerSecUSec = uint64(quota*1000000) / period
+			if cpuQuotaPerSecUSec%10000 != 0 {
+				cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
+			}
+		}
+		properties = append(properties,
+			newSystemdProperty("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
+	}
+
+	// If we can delegate, we add the property back in
+	if canDelegate {
+		properties = append(properties, newSystemdProperty("Delegate", true))
+	}
+
+	if resources.Pids != nil && resources.Pids.Max > 0 {
+		properties = append(properties,
+			newSystemdProperty("TasksAccounting", true),
+			newSystemdProperty("TasksMax", uint64(resources.Pids.Max)))
+	}
+
+	statusChan := make(chan string, 1)
+	if _, err := conn.StartTransientUnit(group, "replace", properties, statusChan); err == nil {
+		select {
+		case <-statusChan:
+		case <-time.After(time.Second):
+			logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", group)
+		}
+	} else if !isUnitExists(err) {
+		return &Manager{}, err
+	}
+
+	return &Manager{
+		path: path,
+	}, nil
+}
+
+// LoadSystemd returns a Manager whose path is
+// filepath.Join(defaultCgroup2Path, slice, group); an empty slice selects
+// defaultSlice. It performs no I/O and never returns a non-nil error.
+func LoadSystemd(slice, group string) (*Manager, error) {
+	parent := slice
+	if parent == "" {
+		parent = defaultSlice
+	}
+	m := &Manager{path: filepath.Join(defaultCgroup2Path, parent, group)}
+	return m, nil
+}
+
+// DeleteSystemd asks systemd over D-Bus to stop the unit named after the last
+// element of c.path and waits for the result to arrive on the job channel.
+func (c *Manager) DeleteSystemd() error {
+	conn, err := systemdDbus.New()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	unit := systemdUnitFromPath(c.path)
+	done := make(chan string)
+	if _, err = conn.StopUnit(unit, "replace", done); err != nil {
+		return err
+	}
+	// block until the stop job reports a result
+	<-done
+	return nil
+}
+
+// newSystemdProperty builds a systemd D-Bus property called name whose value
+// is units wrapped in a dbus variant.
+func newSystemdProperty(name string, units interface{}) systemdDbus.Property {
+	prop := systemdDbus.Property{Name: name}
+	prop.Value = dbus.MakeVariant(units)
+	return prop
+}

+ 52 - 0
vendor/github.com/containerd/cgroups/v2/memory.go

@@ -0,0 +1,52 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+// Memory holds the optional memory settings of a cgroup. Values emits one
+// entry per non-nil field; nil fields are left out.
+type Memory struct {
+	// Swap is written to memory.swap.max.
+	Swap *int64
+	// Max is written to memory.max.
+	Max  *int64
+	// Low is written to memory.low.
+	Low  *int64
+	// High is written to memory.high.
+	High *int64
+}
+
+// Values returns one Value per non-nil field of r, in the order
+// memory.swap.max, memory.max, memory.low, memory.high. Each entry carries
+// the dereferenced int64 setting.
+func (r *Memory) Values() (o []Value) {
+	settings := []struct {
+		file  string
+		limit *int64
+	}{
+		{"memory.swap.max", r.Swap},
+		{"memory.max", r.Max},
+		{"memory.low", r.Low},
+		{"memory.high", r.High},
+	}
+	for _, s := range settings {
+		if s.limit == nil {
+			continue
+		}
+		o = append(o, Value{
+			filename: s.file,
+			value:    *s.limit,
+		})
+	}
+	return o
+}

+ 60 - 0
vendor/github.com/containerd/cgroups/v2/paths.go

@@ -0,0 +1,60 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+)
+
+// NestedGroupPath returns suffix joined onto the cgroup path of the calling
+// process, as read from /proc/self/cgroup, so that child groups nest inside
+// the caller's own group.
+func NestedGroupPath(suffix string) (string, error) {
+	self, err := parseCgroupFile("/proc/self/cgroup")
+	if err == nil {
+		return filepath.Join(self, suffix), nil
+	}
+	return "", err
+}
+
+// PidGroupPath returns the cgroup path of the process pid, as read from
+// /proc/<pid>/cgroup.
+// This is commonly used for the Load function to restore an existing container
+func PidGroupPath(pid int) (string, error) {
+	return parseCgroupFile(fmt.Sprintf("/proc/%d/cgroup", pid))
+}
+
+// VerifyGroupPath checks the format of the group path g, which uses the same
+// form as the third field of /proc/PID/cgroup,
+// e.g. "/user.slice/user-1001.slice/session-1.scope".
+//
+// It returns ErrInvalidGroupPath unless g starts with "/", is already in
+// filepath.Clean form, and does not start with "/sys/fs/cgroup".
+//
+// VerifyGroupPath doesn't verify whether g actually exists on the system.
+func VerifyGroupPath(g string) error {
+	switch {
+	case !strings.HasPrefix(g, "/"),
+		filepath.Clean(g) != g,
+		strings.HasPrefix(g, "/sys/fs/cgroup"):
+		return ErrInvalidGroupPath
+	}
+	return nil
+}

+ 37 - 0
vendor/github.com/containerd/cgroups/v2/pids.go

@@ -0,0 +1,37 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import "strconv"
+
+// Pids holds the pids.max setting of a cgroup.
+type Pids struct {
+	// Max is the limit written to pids.max by Values: 0 writes nothing, a
+	// positive value is written as a decimal number, and a negative value is
+	// written as "max".
+	Max int64
+}
+
+func (r *Pids) Values() (o []Value) {
+	if r.Max != 0 {
+		limit := "max"
+		if r.Max > 0 {
+			limit = strconv.FormatInt(r.Max, 10)
+		}
+		o = append(o, Value{
+			filename: "pids.max",
+			value:    limit,
+		})
+	}
+	return o
+}

+ 46 - 0
vendor/github.com/containerd/cgroups/v2/rdma.go

@@ -0,0 +1,46 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"fmt"
+)
+
+// RDMA holds the rdma.max settings of a cgroup.
+type RDMA struct {
+	// Limit lists the per-device limits; Values emits one rdma.max entry for
+	// each element.
+	Limit []RDMAEntry
+}
+
+// RDMAEntry is the limit for a single RDMA device.
+type RDMAEntry struct {
+	// Device is the device name placed first in the rendered line.
+	Device string
+	// HcaHandles is rendered as the hca_handle value.
+	HcaHandles uint32
+	// HcaObjects is rendered as the hca_object value.
+	HcaObjects uint32
+}
+
+// String renders the entry as "<device> hca_handle=<n> hca_object=<n>".
+func (r RDMAEntry) String() string {
+	const layout = "%s hca_handle=%d hca_object=%d"
+	return fmt.Sprintf(layout, r.Device, r.HcaHandles, r.HcaObjects)
+}
+
+// Values returns one rdma.max entry per element of r.Limit, in order, each
+// carrying the element's String form. It returns nil when Limit is empty.
+func (r *RDMA) Values() (o []Value) {
+	for i := range r.Limit {
+		entry := Value{
+			filename: "rdma.max",
+			value:    r.Limit[i].String(),
+		}
+		o = append(o, entry)
+	}
+	return o
+}

+ 65 - 0
vendor/github.com/containerd/cgroups/v2/state.go

@@ -0,0 +1,65 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"io/ioutil"
+	"path/filepath"
+	"strings"
+)
+
+// State is a type that represents the state of the current cgroup
+type State string
+
+const (
+	// Unknown is what fetchState reports when cgroup.freeze cannot be read or
+	// holds neither "0" nor "1".
+	Unknown State = ""
+	// Thawed corresponds to "0" in cgroup.freeze.
+	Thawed  State = "thawed"
+	// Frozen corresponds to "1" in cgroup.freeze.
+	Frozen  State = "frozen"
+	// Deleted has no cgroup.freeze value associated with it in this file.
+	Deleted State = "deleted"
+
+	// cgroupFreeze is the name of the file used to freeze and thaw a cgroup.
+	cgroupFreeze = "cgroup.freeze"
+)
+
+// Values returns the single cgroup.freeze entry for s: "1" for Frozen and "0"
+// for Thawed. For any other state the entry's value is left unset.
+func (s State) Values() []Value {
+	out := Value{filename: cgroupFreeze}
+	if s == Frozen {
+		out.value = "1"
+	} else if s == Thawed {
+		out.value = "0"
+	}
+	return []Value{out}
+}
+
+// fetchState reads cgroup.freeze in the cgroup directory path and maps its
+// trimmed content to a State: "1" is Frozen, "0" is Thawed and anything else
+// is Unknown. A read failure yields Unknown together with the error.
+func fetchState(path string) (State, error) {
+	raw, err := ioutil.ReadFile(filepath.Join(path, cgroupFreeze))
+	if err != nil {
+		return Unknown, err
+	}
+	flag := strings.TrimSpace(string(raw))
+	if flag == "1" {
+		return Frozen, nil
+	}
+	if flag == "0" {
+		return Thawed, nil
+	}
+	return Unknown, nil
+}

+ 442 - 0
vendor/github.com/containerd/cgroups/v2/utils.go

@@ -0,0 +1,442 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"math"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/godbus/dbus/v5"
+
+	"github.com/containerd/cgroups/v2/stats"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+const (
+	// cgroupProcs is the name of the per-cgroup file that lists process IDs
+	// (see parseCgroupProcsFile).
+	cgroupProcs    = "cgroup.procs"
+	// defaultDirPerm is presumably the mode used when creating cgroup
+	// directories; its use is not visible in this file.
+	defaultDirPerm = 0755
+)
+
+// defaultFilePerm is a var so that the test framework can change the file
+// mode of all files created while the tests are running. The difference
+// between the tests and real-world use is that files like "cgroup.procs"
+// already exist when writing to a real cgroup filesystem (the original
+// comment says "read", presumably a typo), whereas they do not exist
+// beforehand when running in the tests.
+// This is set to a non-zero value in the test code.
+var defaultFilePerm = os.FileMode(0)
+
+// remove deletes the cgroup path with os.RemoveAll, retrying after any error.
+// It makes at most five attempts, sleeping 10ms before the second one and
+// doubling the delay before each further attempt. The error of the last
+// attempt is returned wrapped with the path.
+func remove(path string) error {
+	const attempts = 5
+	var (
+		lastErr error
+		wait    = 10 * time.Millisecond
+	)
+	for n := 0; n < attempts; n++ {
+		if n > 0 {
+			time.Sleep(wait)
+			wait *= 2
+		}
+		lastErr = os.RemoveAll(path)
+		if lastErr == nil {
+			return nil
+		}
+	}
+	return errors.Wrapf(lastErr, "cgroups: unable to remove path %q", path)
+}
+
+// parseCgroupProcsFile parses /sys/fs/cgroup/$GROUPPATH/cgroup.procs
+//
+// It returns the process IDs listed in the file at path, one per non-empty
+// line, in file order. Opening the file, reading it, or parsing a line as an
+// unsigned decimal number can each fail; in every case the result is nil and
+// the error is returned.
+func parseCgroupProcsFile(path string) ([]uint64, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	var (
+		out []uint64
+		s   = bufio.NewScanner(f)
+	)
+	for s.Scan() {
+		if t := s.Text(); t != "" {
+			pid, err := strconv.ParseUint(t, 10, 0)
+			if err != nil {
+				return nil, err
+			}
+			out = append(out, pid)
+		}
+	}
+	// Scan returning false may mean a read failure rather than end of file;
+	// without this check a truncated list would be returned as complete.
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// parseKV splits raw on whitespace into exactly two fields, a key and a
+// value. The value is returned as a uint64 when parseUint accepts it and as
+// the original string otherwise. Any other field count yields
+// ErrInvalidFormat.
+func parseKV(raw string) (string, interface{}, error) {
+	fields := strings.Fields(raw)
+	if len(fields) != 2 {
+		return "", 0, ErrInvalidFormat
+	}
+	key, text := fields[0], fields[1]
+	if number, err := parseUint(text, 10, 64); err == nil {
+		return key, number, nil
+	}
+	// if we cannot parse as a uint, parse as a string
+	return key, text, nil
+}
+
+// readUint reads the file at path and parses its whitespace-trimmed content
+// with parseUint (base 10, 64 bits).
+func readUint(path string) (uint64, error) {
+	raw, err := ioutil.ReadFile(path)
+	if err == nil {
+		return parseUint(strings.TrimSpace(string(raw)), 10, 64)
+	}
+	return 0, err
+}
+
+// parseUint parses s as an unsigned integer in the given base and bit size.
+// Negative numbers do not fail: they are reported as 0 with a nil error, even
+// when they are too small to fit the signed range of bitSize. Any other
+// input that strconv.ParseUint rejects returns 0 and ParseUint's error.
+func parseUint(s string, base, bitSize int) (uint64, error) {
+	parsed, uintErr := strconv.ParseUint(s, base, bitSize)
+	if uintErr == nil {
+		return parsed, nil
+	}
+	signed, intErr := strconv.ParseInt(s, base, bitSize)
+	if signed >= 0 {
+		// not a negative number, so the unsigned parse error stands
+		return 0, uintErr
+	}
+	switch {
+	case intErr == nil:
+		// negative value inside the signed range
+		return 0, nil
+	case intErr.(*strconv.NumError).Err == strconv.ErrRange:
+		// negative value below the signed range (ParseInt clamps it)
+		return 0, nil
+	}
+	return 0, uintErr
+}
+
+// parseCgroupFile opens the /proc/PID/cgroup file at path and returns the
+// cgroup path that parseCgroupFromReader extracts from it.
+func parseCgroupFile(path string) (string, error) {
+	file, openErr := os.Open(path)
+	if openErr != nil {
+		return "", openErr
+	}
+	defer file.Close()
+	return parseCgroupFromReader(file)
+}
+
+// parseCgroupFromReader scans r, which holds the content of a
+// /proc/PID/cgroup file, and returns the path of the first line of the form
+// "0::<path>".
+//
+// A line with fewer than three ":"-separated fields is an error. A read
+// failure is returned as is. When the input ends without a matching line the
+// error is "cgroup path not found".
+func parseCgroupFromReader(r io.Reader) (string, error) {
+	s := bufio.NewScanner(r)
+	for s.Scan() {
+		var (
+			text  = s.Text()
+			parts = strings.SplitN(text, ":", 3)
+		)
+		if len(parts) < 3 {
+			return "", fmt.Errorf("invalid cgroup entry: %q", text)
+		}
+		// text is like "0::/user.slice/user-1001.slice/session-1.scope"
+		if parts[0] == "0" && parts[1] == "" {
+			return parts[2], nil
+		}
+	}
+	// Scanner.Err is only set once Scan has returned false; report a read
+	// failure instead of disguising it as a missing entry.
+	if err := s.Err(); err != nil {
+		return "", err
+	}
+	return "", fmt.Errorf("cgroup path not found")
+}
+
+// ToResources converts the oci LinuxResources struct into a
+// v2 Resources type for use with this package.
+//
+// converting cgroups configuration from v1 to v2
+// ref: https://github.com/containers/crun/blob/master/crun.1.md#cgroup-v2
+//
+// A nil spec yields an empty Resources. CPU shares are clamped to
+// [2, 262144] and the blkio weight to [10, 1000] before conversion, so the
+// arithmetic cannot wrap around. RDMA entries are converted only when both
+// HcaHandles and HcaObjects are set, because RDMAEntry cannot express a
+// missing limit.
+func ToResources(spec *specs.LinuxResources) *Resources {
+	var resources Resources
+	if spec == nil {
+		return &resources
+	}
+	if cpu := spec.CPU; cpu != nil {
+		resources.CPU = &CPU{
+			Cpus: cpu.Cpus,
+			Mems: cpu.Mems,
+		}
+		if shares := cpu.Shares; shares != nil {
+			// The subtraction below is unsigned: a value under 2 would wrap
+			// to a huge number, and one above 262144 would exceed weight 10000.
+			s := *shares
+			if s < 2 {
+				s = 2
+			} else if s > 262144 {
+				s = 262144
+			}
+			convertedWeight := 1 + ((s-2)*9999)/262142
+			resources.CPU.Weight = &convertedWeight
+		}
+		if period := cpu.Period; period != nil {
+			resources.CPU.Max = NewCPUMax(cpu.Quota, period)
+		}
+	}
+	if mem := spec.Memory; mem != nil {
+		resources.Memory = &Memory{}
+		if swap := mem.Swap; swap != nil {
+			resources.Memory.Swap = swap
+		}
+		if l := mem.Limit; l != nil {
+			resources.Memory.Max = l
+		}
+		if l := mem.Reservation; l != nil {
+			resources.Memory.Low = l
+		}
+	}
+	if hugetlbs := spec.HugepageLimits; hugetlbs != nil {
+		hugeTlbUsage := HugeTlb{}
+		for _, hugetlb := range hugetlbs {
+			hugeTlbUsage = append(hugeTlbUsage, HugeTlbEntry{
+				HugePageSize: hugetlb.Pagesize,
+				Limit:        hugetlb.Limit,
+			})
+		}
+		resources.HugeTlb = &hugeTlbUsage
+	}
+	if pids := spec.Pids; pids != nil {
+		resources.Pids = &Pids{
+			Max: pids.Limit,
+		}
+	}
+	if i := spec.BlockIO; i != nil {
+		resources.IO = &IO{}
+		if i.Weight != nil {
+			// The spec weight is a uint16; (w-10)*9999 does not fit in 16
+			// bits, so the conversion is done in 32 bits on a clamped value.
+			w := uint32(*i.Weight)
+			if w < 10 {
+				w = 10
+			} else if w > 1000 {
+				w = 1000
+			}
+			resources.IO.BFQ.Weight = uint16(1 + (w-10)*9999/990)
+		}
+		// A slice rather than a map keeps the order of the io.max entries
+		// deterministic.
+		for _, throttle := range []struct {
+			ioType  IOType
+			devices []specs.LinuxThrottleDevice
+		}{
+			{ReadBPS, i.ThrottleReadBpsDevice},
+			{WriteBPS, i.ThrottleWriteBpsDevice},
+			{ReadIOPS, i.ThrottleReadIOPSDevice},
+			{WriteIOPS, i.ThrottleWriteIOPSDevice},
+		} {
+			for _, d := range throttle.devices {
+				resources.IO.Max = append(resources.IO.Max, Entry{
+					Type:  throttle.ioType,
+					Major: d.Major,
+					Minor: d.Minor,
+					Rate:  d.Rate,
+				})
+			}
+		}
+	}
+	if i := spec.Rdma; i != nil {
+		resources.RDMA = &RDMA{}
+		for device, value := range spec.Rdma {
+			// Both pointers are dereferenced below, so both must be set.
+			if device != "" && value.HcaHandles != nil && value.HcaObjects != nil {
+				resources.RDMA.Limit = append(resources.RDMA.Limit, RDMAEntry{
+					Device:     device,
+					HcaHandles: *value.HcaHandles,
+					HcaObjects: *value.HcaObjects,
+				})
+			}
+		}
+	}
+
+	return &resources
+}
+
+// getStatFileContentUint64 returns the content of the single-value cgroup
+// stat file filePath as a uint64. The literal "max" maps to math.MaxUint64.
+// An unreadable file yields 0; unparsable content is logged and yields
+// whatever parseUint returned alongside the error.
+func getStatFileContentUint64(filePath string) uint64 {
+	raw, err := ioutil.ReadFile(filePath)
+	if err != nil {
+		return 0
+	}
+	text := strings.TrimSpace(string(raw))
+	if text == "max" {
+		return math.MaxUint64
+	}
+
+	value, err := parseUint(text, 10, 64)
+	if err != nil {
+		logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", string(raw), filePath)
+	}
+	return value
+}
+
+// readIoStats parses the io.stat file inside the cgroup directory path into
+// one entry per "<major>:<minor> key=value ..." line.
+// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
+//
+// Lines without a "major:minor" prefix are skipped, as are key=value pairs
+// that are malformed or not numeric. Only rbytes, wbytes, rios and wios are
+// recorded. An unreadable file returns nil; a non-numeric major or minor
+// number stops parsing and returns the entries collected so far.
+func readIoStats(path string) []*stats.IOEntry {
+	var usage []*stats.IOEntry
+	raw, err := ioutil.ReadFile(filepath.Join(path, "io.stat"))
+	if err != nil {
+		return usage
+	}
+
+	for _, line := range strings.Split(string(raw), "\n") {
+		fields := strings.Split(line, " ")
+		if len(fields) < 2 {
+			continue
+		}
+		device := strings.Split(fields[0], ":")
+		if len(device) != 2 {
+			continue
+		}
+		major, err := strconv.ParseUint(device[0], 10, 0)
+		if err != nil {
+			return usage
+		}
+		minor, err := strconv.ParseUint(device[1], 10, 0)
+		if err != nil {
+			return usage
+		}
+		ioEntry := &stats.IOEntry{
+			Major: major,
+			Minor: minor,
+		}
+		for _, field := range fields[1:] {
+			kv := strings.Split(field, "=")
+			if len(kv) != 2 {
+				continue
+			}
+			count, err := strconv.ParseUint(kv[1], 10, 0)
+			if err != nil {
+				continue
+			}
+			switch kv[0] {
+			case "rbytes":
+				ioEntry.Rbytes = count
+			case "wbytes":
+				ioEntry.Wbytes = count
+			case "rios":
+				ioEntry.Rios = count
+			case "wios":
+				ioEntry.Wios = count
+			}
+		}
+		usage = append(usage, ioEntry)
+	}
+	return usage
+}
+
+// rdmaStats reads the file at filepath and converts its lines with
+// toRdmaEntry. An unreadable file yields an empty, non-nil slice.
+func rdmaStats(filepath string) []*stats.RdmaEntry {
+	raw, err := ioutil.ReadFile(filepath)
+	if err == nil {
+		return toRdmaEntry(strings.Split(string(raw), "\n"))
+	}
+	return []*stats.RdmaEntry{}
+}
+
+func parseRdmaKV(raw string, entry *stats.RdmaEntry) {
+	var value uint64
+	var err error
+
+	parts := strings.Split(raw, "=")
+	switch len(parts) {
+	case 2:
+		if parts[1] == "max" {
+			value = math.MaxUint32
+		} else {
+			value, err = parseUint(parts[1], 10, 32)
+			if err != nil {
+				return
+			}
+		}
+		if parts[0] == "hca_handle" {
+			entry.HcaHandles = uint32(value)
+		} else if parts[0] == "hca_object" {
+			entry.HcaObjects = uint32(value)
+		}
+	}
+}
+
+// toRdmaEntry converts lines of the form "<device> <key=value> <key=value>"
+// into RDMA entries. Lines that do not have exactly three
+// whitespace-separated fields are skipped.
+func toRdmaEntry(strEntries []string) []*stats.RdmaEntry {
+	var rdmaEntries []*stats.RdmaEntry
+	for _, line := range strEntries {
+		fields := strings.Fields(line)
+		if len(fields) != 3 {
+			continue
+		}
+		entry := &stats.RdmaEntry{Device: fields[0]}
+		parseRdmaKV(fields[1], entry)
+		parseRdmaKV(fields[2], entry)
+		rdmaEntries = append(rdmaEntries, entry)
+	}
+	return rdmaEntries
+}
+
+// isUnitExists reports whether err is a dbus.Error whose name contains
+// "org.freedesktop.systemd1.UnitExists", i.e. the systemd unit already
+// exists.
+func isUnitExists(err error) bool {
+	if err == nil {
+		return false
+	}
+	dbusError, ok := err.(dbus.Error)
+	if !ok {
+		return false
+	}
+	return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
+}
+
+func systemdUnitFromPath(path string) string {
+	_, unit := filepath.Split(path)
+	return unit
+}
+
+// readHugeTlbStats collects hugetlb usage from the cgroup directory path. It
+// looks at every file whose name contains "hugetlb" and ends in "max" or
+// "current", takes the second dot-separated name component as the page size,
+// and fills in Max or Current according to the third component.
+//
+// Unreadable or unparsable files are skipped, and "max" is reported as
+// math.MaxUint64. The result has one entry per page size, in no particular
+// order. It is empty but non-nil when the directory cannot be listed.
+func readHugeTlbStats(path string) []*stats.HugeTlbStat {
+	var usage = []*stats.HugeTlbStat{}
+	var keyUsage = make(map[string]*stats.HugeTlbStat)
+	f, err := os.Open(path)
+	if err != nil {
+		return usage
+	}
+	files, err := f.Readdir(-1)
+	f.Close()
+	if err != nil {
+		return usage
+	}
+
+	for _, file := range files {
+		if strings.Contains(file.Name(), "hugetlb") &&
+			(strings.HasSuffix(file.Name(), "max") || strings.HasSuffix(file.Name(), "current")) {
+			var hugeTlb *stats.HugeTlbStat
+			var ok bool
+			fileName := strings.Split(file.Name(), ".")
+			if len(fileName) < 3 {
+				// The name lacks the page-size and kind components that are
+				// indexed below; skip it instead of panicking.
+				continue
+			}
+			pageSize := fileName[1]
+			if hugeTlb, ok = keyUsage[pageSize]; !ok {
+				hugeTlb = &stats.HugeTlbStat{}
+			}
+			hugeTlb.Pagesize = pageSize
+			out, err := ioutil.ReadFile(filepath.Join(path, file.Name()))
+			if err != nil {
+				continue
+			}
+			var value uint64
+			stringVal := strings.TrimSpace(string(out))
+			if stringVal == "max" {
+				value = math.MaxUint64
+			} else {
+				value, err = strconv.ParseUint(stringVal, 10, 64)
+			}
+			if err != nil {
+				continue
+			}
+			switch fileName[2] {
+			case "max":
+				hugeTlb.Max = value
+			case "current":
+				hugeTlb.Current = value
+			}
+			keyUsage[pageSize] = hugeTlb
+		}
+	}
+	for _, entry := range keyUsage {
+		usage = append(usage, entry)
+	}
+	return usage
+}