Browse Source

cgroup2: implement `docker info`

ref: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
Akihiro Suda 5 năm trước cách đây
mục cha
commit
f350b53241
66 tập tin đã thay đổi với 7376 bổ sung và 89 xóa
  1. 7 0
      api/swagger.yaml
  2. 1 0
      api/types/types.go
  3. 6 1
      cmd/dockerd/daemon.go
  4. 3 3
      daemon/daemon.go
  5. 15 2
      daemon/daemon_unix.go
  6. 10 1
      daemon/daemon_unsupported.go
  7. 5 0
      daemon/daemon_windows.go
  8. 1 2
      daemon/info.go
  9. 43 26
      daemon/info_unix.go
  10. 1 0
      docs/api/version-history.md
  11. 151 0
      pkg/sysinfo/cgroup2_linux.go
  12. 3 0
      pkg/sysinfo/sysinfo.go
  13. 28 46
      pkg/sysinfo/sysinfo_linux.go
  14. 6 1
      pkg/sysinfo/sysinfo_unix.go
  15. 6 1
      pkg/sysinfo/sysinfo_windows.go
  16. 13 4
      runconfig/config.go
  17. 3 2
      runconfig/config_test.go
  18. 1 0
      vendor.conf
  19. 23 0
      vendor/github.com/cilium/ebpf/LICENSE
  20. 203 0
      vendor/github.com/cilium/ebpf/abi.go
  21. 149 0
      vendor/github.com/cilium/ebpf/asm/alu.go
  22. 107 0
      vendor/github.com/cilium/ebpf/asm/alu_string.go
  23. 2 0
      vendor/github.com/cilium/ebpf/asm/doc.go
  24. 143 0
      vendor/github.com/cilium/ebpf/asm/func.go
  25. 133 0
      vendor/github.com/cilium/ebpf/asm/func_string.go
  26. 416 0
      vendor/github.com/cilium/ebpf/asm/instruction.go
  27. 109 0
      vendor/github.com/cilium/ebpf/asm/jump.go
  28. 53 0
      vendor/github.com/cilium/ebpf/asm/jump_string.go
  29. 189 0
      vendor/github.com/cilium/ebpf/asm/load_store.go
  30. 80 0
      vendor/github.com/cilium/ebpf/asm/load_store_string.go
  31. 237 0
      vendor/github.com/cilium/ebpf/asm/opcode.go
  32. 38 0
      vendor/github.com/cilium/ebpf/asm/opcode_string.go
  33. 42 0
      vendor/github.com/cilium/ebpf/asm/register.go
  34. 148 0
      vendor/github.com/cilium/ebpf/collection.go
  35. 17 0
      vendor/github.com/cilium/ebpf/doc.go
  36. 392 0
      vendor/github.com/cilium/ebpf/elf_reader.go
  37. 8 0
      vendor/github.com/cilium/ebpf/go.mod
  38. 64 0
      vendor/github.com/cilium/ebpf/internal/cpu.go
  39. 24 0
      vendor/github.com/cilium/ebpf/internal/endian.go
  40. 85 0
      vendor/github.com/cilium/ebpf/internal/feature.go
  41. 127 0
      vendor/github.com/cilium/ebpf/internal/unix/types_linux.go
  42. 193 0
      vendor/github.com/cilium/ebpf/internal/unix/types_other.go
  43. 58 0
      vendor/github.com/cilium/ebpf/linker.go
  44. 604 0
      vendor/github.com/cilium/ebpf/map.go
  45. 192 0
      vendor/github.com/cilium/ebpf/marshalers.go
  46. 504 0
      vendor/github.com/cilium/ebpf/prog.go
  47. 14 0
      vendor/github.com/cilium/ebpf/ptr_32_be.go
  48. 14 0
      vendor/github.com/cilium/ebpf/ptr_32_le.go
  49. 14 0
      vendor/github.com/cilium/ebpf/ptr_64.go
  50. 20 0
      vendor/github.com/cilium/ebpf/readme.md
  51. 447 0
      vendor/github.com/cilium/ebpf/syscalls.go
  52. 189 0
      vendor/github.com/cilium/ebpf/types.go
  53. 78 0
      vendor/github.com/cilium/ebpf/types_string.go
  54. 83 0
      vendor/github.com/containerd/cgroups/v2/cpu.go
  55. 199 0
      vendor/github.com/containerd/cgroups/v2/devicefilter.go
  56. 83 0
      vendor/github.com/containerd/cgroups/v2/ebpf.go
  57. 50 0
      vendor/github.com/containerd/cgroups/v2/errors.go
  58. 37 0
      vendor/github.com/containerd/cgroups/v2/hugetlb.go
  59. 64 0
      vendor/github.com/containerd/cgroups/v2/io.go
  60. 739 0
      vendor/github.com/containerd/cgroups/v2/manager.go
  61. 52 0
      vendor/github.com/containerd/cgroups/v2/memory.go
  62. 60 0
      vendor/github.com/containerd/cgroups/v2/paths.go
  63. 37 0
      vendor/github.com/containerd/cgroups/v2/pids.go
  64. 46 0
      vendor/github.com/containerd/cgroups/v2/rdma.go
  65. 65 0
      vendor/github.com/containerd/cgroups/v2/state.go
  66. 442 0
      vendor/github.com/containerd/cgroups/v2/utils.go

+ 7 - 0
api/swagger.yaml

@@ -4047,6 +4047,13 @@ definitions:
         enum: ["cgroupfs", "systemd", "none"]
         enum: ["cgroupfs", "systemd", "none"]
         default: "cgroupfs"
         default: "cgroupfs"
         example: "cgroupfs"
         example: "cgroupfs"
+      CgroupVersion:
+        description: |
+          The version of the cgroup.
+        type: "string"
+        enum: ["1", "2"]
+        default: "1"
+        example: "1"
       NEventsListener:
       NEventsListener:
         description: "Number of event listeners subscribed."
         description: "Number of event listeners subscribed."
         type: "integer"
         type: "integer"

+ 1 - 0
api/types/types.go

@@ -175,6 +175,7 @@ type Info struct {
 	SystemTime         string
 	SystemTime         string
 	LoggingDriver      string
 	LoggingDriver      string
 	CgroupDriver       string
 	CgroupDriver       string
+	CgroupVersion      string `json:",omitempty"`
 	NEventsListener    int
 	NEventsListener    int
 	KernelVersion      string
 	KernelVersion      string
 	OperatingSystem    string
 	OperatingSystem    string

+ 6 - 1
cmd/dockerd/daemon.go

@@ -45,6 +45,7 @@ import (
 	"github.com/docker/docker/pkg/pidfile"
 	"github.com/docker/docker/pkg/pidfile"
 	"github.com/docker/docker/pkg/plugingetter"
 	"github.com/docker/docker/pkg/plugingetter"
 	"github.com/docker/docker/pkg/signal"
 	"github.com/docker/docker/pkg/signal"
+	"github.com/docker/docker/pkg/sysinfo"
 	"github.com/docker/docker/pkg/system"
 	"github.com/docker/docker/pkg/system"
 	"github.com/docker/docker/plugin"
 	"github.com/docker/docker/plugin"
 	"github.com/docker/docker/rootless"
 	"github.com/docker/docker/rootless"
@@ -456,7 +457,11 @@ func warnOnDeprecatedConfigOptions(config *config.Config) {
 }
 }
 
 
 func initRouter(opts routerOptions) {
 func initRouter(opts routerOptions) {
-	decoder := runconfig.ContainerDecoder{}
+	decoder := runconfig.ContainerDecoder{
+		GetSysInfo: func() *sysinfo.SysInfo {
+			return opts.daemon.RawSysInfo(true)
+		},
+	}
 
 
 	routers := []router.Router{
 	routers := []router.Router{
 		// we need to add the checkpoint router before the container router or the DELETE gets masked
 		// we need to add the checkpoint router before the container router or the DELETE gets masked

+ 3 - 3
daemon/daemon.go

@@ -42,6 +42,7 @@ import (
 	"github.com/docker/docker/errdefs"
 	"github.com/docker/docker/errdefs"
 	bkconfig "github.com/moby/buildkit/cmd/buildkitd/config"
 	bkconfig "github.com/moby/buildkit/cmd/buildkitd/config"
 	"github.com/moby/buildkit/util/resolver"
 	"github.com/moby/buildkit/util/resolver"
+	rsystem "github.com/opencontainers/runc/libcontainer/system"
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus"
 
 
 	// register graph drivers
 	// register graph drivers
@@ -56,7 +57,6 @@ import (
 	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/idtools"
 	"github.com/docker/docker/pkg/locker"
 	"github.com/docker/docker/pkg/locker"
 	"github.com/docker/docker/pkg/plugingetter"
 	"github.com/docker/docker/pkg/plugingetter"
-	"github.com/docker/docker/pkg/sysinfo"
 	"github.com/docker/docker/pkg/system"
 	"github.com/docker/docker/pkg/system"
 	"github.com/docker/docker/pkg/truncindex"
 	"github.com/docker/docker/pkg/truncindex"
 	"github.com/docker/docker/plugin"
 	"github.com/docker/docker/plugin"
@@ -1026,10 +1026,10 @@ func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.S
 		return nil, err
 		return nil, err
 	}
 	}
 
 
-	sysInfo := sysinfo.New(false)
+	sysInfo := d.RawSysInfo(false)
 	// Check if Devices cgroup is mounted; it is a hard requirement for container security,
 	// Check if Devices cgroup is mounted; it is a hard requirement for container security,
 	// on Linux.
 	// on Linux.
-	if runtime.GOOS == "linux" && !sysInfo.CgroupDevicesEnabled {
+	if runtime.GOOS == "linux" && !sysInfo.CgroupDevicesEnabled && !rsystem.RunningInUserNS() {
 		return nil, errors.New("Devices cgroup isn't mounted")
 		return nil, errors.New("Devices cgroup isn't mounted")
 	}
 	}
 
 

+ 15 - 2
daemon/daemon_unix.go

@@ -644,7 +644,7 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes.
 	if hostConfig == nil {
 	if hostConfig == nil {
 		return nil, nil
 		return nil, nil
 	}
 	}
-	sysInfo := sysinfo.New(true)
+	sysInfo := daemon.RawSysInfo(true)
 
 
 	w, err := verifyPlatformContainerResources(&hostConfig.Resources, sysInfo, update)
 	w, err := verifyPlatformContainerResources(&hostConfig.Resources, sysInfo, update)
 
 
@@ -1745,7 +1745,7 @@ func (daemon *Daemon) initCgroupsPath(path string) error {
 	}
 	}
 
 
 	path = filepath.Join(mnt, root, path)
 	path = filepath.Join(mnt, root, path)
-	sysInfo := sysinfo.New(true)
+	sysInfo := daemon.RawSysInfo(true)
 	if err := maybeCreateCPURealTimeFile(sysInfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil {
 	if err := maybeCreateCPURealTimeFile(sysInfo.CPURealtimePeriod, daemon.configStore.CPURealtimePeriod, "cpu.rt_period_us", path); err != nil {
 		return err
 		return err
 	}
 	}
@@ -1779,3 +1779,16 @@ func (daemon *Daemon) setupSeccompProfile() error {
 func (daemon *Daemon) useShimV2() bool {
 func (daemon *Daemon) useShimV2() bool {
 	return cgroups.IsCgroup2UnifiedMode()
 	return cgroups.IsCgroup2UnifiedMode()
 }
 }
+
+// RawSysInfo returns *sysinfo.SysInfo.
+func (daemon *Daemon) RawSysInfo(quiet bool) *sysinfo.SysInfo {
+	var opts []sysinfo.Opt
+	if daemon.getCgroupDriver() == cgroupSystemdDriver {
+		rootlesskitParentEUID := os.Getenv("ROOTLESSKIT_PARENT_EUID")
+		if rootlesskitParentEUID != "" {
+			groupPath := fmt.Sprintf("/user.slice/user-%s.slice", rootlesskitParentEUID)
+			opts = append(opts, sysinfo.WithCgroup2GroupPath(groupPath))
+		}
+	}
+	return sysinfo.New(quiet, opts...)
+}

+ 10 - 1
daemon/daemon_unsupported.go

@@ -1,9 +1,18 @@
 // +build !linux,!freebsd,!windows
 // +build !linux,!freebsd,!windows
 
 
 package daemon // import "github.com/docker/docker/daemon"
 package daemon // import "github.com/docker/docker/daemon"
-import "github.com/docker/docker/daemon/config"
+
+import (
+	"github.com/docker/docker/daemon/config"
+	"github.com/docker/docker/pkg/sysinfo"
+)
 
 
 const platformSupported = false
 const platformSupported = false
 
 
 func setupResolvConf(config *config.Config) {
 func setupResolvConf(config *config.Config) {
 }
 }
+
+// RawSysInfo returns *sysinfo.SysInfo.
+func (daemon *Daemon) RawSysInfo(quiet bool) *sysinfo.SysInfo {
+	return sysinfo.New(quiet)
+}

+ 5 - 0
daemon/daemon_windows.go

@@ -657,3 +657,8 @@ func setupResolvConf(config *config.Config) {
 func (daemon *Daemon) useShimV2() bool {
 func (daemon *Daemon) useShimV2() bool {
 	return true
 	return true
 }
 }
+
+// RawSysInfo returns *sysinfo.SysInfo.
+func (daemon *Daemon) RawSysInfo(quiet bool) *sysinfo.SysInfo {
+	return sysinfo.New(quiet)
+}

+ 1 - 2
daemon/info.go

@@ -28,7 +28,7 @@ import (
 func (daemon *Daemon) SystemInfo() *types.Info {
 func (daemon *Daemon) SystemInfo() *types.Info {
 	defer metrics.StartTimer(hostInfoFunctions.WithValues("system_info"))()
 	defer metrics.StartTimer(hostInfoFunctions.WithValues("system_info"))()
 
 
-	sysInfo := sysinfo.New(true)
+	sysInfo := daemon.RawSysInfo(true)
 	cRunning, cPaused, cStopped := stateCtr.get()
 	cRunning, cPaused, cStopped := stateCtr.get()
 
 
 	v := &types.Info{
 	v := &types.Info{
@@ -47,7 +47,6 @@ func (daemon *Daemon) SystemInfo() *types.Info {
 		NGoroutines:        runtime.NumGoroutine(),
 		NGoroutines:        runtime.NumGoroutine(),
 		SystemTime:         time.Now().Format(time.RFC3339Nano),
 		SystemTime:         time.Now().Format(time.RFC3339Nano),
 		LoggingDriver:      daemon.defaultLogConfig.Type,
 		LoggingDriver:      daemon.defaultLogConfig.Type,
-		CgroupDriver:       daemon.getCgroupDriver(),
 		NEventsListener:    daemon.EventsService.SubscribersCount(),
 		NEventsListener:    daemon.EventsService.SubscribersCount(),
 		KernelVersion:      kernelVersion(),
 		KernelVersion:      kernelVersion(),
 		OperatingSystem:    operatingSystem(),
 		OperatingSystem:    operatingSystem(),

+ 43 - 26
daemon/info_unix.go

@@ -19,6 +19,12 @@ import (
 
 
 // fillPlatformInfo fills the platform related info.
 // fillPlatformInfo fills the platform related info.
 func (daemon *Daemon) fillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo) {
 func (daemon *Daemon) fillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo) {
+	v.CgroupDriver = daemon.getCgroupDriver()
+	v.CgroupVersion = "1"
+	if sysInfo.CgroupUnified {
+		v.CgroupVersion = "2"
+	}
+
 	v.MemoryLimit = sysInfo.MemoryLimit
 	v.MemoryLimit = sysInfo.MemoryLimit
 	v.SwapLimit = sysInfo.SwapLimit
 	v.SwapLimit = sysInfo.SwapLimit
 	v.KernelMemory = sysInfo.KernelMemory
 	v.KernelMemory = sysInfo.KernelMemory
@@ -81,32 +87,43 @@ func (daemon *Daemon) fillPlatformInfo(v *types.Info, sysInfo *sysinfo.SysInfo)
 		v.InitCommit.ID = "N/A"
 		v.InitCommit.ID = "N/A"
 	}
 	}
 
 
-	if !v.MemoryLimit {
-		v.Warnings = append(v.Warnings, "WARNING: No memory limit support")
-	}
-	if !v.SwapLimit {
-		v.Warnings = append(v.Warnings, "WARNING: No swap limit support")
-	}
-	if !v.KernelMemory {
-		v.Warnings = append(v.Warnings, "WARNING: No kernel memory limit support")
-	}
-	if !v.KernelMemoryTCP {
-		v.Warnings = append(v.Warnings, "WARNING: No kernel memory TCP limit support")
-	}
-	if !v.OomKillDisable {
-		v.Warnings = append(v.Warnings, "WARNING: No oom kill disable support")
-	}
-	if !v.CPUCfsQuota {
-		v.Warnings = append(v.Warnings, "WARNING: No cpu cfs quota support")
-	}
-	if !v.CPUCfsPeriod {
-		v.Warnings = append(v.Warnings, "WARNING: No cpu cfs period support")
-	}
-	if !v.CPUShares {
-		v.Warnings = append(v.Warnings, "WARNING: No cpu shares support")
-	}
-	if !v.CPUSet {
-		v.Warnings = append(v.Warnings, "WARNING: No cpuset support")
+	if v.CgroupDriver == cgroupNoneDriver {
+		if v.CgroupVersion == "2" {
+			v.Warnings = append(v.Warnings, "WARNING: Running in rootless-mode without cgroup. To enable cgroup in rootless-mode, you need to set exec-opt \"native.cgroupdriver=systemd\".")
+		} else {
+			v.Warnings = append(v.Warnings, "WARNING: Running in rootless-mode without cgroup. To enable cgroup in rootless-mode, you need to boot the system in cgroup v2 mode and set exec-opt \"native.cgroupdriver=systemd\".")
+		}
+	} else {
+		if !v.MemoryLimit {
+			v.Warnings = append(v.Warnings, "WARNING: No memory limit support")
+		}
+		if !v.SwapLimit {
+			v.Warnings = append(v.Warnings, "WARNING: No swap limit support")
+		}
+		if !v.KernelMemory {
+			v.Warnings = append(v.Warnings, "WARNING: No kernel memory limit support")
+		}
+		if !v.KernelMemoryTCP {
+			v.Warnings = append(v.Warnings, "WARNING: No kernel memory TCP limit support")
+		}
+		if !v.OomKillDisable {
+			v.Warnings = append(v.Warnings, "WARNING: No oom kill disable support")
+		}
+		if !v.CPUCfsQuota {
+			v.Warnings = append(v.Warnings, "WARNING: No cpu cfs quota support")
+		}
+		if !v.CPUCfsPeriod {
+			v.Warnings = append(v.Warnings, "WARNING: No cpu cfs period support")
+		}
+		if !v.CPUShares {
+			v.Warnings = append(v.Warnings, "WARNING: No cpu shares support")
+		}
+		if !v.CPUSet {
+			v.Warnings = append(v.Warnings, "WARNING: No cpuset support")
+		}
+		if v.CgroupVersion == "2" {
+			v.Warnings = append(v.Warnings, "WARNING: Support for cgroup v2 is experimental")
+		}
 	}
 	}
 	if !v.IPv4Forwarding {
 	if !v.IPv4Forwarding {
 		v.Warnings = append(v.Warnings, "WARNING: IPv4 forwarding is disabled")
 		v.Warnings = append(v.Warnings, "WARNING: IPv4 forwarding is disabled")

+ 1 - 0
docs/api/version-history.md

@@ -17,6 +17,7 @@ keywords: "API, Docker, rcli, REST, documentation"
 
 
 [Docker Engine API v1.41](https://docs.docker.com/engine/api/v1.41/) documentation
 [Docker Engine API v1.41](https://docs.docker.com/engine/api/v1.41/) documentation
 
 
+* `GET /info` now returns a `CgroupVersion` field, containing the cgroup version.
 * `POST /services/create` and `POST /services/{id}/update` now supports `BindOptions.NonRecursive`.
 * `POST /services/create` and `POST /services/{id}/update` now supports `BindOptions.NonRecursive`.
 * The `ClusterStore` and `ClusterAdvertise` fields in `GET /info` are deprecated
 * The `ClusterStore` and `ClusterAdvertise` fields in `GET /info` are deprecated
   and are now omitted if they contain an empty value. This change is not versioned,
   and are now omitted if they contain an empty value. This change is not versioned,

+ 151 - 0
pkg/sysinfo/cgroup2_linux.go

@@ -0,0 +1,151 @@
+package sysinfo // import "github.com/docker/docker/pkg/sysinfo"
+
+import (
+	"io/ioutil"
+	"path"
+	"strings"
+
+	cgroupsV2 "github.com/containerd/cgroups/v2"
+	rsystem "github.com/opencontainers/runc/libcontainer/system"
+	"github.com/sirupsen/logrus"
+)
+
+type infoCollectorV2 func(info *SysInfo, controllers map[string]struct{}, dirPath string) (warnings []string)
+
+func newV2(quiet bool, opts *opts) *SysInfo {
+	var warnings []string
+	sysInfo := &SysInfo{
+		CgroupUnified: true,
+	}
+	g := opts.cg2GroupPath
+	if g == "" {
+		g = "/"
+	}
+	m, err := cgroupsV2.LoadManager("/sys/fs/cgroup", g)
+	if err != nil {
+		logrus.Warn(err)
+	} else {
+		controllersM := make(map[string]struct{})
+		controllers, err := m.Controllers()
+		if err != nil {
+			logrus.Warn(err)
+		}
+		for _, c := range controllers {
+			controllersM[c] = struct{}{}
+		}
+		opsV2 := []infoCollectorV2{
+			applyMemoryCgroupInfoV2,
+			applyCPUCgroupInfoV2,
+			applyIOCgroupInfoV2,
+			applyCPUSetCgroupInfoV2,
+			applyPIDSCgroupInfoV2,
+			applyDevicesCgroupInfoV2,
+		}
+		dirPath := path.Join("/sys/fs/cgroup", path.Clean(g))
+		for _, o := range opsV2 {
+			w := o(sysInfo, controllersM, dirPath)
+			warnings = append(warnings, w...)
+		}
+	}
+
+	ops := []infoCollector{
+		applyNetworkingInfo,
+		applyAppArmorInfo,
+		applySeccompInfo,
+		applyCgroupNsInfo,
+	}
+	for _, o := range ops {
+		w := o(sysInfo, nil)
+		warnings = append(warnings, w...)
+	}
+	if !quiet {
+		for _, w := range warnings {
+			logrus.Warn(w)
+		}
+	}
+	return sysInfo
+}
+
+func applyMemoryCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
+	var warnings []string
+	if _, ok := controllers["memory"]; !ok {
+		warnings = append(warnings, "Unable to find memory controller")
+		return warnings
+	}
+
+	info.MemoryLimit = true
+	info.SwapLimit = true
+	info.MemoryReservation = true
+	info.OomKillDisable = false
+	info.MemorySwappiness = false
+	info.KernelMemory = false
+	info.KernelMemoryTCP = false
+	return warnings
+}
+
+func applyCPUCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
+	var warnings []string
+	if _, ok := controllers["cpu"]; !ok {
+		warnings = append(warnings, "Unable to find cpu controller")
+		return warnings
+	}
+	info.CPUShares = true
+	info.CPUCfsPeriod = true
+	info.CPUCfsQuota = true
+	info.CPURealtimePeriod = false
+	info.CPURealtimeRuntime = false
+	return warnings
+}
+
+func applyIOCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
+	var warnings []string
+	if _, ok := controllers["io"]; !ok {
+		warnings = append(warnings, "Unable to find io controller")
+		return warnings
+	}
+
+	info.BlkioWeight = true
+	info.BlkioWeightDevice = true
+	info.BlkioReadBpsDevice = true
+	info.BlkioWriteBpsDevice = true
+	info.BlkioReadIOpsDevice = true
+	info.BlkioWriteIOpsDevice = true
+	return warnings
+}
+
+func applyCPUSetCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, dirPath string) []string {
+	var warnings []string
+	if _, ok := controllers["cpuset"]; !ok {
+		warnings = append(warnings, "Unable to find cpuset controller")
+		return warnings
+	}
+	info.Cpuset = true
+
+	cpus, err := ioutil.ReadFile(path.Join(dirPath, "cpuset.cpus.effective"))
+	if err != nil {
+		return warnings
+	}
+	info.Cpus = strings.TrimSpace(string(cpus))
+
+	mems, err := ioutil.ReadFile(path.Join(dirPath, "cpuset.mems.effective"))
+	if err != nil {
+		return warnings
+	}
+	info.Mems = strings.TrimSpace(string(mems))
+	return warnings
+}
+
+func applyPIDSCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
+	var warnings []string
+	if _, ok := controllers["pids"]; !ok {
+		warnings = append(warnings, "Unable to find pids controller")
+		return warnings
+	}
+	info.PidsLimit = true
+	return warnings
+}
+
+func applyDevicesCgroupInfoV2(info *SysInfo, controllers map[string]struct{}, _ string) []string {
+	info.CgroupDevicesEnabled = !rsystem.RunningInUserNS()
+	return nil
+}

+ 3 - 0
pkg/sysinfo/sysinfo.go

@@ -30,6 +30,9 @@ type SysInfo struct {
 
 
 	// Whether the cgroup has the mountpoint of "devices" or not
 	// Whether the cgroup has the mountpoint of "devices" or not
 	CgroupDevicesEnabled bool
 	CgroupDevicesEnabled bool
+
+	// Whether the cgroup is in unified mode (v2).
+	CgroupUnified bool
 }
 }
 
 
 type cgroupMemInfo struct {
 type cgroupMemInfo struct {

+ 28 - 46
pkg/sysinfo/sysinfo_linux.go

@@ -28,10 +28,37 @@ func findCgroupMountpoints() (map[string]string, error) {
 
 
 type infoCollector func(info *SysInfo, cgMounts map[string]string) (warnings []string)
 type infoCollector func(info *SysInfo, cgMounts map[string]string) (warnings []string)
 
 
+type opts struct {
+	cg2GroupPath string
+}
+
+// Opt for New().
+type Opt func(*opts)
+
+// WithCgroup2GroupPath specifies the cgroup v2 group path to inspect availability
+// of the controllers.
+//
+// WithCgroup2GroupPath is expected to be used for rootless mode with systemd driver.
+//
+// e.g. g = "/user.slice/user-1000.slice/user@1000.service"
+func WithCgroup2GroupPath(g string) Opt {
+	return func(o *opts) {
+		o.cg2GroupPath = path.Clean(g)
+	}
+}
+
 // New returns a new SysInfo, using the filesystem to detect which features
 // New returns a new SysInfo, using the filesystem to detect which features
 // the kernel supports. If `quiet` is `false` warnings are printed in logs
 // the kernel supports. If `quiet` is `false` warnings are printed in logs
 // whenever an error occurs or misconfigurations are present.
 // whenever an error occurs or misconfigurations are present.
-func New(quiet bool) *SysInfo {
+func New(quiet bool, options ...Opt) *SysInfo {
+	var opts opts
+	for _, o := range options {
+		o(&opts)
+	}
+	if cgroups.IsCgroup2UnifiedMode() {
+		return newV2(quiet, &opts)
+	}
+
 	var ops []infoCollector
 	var ops []infoCollector
 	var warnings []string
 	var warnings []string
 	sysInfo := &SysInfo{}
 	sysInfo := &SysInfo{}
@@ -60,9 +87,6 @@ func New(quiet bool) *SysInfo {
 		w := o(sysInfo, cgMounts)
 		w := o(sysInfo, cgMounts)
 		warnings = append(warnings, w...)
 		warnings = append(warnings, w...)
 	}
 	}
-	if cgroups.IsCgroup2UnifiedMode() {
-		warnings = append(warnings, "Your system is running cgroup v2 (unsupported)")
-	}
 	if !quiet {
 	if !quiet {
 		for _, w := range warnings {
 		for _, w := range warnings {
 			logrus.Warn(w)
 			logrus.Warn(w)
@@ -73,15 +97,6 @@ func New(quiet bool) *SysInfo {
 
 
 // applyMemoryCgroupInfo reads the memory information from the memory cgroup mount point.
 // applyMemoryCgroupInfo reads the memory information from the memory cgroup mount point.
 func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.MemoryLimit = true
-		info.SwapLimit = true
-		info.MemoryReservation = true
-		info.OomKillDisable = true
-		info.MemorySwappiness = true
-		return nil
-	}
 	var warnings []string
 	var warnings []string
 	mountPoint, ok := cgMounts["memory"]
 	mountPoint, ok := cgMounts["memory"]
 	if !ok {
 	if !ok {
@@ -120,15 +135,6 @@ func applyMemoryCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 
 
 // applyCPUCgroupInfo reads the cpu information from the cpu cgroup mount point.
 // applyCPUCgroupInfo reads the cpu information from the cpu cgroup mount point.
 func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.CPUShares = true
-		info.CPUCfsPeriod = true
-		info.CPUCfsQuota = true
-		info.CPURealtimePeriod = true
-		info.CPURealtimeRuntime = true
-		return nil
-	}
 	var warnings []string
 	var warnings []string
 	mountPoint, ok := cgMounts["cpu"]
 	mountPoint, ok := cgMounts["cpu"]
 	if !ok {
 	if !ok {
@@ -166,15 +172,6 @@ func applyCPUCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 
 
 // applyBlkioCgroupInfo reads the blkio information from the blkio cgroup mount point.
 // applyBlkioCgroupInfo reads the blkio information from the blkio cgroup mount point.
 func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.BlkioWeight = true
-		info.BlkioReadBpsDevice = true
-		info.BlkioWriteBpsDevice = true
-		info.BlkioReadIOpsDevice = true
-		info.BlkioWriteIOpsDevice = true
-		return nil
-	}
 	var warnings []string
 	var warnings []string
 	mountPoint, ok := cgMounts["blkio"]
 	mountPoint, ok := cgMounts["blkio"]
 	if !ok {
 	if !ok {
@@ -216,11 +213,6 @@ func applyBlkioCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 
 
 // applyCPUSetCgroupInfo reads the cpuset information from the cpuset cgroup mount point.
 // applyCPUSetCgroupInfo reads the cpuset information from the cpuset cgroup mount point.
 func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.Cpuset = true
-		return nil
-	}
 	var warnings []string
 	var warnings []string
 	mountPoint, ok := cgMounts["cpuset"]
 	mountPoint, ok := cgMounts["cpuset"]
 	if !ok {
 	if !ok {
@@ -248,11 +240,6 @@ func applyCPUSetCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 
 
 // applyPIDSCgroupInfo reads the pids information from the pids cgroup mount point.
 // applyPIDSCgroupInfo reads the pids information from the pids cgroup mount point.
 func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {
 func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.PidsLimit = true
-		return nil
-	}
 	var warnings []string
 	var warnings []string
 	_, err := cgroups.FindCgroupMountpoint("", "pids")
 	_, err := cgroups.FindCgroupMountpoint("", "pids")
 	if err != nil {
 	if err != nil {
@@ -265,11 +252,6 @@ func applyPIDSCgroupInfo(info *SysInfo, _ map[string]string) []string {
 
 
 // applyDevicesCgroupInfo reads the devices information from the devices cgroup mount point.
 // applyDevicesCgroupInfo reads the devices information from the devices cgroup mount point.
 func applyDevicesCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
 func applyDevicesCgroupInfo(info *SysInfo, cgMounts map[string]string) []string {
-	if cgroups.IsCgroup2UnifiedMode() {
-		// TODO: check cgroup2 info correctly
-		info.CgroupDevicesEnabled = true
-		return nil
-	}
 	var warnings []string
 	var warnings []string
 	_, ok := cgMounts["devices"]
 	_, ok := cgMounts["devices"]
 	info.CgroupDevicesEnabled = ok
 	info.CgroupDevicesEnabled = ok

+ 6 - 1
pkg/sysinfo/sysinfo_unix.go

@@ -2,8 +2,13 @@
 
 
 package sysinfo // import "github.com/docker/docker/pkg/sysinfo"
 package sysinfo // import "github.com/docker/docker/pkg/sysinfo"
 
 
+type opts struct{}
+
+// Opt for New().
+type Opt func(*opts)
+
 // New returns an empty SysInfo for non linux for now.
 // New returns an empty SysInfo for non linux for now.
-func New(quiet bool) *SysInfo {
+func New(quiet bool, options ...Opt) *SysInfo {
 	sysInfo := &SysInfo{}
 	sysInfo := &SysInfo{}
 	return sysInfo
 	return sysInfo
 }
 }

+ 6 - 1
pkg/sysinfo/sysinfo_windows.go

@@ -1,7 +1,12 @@
 package sysinfo // import "github.com/docker/docker/pkg/sysinfo"
 package sysinfo // import "github.com/docker/docker/pkg/sysinfo"
 
 
+type opts struct{}
+
+// Opt for New().
+type Opt func(*opts)
+
 // New returns an empty SysInfo for windows for now.
 // New returns an empty SysInfo for windows for now.
-func New(quiet bool) *SysInfo {
+func New(quiet bool, options ...Opt) *SysInfo {
 	sysInfo := &SysInfo{}
 	sysInfo := &SysInfo{}
 	return sysInfo
 	return sysInfo
 }
 }

+ 13 - 4
runconfig/config.go

@@ -11,11 +11,20 @@ import (
 
 
 // ContainerDecoder implements httputils.ContainerDecoder
 // ContainerDecoder implements httputils.ContainerDecoder
 // calling DecodeContainerConfig.
 // calling DecodeContainerConfig.
-type ContainerDecoder struct{}
+type ContainerDecoder struct {
+	GetSysInfo func() *sysinfo.SysInfo
+}
 
 
 // DecodeConfig makes ContainerDecoder to implement httputils.ContainerDecoder
 // DecodeConfig makes ContainerDecoder to implement httputils.ContainerDecoder
 func (r ContainerDecoder) DecodeConfig(src io.Reader) (*container.Config, *container.HostConfig, *networktypes.NetworkingConfig, error) {
 func (r ContainerDecoder) DecodeConfig(src io.Reader) (*container.Config, *container.HostConfig, *networktypes.NetworkingConfig, error) {
-	return decodeContainerConfig(src)
+	var si *sysinfo.SysInfo
+	if r.GetSysInfo != nil {
+		si = r.GetSysInfo()
+	} else {
+		si = sysinfo.New(true)
+	}
+
+	return decodeContainerConfig(src, si)
 }
 }
 
 
 // DecodeHostConfig makes ContainerDecoder to implement httputils.ContainerDecoder
 // DecodeHostConfig makes ContainerDecoder to implement httputils.ContainerDecoder
@@ -27,7 +36,7 @@ func (r ContainerDecoder) DecodeHostConfig(src io.Reader) (*container.HostConfig
 // struct and returns both a Config and a HostConfig struct
 // struct and returns both a Config and a HostConfig struct
 // Be aware this function is not checking whether the resulted structs are nil,
 // Be aware this function is not checking whether the resulted structs are nil,
 // it's your business to do so
 // it's your business to do so
-func decodeContainerConfig(src io.Reader) (*container.Config, *container.HostConfig, *networktypes.NetworkingConfig, error) {
+func decodeContainerConfig(src io.Reader, si *sysinfo.SysInfo) (*container.Config, *container.HostConfig, *networktypes.NetworkingConfig, error) {
 	var w ContainerConfigWrapper
 	var w ContainerConfigWrapper
 
 
 	decoder := json.NewDecoder(src)
 	decoder := json.NewDecoder(src)
@@ -63,7 +72,7 @@ func decodeContainerConfig(src io.Reader) (*container.Config, *container.HostCon
 	}
 	}
 
 
 	// Validate Resources
 	// Validate Resources
-	if err := validateResources(hc, sysinfo.New(true)); err != nil {
+	if err := validateResources(hc, si); err != nil {
 		return nil, nil, nil, err
 		return nil, nil, nil, err
 	}
 	}
 
 

+ 3 - 2
runconfig/config_test.go

@@ -12,6 +12,7 @@ import (
 	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/container"
 	networktypes "github.com/docker/docker/api/types/network"
 	networktypes "github.com/docker/docker/api/types/network"
 	"github.com/docker/docker/api/types/strslice"
 	"github.com/docker/docker/api/types/strslice"
+	"github.com/docker/docker/pkg/sysinfo"
 )
 )
 
 
 type f struct {
 type f struct {
@@ -46,7 +47,7 @@ func TestDecodeContainerConfig(t *testing.T) {
 			t.Fatal(err)
 			t.Fatal(err)
 		}
 		}
 
 
-		c, h, _, err := decodeContainerConfig(bytes.NewReader(b))
+		c, h, _, err := decodeContainerConfig(bytes.NewReader(b), sysinfo.New(true))
 		if err != nil {
 		if err != nil {
 			t.Fatal(fmt.Errorf("Error parsing %s: %v", f, err))
 			t.Fatal(fmt.Errorf("Error parsing %s: %v", f, err))
 		}
 		}
@@ -130,5 +131,5 @@ func callDecodeContainerConfigIsolation(isolation string) (*container.Config, *c
 	if b, err = json.Marshal(w); err != nil {
 	if b, err = json.Marshal(w); err != nil {
 		return nil, nil, nil, fmt.Errorf("Error on marshal %s", err.Error())
 		return nil, nil, nil, fmt.Errorf("Error on marshal %s", err.Error())
 	}
 	}
-	return decodeContainerConfig(bytes.NewReader(b))
+	return decodeContainerConfig(bytes.NewReader(b), sysinfo.New(true))
 }
 }

+ 1 - 0
vendor.conf

@@ -129,6 +129,7 @@ github.com/containerd/go-runc                       7016d3ce2328dd2cb1192b2076eb
 github.com/containerd/typeurl                       b45ef1f1f737e10bd45b25b669df25f0da8b9ba0
 github.com/containerd/typeurl                       b45ef1f1f737e10bd45b25b669df25f0da8b9ba0
 github.com/containerd/ttrpc                         0be804eadb152bc3b3c20c5edc314c4633833398
 github.com/containerd/ttrpc                         0be804eadb152bc3b3c20c5edc314c4633833398
 github.com/gogo/googleapis                          01e0f9cca9b92166042241267ee2a5cdf5cff46c # v1.3.2
 github.com/gogo/googleapis                          01e0f9cca9b92166042241267ee2a5cdf5cff46c # v1.3.2
+github.com/cilium/ebpf                              60c3aa43f488292fe2ee50fb8b833b383ca8ebbb
 
 
 # cluster
 # cluster
 github.com/docker/swarmkit                          ebe39a32e3ed4c3a3783a02c11cccf388818694c
 github.com/docker/swarmkit                          ebe39a32e3ed4c3a3783a02c11cccf388818694c

+ 23 - 0
vendor/github.com/cilium/ebpf/LICENSE

@@ -0,0 +1,23 @@
+MIT License
+
+Copyright (c) 2017 Nathan Sweet
+Copyright (c) 2018, 2019 Cloudflare
+Copyright (c) 2019 Authors of Cilium
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 203 - 0
vendor/github.com/cilium/ebpf/abi.go

@@ -0,0 +1,203 @@
+package ebpf
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"syscall"
+
+	"github.com/cilium/ebpf/internal"
+	"github.com/pkg/errors"
+)
+
+// MapABI are the attributes of a Map which are available across all supported kernels.
+//
+// The values are either copied from a MapSpec, reported by the kernel for a
+// map file descriptor, or parsed from /proc/self/fdinfo (the fdinfo key of
+// each field is noted below).
+type MapABI struct {
+	Type       MapType // fdinfo key "map_type"
+	KeySize    uint32  // fdinfo key "key_size"
+	ValueSize  uint32  // fdinfo key "value_size"
+	MaxEntries uint32  // fdinfo key "max_entries"
+	Flags      uint32  // fdinfo key "map_flags"
+}
+
+// newMapABIFromSpec copies the ABI attributes out of a MapSpec.
+func newMapABIFromSpec(spec *MapSpec) *MapABI {
+	abi := MapABI{
+		Type:       spec.Type,
+		KeySize:    spec.KeySize,
+		ValueSize:  spec.ValueSize,
+		MaxEntries: spec.MaxEntries,
+		Flags:      spec.Flags,
+	}
+	return &abi
+}
+
+// newMapABIFromFd reads the ABI of the map behind fd.
+//
+// It asks bpfGetMapInfoByFD first. When that fails with EINVAL it falls back
+// to newMapABIFromProc; any other failure is returned unchanged. The
+// returned name is always empty.
+func newMapABIFromFd(fd *bpfFD) (string, *MapABI, error) {
+	info, infoErr := bpfGetMapInfoByFD(fd)
+	switch {
+	case infoErr == nil:
+		return "", &MapABI{
+			Type:       MapType(info.mapType),
+			KeySize:    info.keySize,
+			ValueSize:  info.valueSize,
+			MaxEntries: info.maxEntries,
+			Flags:      info.flags,
+		}, nil
+
+	case errors.Cause(infoErr) == syscall.EINVAL:
+		procABI, procErr := newMapABIFromProc(fd)
+		return "", procABI, procErr
+
+	default:
+		return "", nil, infoErr
+	}
+}
+
+func newMapABIFromProc(fd *bpfFD) (*MapABI, error) {
+	var abi MapABI
+	err := scanFdInfo(fd, map[string]interface{}{
+		"map_type":    &abi.Type,
+		"key_size":    &abi.KeySize,
+		"value_size":  &abi.ValueSize,
+		"max_entries": &abi.MaxEntries,
+		"map_flags":   &abi.Flags,
+	})
+	if err != nil {
+		return nil, err
+	}
+	return &abi, nil
+}
+
+// Equal reports whether abi and other agree on every field.
+func (abi *MapABI) Equal(other *MapABI) bool {
+	return abi.Type == other.Type &&
+		abi.KeySize == other.KeySize &&
+		abi.ValueSize == other.ValueSize &&
+		abi.MaxEntries == other.MaxEntries &&
+		abi.Flags == other.Flags
+}
+
+// ProgramABI are the attributes of a Program which are available across all supported kernels.
+//
+// Type is either copied from a ProgramSpec, reported by the kernel for a
+// program file descriptor, or parsed from /proc/self/fdinfo.
+type ProgramABI struct {
+	Type ProgramType // fdinfo key "prog_type"
+}
+
+// newProgramABIFromSpec copies the program type out of a ProgramSpec.
+func newProgramABIFromSpec(spec *ProgramSpec) *ProgramABI {
+	abi := ProgramABI{Type: spec.Type}
+	return &abi
+}
+
+func newProgramABIFromFd(fd *bpfFD) (string, *ProgramABI, error) {
+	info, err := bpfGetProgInfoByFD(fd)
+	if err != nil {
+		if errors.Cause(err) == syscall.EINVAL {
+			return newProgramABIFromProc(fd)
+		}
+
+		return "", nil, err
+	}
+
+	var name string
+	if bpfName := convertCString(info.name[:]); bpfName != "" {
+		name = bpfName
+	} else {
+		name = convertCString(info.tag[:])
+	}
+
+	return name, &ProgramABI{
+		Type: ProgramType(info.progType),
+	}, nil
+}
+
+// newProgramABIFromProc reads the program type and tag through scanFdInfo.
+// The tag is returned as the name.
+//
+// When scanFdInfo reports errMissingFields the result is an
+// UnsupportedFeatureError naming 4.11.0 as the minimum version; every other
+// error is returned unchanged.
+func newProgramABIFromProc(fd *bpfFD) (string, *ProgramABI, error) {
+	abi := new(ProgramABI)
+	var tag string
+
+	err := scanFdInfo(fd, map[string]interface{}{
+		"prog_type": &abi.Type,
+		"prog_tag":  &tag,
+	})
+
+	switch {
+	case errors.Cause(err) == errMissingFields:
+		return "", nil, &internal.UnsupportedFeatureError{
+			Name:           "reading ABI from /proc/self/fdinfo",
+			MinimumVersion: internal.Version{4, 11, 0},
+		}
+	case err != nil:
+		return "", nil, err
+	}
+
+	return tag, abi, nil
+}
+
+// scanFdInfo opens /proc/self/fdinfo/<fd> and parses it into fields with
+// scanFdInfoReader. A parse error is wrapped with the name of the file.
+func scanFdInfo(fd *bpfFD, fields map[string]interface{}) error {
+	raw, err := fd.value()
+	if err != nil {
+		return err
+	}
+
+	path := fmt.Sprintf("/proc/self/fdinfo/%d", raw)
+	fh, err := os.Open(path)
+	if err != nil {
+		return err
+	}
+	defer fh.Close()
+
+	scanErr := scanFdInfoReader(fh, fields)
+	return errors.Wrap(scanErr, fh.Name())
+}
+
+// errMissingFields is returned by scanFdInfoReader when the number of parsed
+// values differs from the number of requested fields.
+var errMissingFields = errors.New("missing fields")
+
+// scanFdInfoReader parses fdinfo-style text from r into fields.
+//
+// Each line is split at its first tab into a name and a value, and a trailing
+// ":" is stripped from the name. When the name is a key of fields, the value
+// is scanned with fmt.Fscanln into the pointer stored under that key. Lines
+// without a tab and lines with unrequested names are skipped.
+//
+// It returns errMissingFields when the number of parsed values differs from
+// len(fields), and a descriptive error when a requested value can't be parsed.
+func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error {
+	var (
+		scanner = bufio.NewScanner(r)
+		scanned int
+	)
+
+	for scanner.Scan() {
+		parts := bytes.SplitN(scanner.Bytes(), []byte("\t"), 2)
+		if len(parts) != 2 {
+			continue
+		}
+
+		name := bytes.TrimSuffix(parts[0], []byte(":"))
+		field, ok := fields[string(name)]
+		if !ok {
+			continue
+		}
+
+		n, err := fmt.Fscanln(bytes.NewReader(parts[1]), field)
+		if err != nil {
+			return errors.Wrapf(err, "can't parse field %s", name)
+		}
+		if n != 1 {
+			// errors.Wrapf returns nil for a nil error, so a short scan
+			// needs an error of its own instead of being wrapped.
+			return errors.Errorf("can't parse field %s", name)
+		}
+
+		scanned++
+	}
+
+	if err := scanner.Err(); err != nil {
+		return err
+	}
+
+	if scanned != len(fields) {
+		return errMissingFields
+	}
+
+	return nil
+}
+
+// Equal reports whether abi and other have the same Type.
+func (abi *ProgramABI) Equal(other *ProgramABI) bool {
+	return abi.Type == other.Type
+}

+ 149 - 0
vendor/github.com/cilium/ebpf/asm/alu.go

@@ -0,0 +1,149 @@
+package asm
+
+//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp
+
+// Source of ALU / ALU64 / Branch operations
+//
+//    msb      lsb
+//    +----+-+---+
+//    |op  |S|cls|
+//    +----+-+---+
+type Source uint8
+
+const sourceMask OpCode = 0x08
+
+// Source bitmask
+const (
+	// InvalidSource is returned by getters when invoked
+	// on non ALU / branch OpCodes.
+	InvalidSource Source = 0xff
+	// ImmSource src is from constant
+	ImmSource Source = 0x00
+	// RegSource src is from register
+	RegSource Source = 0x08
+)
+
+// The Endianness of a byte swap instruction.
+type Endianness uint8
+
+const endianMask = sourceMask
+
+// Endian flags
+const (
+	InvalidEndian Endianness = 0xff
+	// Convert to little endian
+	LE Endianness = 0x00
+	// Convert to big endian
+	BE Endianness = 0x08
+)
+
+// ALUOp are ALU / ALU64 operations
+//
+//    msb      lsb
+//    +----+-+---+
+//    |OP  |s|cls|
+//    +----+-+---+
+type ALUOp uint8
+
+const aluMask OpCode = 0xf0
+
+const (
+	// InvalidALUOp is returned by getters when invoked
+	// on non ALU OpCodes
+	InvalidALUOp ALUOp = 0xff
+	// Add - addition
+	Add ALUOp = 0x00
+	// Sub - subtraction
+	Sub ALUOp = 0x10
+	// Mul - multiplication
+	Mul ALUOp = 0x20
+	// Div - division
+	Div ALUOp = 0x30
+	// Or - bitwise or
+	Or ALUOp = 0x40
+	// And - bitwise and
+	And ALUOp = 0x50
+	// LSh - bitwise shift left
+	LSh ALUOp = 0x60
+	// RSh - bitwise shift right
+	RSh ALUOp = 0x70
+	// Neg - sign/unsign signing bit
+	Neg ALUOp = 0x80
+	// Mod - modulo
+	Mod ALUOp = 0x90
+	// Xor - bitwise xor
+	Xor ALUOp = 0xa0
+	// Mov - move value from one place to another
+	Mov ALUOp = 0xb0
+	// ArSh - arithmetic shift
+	ArSh ALUOp = 0xc0
+	// Swap - endian conversions
+	Swap ALUOp = 0xd0
+)
+
+// HostTo emits a Swap instruction that converts dst from host byte order to
+// the given endianness.
+//
+// size must be Half, Word or DWord and selects an operand width of 16, 32 or
+// 64. Any other size yields an instruction with InvalidOpCode.
+func HostTo(endian Endianness, dst Register, size Size) Instruction {
+	var width int64
+	switch size {
+	case Half:
+		width = 16
+	case Word:
+		width = 32
+	case DWord:
+		width = 64
+	default:
+		return Instruction{OpCode: InvalidOpCode}
+	}
+
+	swap := OpCode(ALUClass).SetALUOp(Swap).SetSource(Source(endian))
+	return Instruction{OpCode: swap, Dst: dst, Constant: width}
+}
+
+// Op builds the ALU64Class OpCode for op with the given operand source.
+func (op ALUOp) Op(source Source) OpCode {
+	code := OpCode(ALU64Class).SetALUOp(op)
+	return code.SetSource(source)
+}
+
+// Reg emits `dst (op) src`.
+func (op ALUOp) Reg(dst, src Register) Instruction {
+	return Instruction{
+		OpCode: op.Op(RegSource),
+		Dst:    dst,
+		Src:    src,
+	}
+}
+
+// Imm emits `dst (op) value`.
+func (op ALUOp) Imm(dst Register, value int32) Instruction {
+	return Instruction{
+		OpCode:   op.Op(ImmSource),
+		Dst:      dst,
+		Constant: int64(value),
+	}
+}
+
+// Op32 builds the ALUClass (32-bit) OpCode for op with the given operand source.
+func (op ALUOp) Op32(source Source) OpCode {
+	code := OpCode(ALUClass).SetALUOp(op)
+	return code.SetSource(source)
+}
+
+// Reg32 emits `dst (op) src`, zeroing the upper 32 bit of dst.
+func (op ALUOp) Reg32(dst, src Register) Instruction {
+	return Instruction{
+		OpCode: op.Op32(RegSource),
+		Dst:    dst,
+		Src:    src,
+	}
+}
+
+// Imm32 emits `dst (op) value`, zeroing the upper 32 bit of dst.
+func (op ALUOp) Imm32(dst Register, value int32) Instruction {
+	return Instruction{
+		OpCode:   op.Op32(ImmSource),
+		Dst:      dst,
+		Constant: int64(value),
+	}
+}

+ 107 - 0
vendor/github.com/cilium/ebpf/asm/alu_string.go

@@ -0,0 +1,107 @@
+// Code generated by "stringer -output alu_string.go -type=Source,Endianness,ALUOp"; DO NOT EDIT.
+
+package asm
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidSource-255]
+	_ = x[ImmSource-0]
+	_ = x[RegSource-8]
+}
+
+const (
+	_Source_name_0 = "ImmSource"
+	_Source_name_1 = "RegSource"
+	_Source_name_2 = "InvalidSource"
+)
+
+func (i Source) String() string {
+	switch {
+	case i == 0:
+		return _Source_name_0
+	case i == 8:
+		return _Source_name_1
+	case i == 255:
+		return _Source_name_2
+	default:
+		return "Source(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidEndian-255]
+	_ = x[LE-0]
+	_ = x[BE-8]
+}
+
+const (
+	_Endianness_name_0 = "LE"
+	_Endianness_name_1 = "BE"
+	_Endianness_name_2 = "InvalidEndian"
+)
+
+func (i Endianness) String() string {
+	switch {
+	case i == 0:
+		return _Endianness_name_0
+	case i == 8:
+		return _Endianness_name_1
+	case i == 255:
+		return _Endianness_name_2
+	default:
+		return "Endianness(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidALUOp-255]
+	_ = x[Add-0]
+	_ = x[Sub-16]
+	_ = x[Mul-32]
+	_ = x[Div-48]
+	_ = x[Or-64]
+	_ = x[And-80]
+	_ = x[LSh-96]
+	_ = x[RSh-112]
+	_ = x[Neg-128]
+	_ = x[Mod-144]
+	_ = x[Xor-160]
+	_ = x[Mov-176]
+	_ = x[ArSh-192]
+	_ = x[Swap-208]
+}
+
+const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp"
+
+var _ALUOp_map = map[ALUOp]string{
+	0:   _ALUOp_name[0:3],
+	16:  _ALUOp_name[3:6],
+	32:  _ALUOp_name[6:9],
+	48:  _ALUOp_name[9:12],
+	64:  _ALUOp_name[12:14],
+	80:  _ALUOp_name[14:17],
+	96:  _ALUOp_name[17:20],
+	112: _ALUOp_name[20:23],
+	128: _ALUOp_name[23:26],
+	144: _ALUOp_name[26:29],
+	160: _ALUOp_name[29:32],
+	176: _ALUOp_name[32:35],
+	192: _ALUOp_name[35:39],
+	208: _ALUOp_name[39:43],
+	255: _ALUOp_name[43:55],
+}
+
+func (i ALUOp) String() string {
+	if str, ok := _ALUOp_map[i]; ok {
+		return str
+	}
+	return "ALUOp(" + strconv.FormatInt(int64(i), 10) + ")"
+}

+ 2 - 0
vendor/github.com/cilium/ebpf/asm/doc.go

@@ -0,0 +1,2 @@
+// Package asm is an assembler for eBPF bytecode.
+package asm

+ 143 - 0
vendor/github.com/cilium/ebpf/asm/func.go

@@ -0,0 +1,143 @@
+package asm
+
+//go:generate stringer -output func_string.go -type=BuiltinFunc
+
+// BuiltinFunc is a built-in eBPF function.
+type BuiltinFunc int32
+
+// eBPF built-in functions
+//
+// You can regenerate this list using the following gawk script:
+//
+//    /FN\(.+\),/ {
+//      match($1, /\((.+)\)/, r)
+//      split(r[1], p, "_")
+//      printf "Fn"
+//      for (i in p) {
+//        printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2)
+//      }
+//      print ""
+//    }
+//
+// The script expects include/uapi/linux/bpf.h as its input.
+const (
+	FnUnspec BuiltinFunc = iota
+	FnMapLookupElem
+	FnMapUpdateElem
+	FnMapDeleteElem
+	FnProbeRead
+	FnKtimeGetNs
+	FnTracePrintk
+	FnGetPrandomU32
+	FnGetSmpProcessorId
+	FnSkbStoreBytes
+	FnL3CsumReplace
+	FnL4CsumReplace
+	FnTailCall
+	FnCloneRedirect
+	FnGetCurrentPidTgid
+	FnGetCurrentUidGid
+	FnGetCurrentComm
+	FnGetCgroupClassid
+	FnSkbVlanPush
+	FnSkbVlanPop
+	FnSkbGetTunnelKey
+	FnSkbSetTunnelKey
+	FnPerfEventRead
+	FnRedirect
+	FnGetRouteRealm
+	FnPerfEventOutput
+	FnSkbLoadBytes
+	FnGetStackid
+	FnCsumDiff
+	FnSkbGetTunnelOpt
+	FnSkbSetTunnelOpt
+	FnSkbChangeProto
+	FnSkbChangeType
+	FnSkbUnderCgroup
+	FnGetHashRecalc
+	FnGetCurrentTask
+	FnProbeWriteUser
+	FnCurrentTaskUnderCgroup
+	FnSkbChangeTail
+	FnSkbPullData
+	FnCsumUpdate
+	FnSetHashInvalid
+	FnGetNumaNodeId
+	FnSkbChangeHead
+	FnXdpAdjustHead
+	FnProbeReadStr
+	FnGetSocketCookie
+	FnGetSocketUid
+	FnSetHash
+	FnSetsockopt
+	FnSkbAdjustRoom
+	FnRedirectMap
+	FnSkRedirectMap
+	FnSockMapUpdate
+	FnXdpAdjustMeta
+	FnPerfEventReadValue
+	FnPerfProgReadValue
+	FnGetsockopt
+	FnOverrideReturn
+	FnSockOpsCbFlagsSet
+	FnMsgRedirectMap
+	FnMsgApplyBytes
+	FnMsgCorkBytes
+	FnMsgPullData
+	FnBind
+	FnXdpAdjustTail
+	FnSkbGetXfrmState
+	FnGetStack
+	FnSkbLoadBytesRelative
+	FnFibLookup
+	FnSockHashUpdate
+	FnMsgRedirectHash
+	FnSkRedirectHash
+	FnLwtPushEncap
+	FnLwtSeg6StoreBytes
+	FnLwtSeg6AdjustSrh
+	FnLwtSeg6Action
+	FnRcRepeat
+	FnRcKeydown
+	FnSkbCgroupId
+	FnGetCurrentCgroupId
+	FnGetLocalStorage
+	FnSkSelectReuseport
+	FnSkbAncestorCgroupId
+	FnSkLookupTcp
+	FnSkLookupUdp
+	FnSkRelease
+	FnMapPushElem
+	FnMapPopElem
+	FnMapPeekElem
+	FnMsgPushData
+	FnMsgPopData
+	FnRcPointerRel
+	FnSpinLock
+	FnSpinUnlock
+	FnSkFullsock
+	FnTcpSock
+	FnSkbEcnSetCe
+	FnGetListenerSock
+	FnSkcLookupTcp
+	FnTcpCheckSyncookie
+	FnSysctlGetName
+	FnSysctlGetCurrentValue
+	FnSysctlGetNewValue
+	FnSysctlSetNewValue
+	FnStrtol
+	FnStrtoul
+	FnSkStorageGet
+	FnSkStorageDelete
+	FnSendSignal
+	FnTcpGenSyncookie
+)
+
+// Call emits a JumpClass Call instruction whose constant is the number of
+// the built-in function.
+func (fn BuiltinFunc) Call() Instruction {
+	callOp := OpCode(JumpClass).SetJumpOp(Call)
+	return Instruction{OpCode: callOp, Constant: int64(fn)}
+}

+ 133 - 0
vendor/github.com/cilium/ebpf/asm/func_string.go

@@ -0,0 +1,133 @@
+// Code generated by "stringer -output func_string.go -type=BuiltinFunc"; DO NOT EDIT.
+
+package asm
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[FnUnspec-0]
+	_ = x[FnMapLookupElem-1]
+	_ = x[FnMapUpdateElem-2]
+	_ = x[FnMapDeleteElem-3]
+	_ = x[FnProbeRead-4]
+	_ = x[FnKtimeGetNs-5]
+	_ = x[FnTracePrintk-6]
+	_ = x[FnGetPrandomU32-7]
+	_ = x[FnGetSmpProcessorId-8]
+	_ = x[FnSkbStoreBytes-9]
+	_ = x[FnL3CsumReplace-10]
+	_ = x[FnL4CsumReplace-11]
+	_ = x[FnTailCall-12]
+	_ = x[FnCloneRedirect-13]
+	_ = x[FnGetCurrentPidTgid-14]
+	_ = x[FnGetCurrentUidGid-15]
+	_ = x[FnGetCurrentComm-16]
+	_ = x[FnGetCgroupClassid-17]
+	_ = x[FnSkbVlanPush-18]
+	_ = x[FnSkbVlanPop-19]
+	_ = x[FnSkbGetTunnelKey-20]
+	_ = x[FnSkbSetTunnelKey-21]
+	_ = x[FnPerfEventRead-22]
+	_ = x[FnRedirect-23]
+	_ = x[FnGetRouteRealm-24]
+	_ = x[FnPerfEventOutput-25]
+	_ = x[FnSkbLoadBytes-26]
+	_ = x[FnGetStackid-27]
+	_ = x[FnCsumDiff-28]
+	_ = x[FnSkbGetTunnelOpt-29]
+	_ = x[FnSkbSetTunnelOpt-30]
+	_ = x[FnSkbChangeProto-31]
+	_ = x[FnSkbChangeType-32]
+	_ = x[FnSkbUnderCgroup-33]
+	_ = x[FnGetHashRecalc-34]
+	_ = x[FnGetCurrentTask-35]
+	_ = x[FnProbeWriteUser-36]
+	_ = x[FnCurrentTaskUnderCgroup-37]
+	_ = x[FnSkbChangeTail-38]
+	_ = x[FnSkbPullData-39]
+	_ = x[FnCsumUpdate-40]
+	_ = x[FnSetHashInvalid-41]
+	_ = x[FnGetNumaNodeId-42]
+	_ = x[FnSkbChangeHead-43]
+	_ = x[FnXdpAdjustHead-44]
+	_ = x[FnProbeReadStr-45]
+	_ = x[FnGetSocketCookie-46]
+	_ = x[FnGetSocketUid-47]
+	_ = x[FnSetHash-48]
+	_ = x[FnSetsockopt-49]
+	_ = x[FnSkbAdjustRoom-50]
+	_ = x[FnRedirectMap-51]
+	_ = x[FnSkRedirectMap-52]
+	_ = x[FnSockMapUpdate-53]
+	_ = x[FnXdpAdjustMeta-54]
+	_ = x[FnPerfEventReadValue-55]
+	_ = x[FnPerfProgReadValue-56]
+	_ = x[FnGetsockopt-57]
+	_ = x[FnOverrideReturn-58]
+	_ = x[FnSockOpsCbFlagsSet-59]
+	_ = x[FnMsgRedirectMap-60]
+	_ = x[FnMsgApplyBytes-61]
+	_ = x[FnMsgCorkBytes-62]
+	_ = x[FnMsgPullData-63]
+	_ = x[FnBind-64]
+	_ = x[FnXdpAdjustTail-65]
+	_ = x[FnSkbGetXfrmState-66]
+	_ = x[FnGetStack-67]
+	_ = x[FnSkbLoadBytesRelative-68]
+	_ = x[FnFibLookup-69]
+	_ = x[FnSockHashUpdate-70]
+	_ = x[FnMsgRedirectHash-71]
+	_ = x[FnSkRedirectHash-72]
+	_ = x[FnLwtPushEncap-73]
+	_ = x[FnLwtSeg6StoreBytes-74]
+	_ = x[FnLwtSeg6AdjustSrh-75]
+	_ = x[FnLwtSeg6Action-76]
+	_ = x[FnRcRepeat-77]
+	_ = x[FnRcKeydown-78]
+	_ = x[FnSkbCgroupId-79]
+	_ = x[FnGetCurrentCgroupId-80]
+	_ = x[FnGetLocalStorage-81]
+	_ = x[FnSkSelectReuseport-82]
+	_ = x[FnSkbAncestorCgroupId-83]
+	_ = x[FnSkLookupTcp-84]
+	_ = x[FnSkLookupUdp-85]
+	_ = x[FnSkRelease-86]
+	_ = x[FnMapPushElem-87]
+	_ = x[FnMapPopElem-88]
+	_ = x[FnMapPeekElem-89]
+	_ = x[FnMsgPushData-90]
+	_ = x[FnMsgPopData-91]
+	_ = x[FnRcPointerRel-92]
+	_ = x[FnSpinLock-93]
+	_ = x[FnSpinUnlock-94]
+	_ = x[FnSkFullsock-95]
+	_ = x[FnTcpSock-96]
+	_ = x[FnSkbEcnSetCe-97]
+	_ = x[FnGetListenerSock-98]
+	_ = x[FnSkcLookupTcp-99]
+	_ = x[FnTcpCheckSyncookie-100]
+	_ = x[FnSysctlGetName-101]
+	_ = x[FnSysctlGetCurrentValue-102]
+	_ = x[FnSysctlGetNewValue-103]
+	_ = x[FnSysctlSetNewValue-104]
+	_ = x[FnStrtol-105]
+	_ = x[FnStrtoul-106]
+	_ = x[FnSkStorageGet-107]
+	_ = x[FnSkStorageDelete-108]
+	_ = x[FnSendSignal-109]
+	_ = x[FnTcpGenSyncookie-110]
+}
+
+const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookie"
+
+var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632}
+
+func (i BuiltinFunc) String() string {
+	if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) {
+		return "BuiltinFunc(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _BuiltinFunc_name[_BuiltinFunc_index[i]:_BuiltinFunc_index[i+1]]
+}

+ 416 - 0
vendor/github.com/cilium/ebpf/asm/instruction.go

@@ -0,0 +1,416 @@
+package asm
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"math"
+	"strings"
+
+	"github.com/pkg/errors"
+)
+
+// InstructionSize is the size of a BPF instruction in bytes
+const InstructionSize = 8
+
+// Instruction is a single eBPF instruction.
+//
+// It is the decoded form: Marshal and Unmarshal translate it to and from
+// one or two encoded instruction slots.
+type Instruction struct {
+	OpCode    OpCode
+	Dst       Register
+	Src       Register
+	Offset    int16
+	Constant  int64  // only the low 32 bits are encoded unless OpCode is a 64 bit load
+	Reference string // name of the symbol this instruction refers to, if any
+	Symbol    string // name attached to this instruction by Sym, if any
+}
+
+// Sym returns a copy of ins whose Symbol is set to name.
+func (ins Instruction) Sym(name string) Instruction {
+	labelled := ins
+	labelled.Symbol = name
+	return labelled
+}
+
+// Unmarshal decodes one BPF instruction from r using byte order bo and
+// returns the number of bytes consumed.
+//
+// A 64 bit load occupies two slots: the second slot must carry nothing but
+// the upper half of the constant, otherwise an error is returned.
+func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, error) {
+	var first bpfInstruction
+	if err := binary.Read(r, bo, &first); err != nil {
+		return 0, err
+	}
+
+	ins.OpCode = first.OpCode
+	ins.Dst = first.Registers.Dst()
+	ins.Src = first.Registers.Src()
+	ins.Offset = first.Offset
+	ins.Constant = int64(first.Constant)
+
+	if !first.OpCode.isDWordLoad() {
+		return InstructionSize, nil
+	}
+
+	var second bpfInstruction
+	if err := binary.Read(r, bo, &second); err != nil {
+		// No Wrap, to avoid io.EOF clash
+		return 0, errors.New("64bit immediate is missing second half")
+	}
+	if second.OpCode != 0 || second.Offset != 0 || second.Registers != 0 {
+		return 0, errors.New("64bit immediate has non-zero fields")
+	}
+
+	// Combine the two 32 bit halves without sign extension.
+	high := uint64(uint32(second.Constant)) << 32
+	low := uint64(uint32(first.Constant))
+	ins.Constant = int64(high | low)
+
+	return 2 * InstructionSize, nil
+}
+
+// Marshal encodes ins to w using byte order bo and returns the number of
+// bytes written.
+//
+// An instruction with InvalidOpCode is rejected. A 64 bit load is written
+// as two slots: the first holds the low 32 bits of the constant, the second
+// holds only the high 32 bits.
+func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) {
+	if ins.OpCode == InvalidOpCode {
+		return 0, errors.New("invalid opcode")
+	}
+
+	wide := ins.OpCode.isDWordLoad()
+
+	first := bpfInstruction{
+		OpCode:    ins.OpCode,
+		Registers: newBPFRegisters(ins.Dst, ins.Src),
+		Offset:    ins.Offset,
+		// The conversion keeps the least significant 32 bits.
+		Constant: int32(ins.Constant),
+	}
+	if err := binary.Write(w, bo, &first); err != nil {
+		return 0, err
+	}
+	if !wide {
+		return InstructionSize, nil
+	}
+
+	second := bpfInstruction{Constant: int32(ins.Constant >> 32)}
+	if err := binary.Write(w, bo, &second); err != nil {
+		return 0, err
+	}
+
+	return 2 * InstructionSize, nil
+}
+
+// RewriteMapPtr points the instruction at a new map fd by setting Src to R1
+// and Constant to fd.
+//
+// Returns an error if the instruction is not a 64 bit load, or if fd is
+// negative.
+func (ins *Instruction) RewriteMapPtr(fd int) error {
+	switch {
+	case !ins.OpCode.isDWordLoad():
+		return errors.Errorf("%s is not a 64 bit load", ins.OpCode)
+	case fd < 0:
+		return errors.New("invalid fd")
+	}
+
+	ins.Src, ins.Constant = R1, int64(fd)
+	return nil
+}
+
+// Format implements fmt.Formatter.
+//
+// Only the 'v' verb is supported; any other verb prints a
+// "{UNRECOGNIZED: ...}" marker. The output is the opcode followed by the
+// operands relevant to its class, and finally the Reference in angle
+// brackets when one is set.
+func (ins Instruction) Format(f fmt.State, c rune) {
+	if c != 'v' {
+		fmt.Fprintf(f, "{UNRECOGNIZED: %c}", c)
+		return
+	}
+
+	op := ins.OpCode
+
+	if op == InvalidOpCode {
+		fmt.Fprint(f, "INVALID")
+		return
+	}
+
+	// Omit trailing space for Exit
+	if op.JumpOp() == Exit {
+		fmt.Fprint(f, op)
+		return
+	}
+
+	fmt.Fprintf(f, "%v ", op)
+	switch cls := op.Class(); cls {
+	case LdClass, LdXClass, StClass, StXClass:
+		// Loads and stores: which operands are printed depends on the mode.
+		switch op.Mode() {
+		case ImmMode:
+			fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant)
+		case AbsMode:
+			fmt.Fprintf(f, "imm: %d", ins.Constant)
+		case IndMode:
+			fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant)
+		case MemMode:
+			fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant)
+		case XAddMode:
+			fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src)
+		}
+
+	case ALU64Class, ALUClass:
+		fmt.Fprintf(f, "dst: %s ", ins.Dst)
+		// Swap always prints the constant, whatever its source bit says.
+		if op.ALUOp() == Swap || op.Source() == ImmSource {
+			fmt.Fprintf(f, "imm: %d", ins.Constant)
+		} else {
+			fmt.Fprintf(f, "src: %s", ins.Src)
+		}
+
+	case JumpClass:
+		switch jop := op.JumpOp(); jop {
+		case Call:
+			if ins.Src == R1 {
+				// bpf-to-bpf call
+				fmt.Fprint(f, ins.Constant)
+			} else {
+				// Otherwise the constant is printed as a BuiltinFunc.
+				fmt.Fprint(f, BuiltinFunc(ins.Constant))
+			}
+
+		default:
+			fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset)
+			if op.Source() == ImmSource {
+				fmt.Fprintf(f, "imm: %d", ins.Constant)
+			} else {
+				fmt.Fprintf(f, "src: %s", ins.Src)
+			}
+		}
+	}
+
+	if ins.Reference != "" {
+		fmt.Fprintf(f, " <%s>", ins.Reference)
+	}
+}
+
+// Instructions is an eBPF program.
+type Instructions []Instruction
+
+// String renders the program through its Format method with the 'v' verb.
+func (insns Instructions) String() string {
+	return fmt.Sprintf("%v", insns)
+}
+
+// RewriteMapPtr rewrites every instruction whose Reference equals symbol to
+// load the map with the given fd.
+//
+// Returns an error if symbol is empty, if an individual rewrite fails, or if
+// no instruction references the symbol, see IsUnreferencedSymbol.
+func (insns Instructions) RewriteMapPtr(symbol string, fd int) error {
+	if symbol == "" {
+		return errors.New("empty symbol")
+	}
+
+	rewritten := 0
+	for i := range insns {
+		if insns[i].Reference != symbol {
+			continue
+		}
+		if err := insns[i].RewriteMapPtr(fd); err != nil {
+			return err
+		}
+		rewritten++
+	}
+
+	if rewritten == 0 {
+		return &unreferencedSymbolError{symbol}
+	}
+	return nil
+}
+
+// SymbolOffsets maps every symbol to the index of the instruction carrying
+// it. It returns an error if a symbol occurs more than once.
+func (insns Instructions) SymbolOffsets() (map[string]int, error) {
+	offsets := make(map[string]int)
+
+	for idx := range insns {
+		sym := insns[idx].Symbol
+		if sym == "" {
+			continue
+		}
+
+		if _, dup := offsets[sym]; dup {
+			return nil, errors.Errorf("duplicate symbol %s", sym)
+		}
+		offsets[sym] = idx
+	}
+
+	return offsets, nil
+}
+
+// ReferenceOffsets maps every reference to the indexes of the instructions
+// that use it, in program order.
+func (insns Instructions) ReferenceOffsets() map[string][]int {
+	offsets := make(map[string][]int)
+
+	for idx := range insns {
+		ref := insns[idx].Reference
+		if ref != "" {
+			offsets[ref] = append(offsets[ref], idx)
+		}
+	}
+
+	return offsets
+}
+
+// marshalledOffsets maps every symbol to the position of its instruction
+// counted in marshalled instruction slots, which differs from the slice
+// index once an earlier instruction occupies more than one slot. It returns
+// an error if a symbol occurs more than once.
+func (insns Instructions) marshalledOffsets() (map[string]int, error) {
+	positions := make(map[string]int)
+
+	next := 0
+	for _, ins := range insns {
+		here := next
+		next += ins.OpCode.marshalledInstructions()
+
+		if ins.Symbol == "" {
+			continue
+		}
+		if _, dup := positions[ins.Symbol]; dup {
+			return nil, errors.Errorf("duplicate symbol %s", ins.Symbol)
+		}
+		positions[ins.Symbol] = here
+	}
+
+	return positions, nil
+}
+
+// Format implements fmt.Formatter for the 's' and 'v' verbs.
+//
+// Every instruction is printed on its own line, prefixed by its offset in
+// marshalled instruction slots. A symbol is printed on a separate line ahead
+// of the instruction that carries it.
+//
+// The precision sets how far instructions are indented (default 1) and the
+// width sets how far symbols are indented (default precision - 1, never
+// below 0). The default character is a tab, which can be overridden by
+// specifying the ' ' space flag.
+func (insns Instructions) Format(f fmt.State, c rune) {
+	switch c {
+	case 's', 'v':
+	default:
+		fmt.Fprintf(f, "{UNKNOWN FORMAT '%c'}", c)
+		return
+	}
+
+	fill := "\t"
+	if f.Flag(' ') {
+		fill = " "
+	}
+
+	// Precision is better in this case, because it allows
+	// specifying 0 padding easily.
+	insnPad, havePrecision := f.Precision()
+	if !havePrecision {
+		insnPad = 1
+	}
+	insnIndent := strings.Repeat(fill, insnPad)
+
+	symPad, haveWidth := f.Width()
+	if !haveWidth {
+		symPad = insnPad - 1
+	}
+	if symPad < 0 {
+		symPad = 0
+	}
+	symIndent := strings.Repeat(fill, symPad)
+
+	// The offset column is sized from the total number of marshalled slots.
+	slots := 0
+	for _, ins := range insns {
+		slots += ins.OpCode.marshalledInstructions()
+	}
+	offsetWidth := int(math.Ceil(math.Log10(float64(slots))))
+
+	pos := 0
+	for _, ins := range insns {
+		if ins.Symbol != "" {
+			fmt.Fprintf(f, "%s%s:\n", symIndent, ins.Symbol)
+		}
+		fmt.Fprintf(f, "%s%*d: %v\n", insnIndent, offsetWidth, pos, ins)
+		pos += ins.OpCode.marshalledInstructions()
+	}
+}
+
+// Marshal encodes a BPF program into the kernel format.
+//
+// Before an instruction is written, a Call whose Constant is -1 and a
+// JumpClass instruction whose Offset is -1 are resolved: the placeholder is
+// replaced by the distance, in marshalled slots, from the following slot to
+// the symbol named by Reference. A reference to an unknown symbol is an
+// error. The rewrite happens on a copy; insns itself is not modified.
+func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
+	labels, err := insns.marshalledOffsets()
+	if err != nil {
+		return err
+	}
+
+	emitted := 0
+	for i, ins := range insns {
+		if ins.OpCode.JumpOp() == Call && ins.Constant == -1 {
+			// Rewrite bpf to bpf call
+			target, ok := labels[ins.Reference]
+			if !ok {
+				return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
+			}
+			ins.Constant = int64(target - emitted - 1)
+		} else if ins.OpCode.Class() == JumpClass && ins.Offset == -1 {
+			// Rewrite jump to label
+			target, ok := labels[ins.Reference]
+			if !ok {
+				return errors.Errorf("instruction %d: reference to missing symbol %s", i, ins.Reference)
+			}
+			ins.Offset = int16(target - emitted - 1)
+		}
+
+		written, err := ins.Marshal(w, bo)
+		if err != nil {
+			return errors.Wrapf(err, "instruction %d", i)
+		}
+
+		emitted += int(written / InstructionSize)
+	}
+	return nil
+}
+
+// bpfInstruction is the fixed-size encoding of one instruction slot. It is
+// what Instruction.Unmarshal and Instruction.Marshal pass to binary.Read and
+// binary.Write, so the field order and widths define the encoded layout.
+type bpfInstruction struct {
+	OpCode    OpCode
+	Registers bpfRegisters // Dst in the low four bits, Src in the high four bits
+	Offset    int16
+	Constant  int32
+}
+
+// bpfRegisters packs two registers into one byte: the destination in the
+// low four bits and the source in the high four bits.
+type bpfRegisters uint8
+
+// newBPFRegisters packs dst and src into a bpfRegisters value.
+func newBPFRegisters(dst, src Register) bpfRegisters {
+	low := dst & 0xF
+	high := src << 4
+	return bpfRegisters(high | low)
+}
+
+// Dst extracts the destination register from the low four bits.
+func (r bpfRegisters) Dst() Register {
+	low := r & 0xF
+	return Register(low)
+}
+
+// Src extracts the source register from the high four bits.
+func (r bpfRegisters) Src() Register {
+	high := r >> 4
+	return Register(high)
+}
+
+// unreferencedSymbolError is returned by Instructions.RewriteMapPtr when no
+// instruction references the requested symbol.
+type unreferencedSymbolError struct {
+	symbol string
+}
+
+// Error implements the error interface.
+func (use *unreferencedSymbolError) Error() string {
+	return "unreferenced symbol " + use.symbol
+}
+
+// IsUnreferencedSymbol returns true if err was caused by
+// an unreferenced symbol.
+//
+// err is unwrapped with errors.Cause first, so the check also succeeds when
+// a caller has added context with errors.Wrap or errors.WithMessage. A nil
+// err yields false.
+func IsUnreferencedSymbol(err error) bool {
+	_, ok := errors.Cause(err).(*unreferencedSymbolError)
+	return ok
+}

+ 109 - 0
vendor/github.com/cilium/ebpf/asm/jump.go

@@ -0,0 +1,109 @@
+package asm
+
+//go:generate stringer -output jump_string.go -type=JumpOp
+
+// JumpOp is the operation of a jump instruction. It affects control flow.
+//
+//    msb      lsb
+//    +----+-+---+
+//    |OP  |s|cls|
+//    +----+-+---+
+type JumpOp uint8
+
+// jumpMask selects the JumpOp bits of an OpCode. It is the same mask
+// that is used for ALU operations.
+const jumpMask OpCode = aluMask
+
+const (
+	// InvalidJumpOp is returned by getters when invoked
+	// on non branch OpCodes
+	InvalidJumpOp JumpOp = 0xff
+	// Ja jumps by offset unconditionally
+	Ja JumpOp = 0x00
+	// JEq jumps by offset if r == imm
+	JEq JumpOp = 0x10
+	// JGT jumps by offset if r > imm
+	JGT JumpOp = 0x20
+	// JGE jumps by offset if r >= imm
+	JGE JumpOp = 0x30
+	// JSet jumps by offset if r & imm
+	JSet JumpOp = 0x40
+	// JNE jumps by offset if r != imm
+	JNE JumpOp = 0x50
+	// JSGT jumps by offset if signed r > signed imm
+	JSGT JumpOp = 0x60
+	// JSGE jumps by offset if signed r >= signed imm
+	JSGE JumpOp = 0x70
+	// Call builtin or user defined function from imm
+	Call JumpOp = 0x80
+	// Exit ends execution, with value in r0
+	Exit JumpOp = 0x90
+	// JLT jumps by offset if r < imm
+	JLT JumpOp = 0xa0
+	// JLE jumps by offset if r <= imm
+	JLE JumpOp = 0xb0
+	// JSLT jumps by offset if signed r < signed imm
+	JSLT JumpOp = 0xc0
+	// JSLE jumps by offset if signed r <= signed imm
+	JSLE JumpOp = 0xd0
+)
+
+// Return emits an exit instruction.
+//
+// Requires a return value in R0.
+func Return() Instruction {
+	exitOp := OpCode(JumpClass).SetJumpOp(Exit)
+	return Instruction{OpCode: exitOp}
+}
+
+// Op returns the jump class OpCode for this operation with the given
+// source.
+func (op JumpOp) Op(source Source) OpCode {
+	jump := OpCode(JumpClass).SetJumpOp(op)
+	return jump.SetSource(source)
+}
+
+// Imm emits a conditional jump to label which compares dst to the
+// immediate value.
+//
+// Offset is set to -1 and the label is stored as Reference. Exit, Call
+// and Ja yield an instruction with InvalidOpCode.
+func (op JumpOp) Imm(dst Register, value int32, label string) Instruction {
+	switch op {
+	case Exit, Call, Ja:
+		return Instruction{OpCode: InvalidOpCode}
+	}
+
+	var ins Instruction
+	ins.OpCode = OpCode(JumpClass).SetJumpOp(op).SetSource(ImmSource)
+	ins.Dst = dst
+	ins.Offset = -1
+	ins.Constant = int64(value)
+	ins.Reference = label
+	return ins
+}
+
+// Reg emits a conditional jump to label which compares dst to src.
+//
+// Offset is set to -1 and the label is stored as Reference. Exit, Call
+// and Ja yield an instruction with InvalidOpCode.
+func (op JumpOp) Reg(dst, src Register, label string) Instruction {
+	switch op {
+	case Exit, Call, Ja:
+		return Instruction{OpCode: InvalidOpCode}
+	}
+
+	var ins Instruction
+	ins.OpCode = OpCode(JumpClass).SetJumpOp(op).SetSource(RegSource)
+	ins.Dst, ins.Src = dst, src
+	ins.Offset = -1
+	ins.Reference = label
+	return ins
+}
+
+// Label adjusts PC to the address of the label.
+//
+// For Call the instruction gets Src R1 and a Constant of -1, for every
+// other operation it gets an Offset of -1. In both cases the label is
+// stored as Reference.
+func (op JumpOp) Label(label string) Instruction {
+	ins := Instruction{
+		OpCode:    OpCode(JumpClass).SetJumpOp(op),
+		Reference: label,
+	}
+
+	if op == Call {
+		ins.Src = R1
+		ins.Constant = -1
+	} else {
+		ins.Offset = -1
+	}
+	return ins
+}

+ 53 - 0
vendor/github.com/cilium/ebpf/asm/jump_string.go

@@ -0,0 +1,53 @@
+// Code generated by "stringer -output jump_string.go -type=JumpOp"; DO NOT EDIT.
+
+package asm
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidJumpOp-255]
+	_ = x[Ja-0]
+	_ = x[JEq-16]
+	_ = x[JGT-32]
+	_ = x[JGE-48]
+	_ = x[JSet-64]
+	_ = x[JNE-80]
+	_ = x[JSGT-96]
+	_ = x[JSGE-112]
+	_ = x[Call-128]
+	_ = x[Exit-144]
+	_ = x[JLT-160]
+	_ = x[JLE-176]
+	_ = x[JSLT-192]
+	_ = x[JSLE-208]
+}
+
+const _JumpOp_name = "JaJEqJGTJGEJSetJNEJSGTJSGECallExitJLTJLEJSLTJSLEInvalidJumpOp"
+
+var _JumpOp_map = map[JumpOp]string{
+	0:   _JumpOp_name[0:2],
+	16:  _JumpOp_name[2:5],
+	32:  _JumpOp_name[5:8],
+	48:  _JumpOp_name[8:11],
+	64:  _JumpOp_name[11:15],
+	80:  _JumpOp_name[15:18],
+	96:  _JumpOp_name[18:22],
+	112: _JumpOp_name[22:26],
+	128: _JumpOp_name[26:30],
+	144: _JumpOp_name[30:34],
+	160: _JumpOp_name[34:37],
+	176: _JumpOp_name[37:40],
+	192: _JumpOp_name[40:44],
+	208: _JumpOp_name[44:48],
+	255: _JumpOp_name[48:61],
+}
+
+func (i JumpOp) String() string {
+	if str, ok := _JumpOp_map[i]; ok {
+		return str
+	}
+	return "JumpOp(" + strconv.FormatInt(int64(i), 10) + ")"
+}

+ 189 - 0
vendor/github.com/cilium/ebpf/asm/load_store.go

@@ -0,0 +1,189 @@
+package asm
+
+//go:generate stringer -output load_store_string.go -type=Mode,Size
+
+// Mode for load and store operations. It occupies the three most
+// significant bits of the opcode.
+//
+//    msb      lsb
+//    +---+--+---+
+//    |MDE|sz|cls|
+//    +---+--+---+
+type Mode uint8
+
+// modeMask selects the Mode bits of an OpCode.
+const modeMask OpCode = 0xe0
+
+const (
+	// InvalidMode is returned by getters when invoked
+	// on non load / store OpCodes
+	InvalidMode Mode = 0xff
+	// ImmMode - immediate value
+	ImmMode Mode = 0x00
+	// AbsMode - immediate value + offset
+	AbsMode Mode = 0x20
+	// IndMode - indirect (imm+src)
+	IndMode Mode = 0x40
+	// MemMode - load from or store to memory
+	MemMode Mode = 0x60
+	// XAddMode - add atomically across processors.
+	XAddMode Mode = 0xc0
+)
+
+// Size of load and store operations. It occupies the two bits between
+// the mode and the class of the opcode.
+//
+//    msb      lsb
+//    +---+--+---+
+//    |mde|SZ|cls|
+//    +---+--+---+
+type Size uint8
+
+// sizeMask selects the Size bits of an OpCode.
+const sizeMask OpCode = 0x18
+
+const (
+	// InvalidSize is returned by getters when invoked
+	// on non load / store OpCodes
+	InvalidSize Size = 0xff
+	// DWord - double word; 64 bits
+	DWord Size = 0x18
+	// Word - word; 32 bits
+	Word Size = 0x00
+	// Half - half-word; 16 bits
+	Half Size = 0x08
+	// Byte - byte; 8 bits
+	Byte Size = 0x10
+)
+
+// Sizeof returns the size in bytes, or -1 if s is not one of
+// DWord, Word, Half or Byte.
+func (s Size) Sizeof() int {
+	if s == DWord {
+		return 8
+	}
+	if s == Word {
+		return 4
+	}
+	if s == Half {
+		return 2
+	}
+	if s == Byte {
+		return 1
+	}
+	return -1
+}
+
+// LoadMemOp returns the OpCode to load a value of given size from memory.
+func LoadMemOp(size Size) OpCode {
+	op := OpCode(LdXClass).SetMode(MemMode)
+	return op.SetSize(size)
+}
+
+// LoadMem emits `dst = *(size *)(src + offset)`.
+func LoadMem(dst, src Register, offset int16, size Size) Instruction {
+	var ins Instruction
+	ins.OpCode = LoadMemOp(size)
+	ins.Dst, ins.Src = dst, src
+	ins.Offset = offset
+	return ins
+}
+
+// LoadImmOp returns the OpCode to load an immediate of given size.
+//
+// As of kernel 4.20, only DWord size is accepted.
+func LoadImmOp(size Size) OpCode {
+	op := OpCode(LdClass).SetMode(ImmMode)
+	return op.SetSize(size)
+}
+
+// LoadImm emits `dst = (size)value`.
+//
+// As of kernel 4.20, only DWord size is accepted.
+func LoadImm(dst Register, value int64, size Size) Instruction {
+	var ins Instruction
+	ins.OpCode = LoadImmOp(size)
+	ins.Dst = dst
+	ins.Constant = value
+	return ins
+}
+
+// LoadMapPtr stores a pointer to a map in dst.
+//
+// The map is identified by the file descriptor fd, which ends up in
+// Constant of a DWord immediate load whose Src is R1. A negative fd
+// yields an instruction with InvalidOpCode.
+func LoadMapPtr(dst Register, fd int) Instruction {
+	ins := Instruction{OpCode: InvalidOpCode}
+	if fd >= 0 {
+		ins = Instruction{
+			OpCode:   LoadImmOp(DWord),
+			Dst:      dst,
+			Src:      R1,
+			Constant: int64(fd),
+		}
+	}
+	return ins
+}
+
+// LoadIndOp returns the OpCode for loading a value of given size from an sk_buff.
+func LoadIndOp(size Size) OpCode {
+	op := OpCode(LdClass).SetMode(IndMode)
+	return op.SetSize(size)
+}
+
+// LoadInd emits `dst = ntoh(*(size *)(((sk_buff *)R6)->data + src + offset))`.
+func LoadInd(dst, src Register, offset int32, size Size) Instruction {
+	var ins Instruction
+	ins.OpCode = LoadIndOp(size)
+	ins.Dst, ins.Src = dst, src
+	ins.Constant = int64(offset)
+	return ins
+}
+
+// LoadAbsOp returns the OpCode for loading a value of given size from an sk_buff.
+func LoadAbsOp(size Size) OpCode {
+	op := OpCode(LdClass).SetMode(AbsMode)
+	return op.SetSize(size)
+}
+
+// LoadAbs emits `r0 = ntoh(*(size *)(((sk_buff *)R6)->data + offset))`.
+func LoadAbs(offset int32, size Size) Instruction {
+	var ins Instruction
+	ins.OpCode = LoadAbsOp(size)
+	ins.Dst = R0
+	ins.Constant = int64(offset)
+	return ins
+}
+
+// StoreMemOp returns the OpCode for storing a register of given size in memory.
+func StoreMemOp(size Size) OpCode {
+	op := OpCode(StXClass).SetMode(MemMode)
+	return op.SetSize(size)
+}
+
+// StoreMem emits `*(size *)(dst + offset) = src`
+func StoreMem(dst Register, offset int16, src Register, size Size) Instruction {
+	var ins Instruction
+	ins.OpCode = StoreMemOp(size)
+	ins.Dst, ins.Src = dst, src
+	ins.Offset = offset
+	return ins
+}
+
+// StoreImmOp returns the OpCode for storing an immediate of given size in memory.
+func StoreImmOp(size Size) OpCode {
+	op := OpCode(StClass).SetMode(MemMode)
+	return op.SetSize(size)
+}
+
+// StoreImm emits `*(size *)(dst + offset) = value`.
+func StoreImm(dst Register, offset int16, value int64, size Size) Instruction {
+	var ins Instruction
+	ins.OpCode = StoreImmOp(size)
+	ins.Dst = dst
+	ins.Offset = offset
+	ins.Constant = value
+	return ins
+}
+
+// StoreXAddOp returns the OpCode to atomically add a register to a value in memory.
+func StoreXAddOp(size Size) OpCode {
+	op := OpCode(StXClass).SetMode(XAddMode)
+	return op.SetSize(size)
+}
+
+// StoreXAdd atomically adds src to *dst.
+func StoreXAdd(dst, src Register, size Size) Instruction {
+	var ins Instruction
+	ins.OpCode = StoreXAddOp(size)
+	ins.Dst, ins.Src = dst, src
+	return ins
+}

+ 80 - 0
vendor/github.com/cilium/ebpf/asm/load_store_string.go

@@ -0,0 +1,80 @@
+// Code generated by "stringer -output load_store_string.go -type=Mode,Size"; DO NOT EDIT.
+
+package asm
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidMode-255]
+	_ = x[ImmMode-0]
+	_ = x[AbsMode-32]
+	_ = x[IndMode-64]
+	_ = x[MemMode-96]
+	_ = x[XAddMode-192]
+}
+
+const (
+	_Mode_name_0 = "ImmMode"
+	_Mode_name_1 = "AbsMode"
+	_Mode_name_2 = "IndMode"
+	_Mode_name_3 = "MemMode"
+	_Mode_name_4 = "XAddMode"
+	_Mode_name_5 = "InvalidMode"
+)
+
+func (i Mode) String() string {
+	switch {
+	case i == 0:
+		return _Mode_name_0
+	case i == 32:
+		return _Mode_name_1
+	case i == 64:
+		return _Mode_name_2
+	case i == 96:
+		return _Mode_name_3
+	case i == 192:
+		return _Mode_name_4
+	case i == 255:
+		return _Mode_name_5
+	default:
+		return "Mode(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[InvalidSize-255]
+	_ = x[DWord-24]
+	_ = x[Word-0]
+	_ = x[Half-8]
+	_ = x[Byte-16]
+}
+
+const (
+	_Size_name_0 = "Word"
+	_Size_name_1 = "Half"
+	_Size_name_2 = "Byte"
+	_Size_name_3 = "DWord"
+	_Size_name_4 = "InvalidSize"
+)
+
+func (i Size) String() string {
+	switch {
+	case i == 0:
+		return _Size_name_0
+	case i == 8:
+		return _Size_name_1
+	case i == 16:
+		return _Size_name_2
+	case i == 24:
+		return _Size_name_3
+	case i == 255:
+		return _Size_name_4
+	default:
+		return "Size(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+}

+ 237 - 0
vendor/github.com/cilium/ebpf/asm/opcode.go

@@ -0,0 +1,237 @@
+package asm
+
+import (
+	"fmt"
+	"strings"
+)
+
+//go:generate stringer -output opcode_string.go -type=Class
+
+// encoding identifies which opcode layout a Class uses, and thereby
+// which getters and setters are meaningful for an OpCode.
+type encoding int
+
+const (
+	// unknownEncoding is used for classes without a known layout.
+	unknownEncoding encoding = iota
+	// loadOrStore is the layout of the Ld, LdX, St and StX classes.
+	loadOrStore
+	// jumpOrALU is the layout of the ALU, ALU64 and Jump classes.
+	jumpOrALU
+)
+
+// Class of operations. It occupies the three least significant bits
+// of the opcode.
+//
+//    msb      lsb
+//    +---+--+---+
+//    |  ??  |CLS|
+//    +---+--+---+
+type Class uint8
+
+// classMask selects the Class bits of an OpCode.
+const classMask OpCode = 0x07
+
+const (
+	// LdClass loads immediate values and sk_buff data
+	// (see LoadImmOp, LoadAbsOp and LoadIndOp)
+	LdClass Class = 0x00
+	// LdXClass loads a value from memory (see LoadMemOp)
+	LdXClass Class = 0x01
+	// StClass stores an immediate value in memory (see StoreImmOp)
+	StClass Class = 0x02
+	// StXClass stores a register in memory
+	// (see StoreMemOp and StoreXAddOp)
+	StXClass Class = 0x03
+	// ALUClass arithmetic operators
+	ALUClass Class = 0x04
+	// JumpClass jump operators
+	JumpClass Class = 0x05
+	// ALU64Class arithmetic in 64 bit mode
+	ALU64Class Class = 0x07
+)
+
+// encoding returns the opcode layout used by cls, or unknownEncoding
+// if cls is not one of the known classes.
+func (cls Class) encoding() encoding {
+	if cls == LdClass || cls == LdXClass || cls == StClass || cls == StXClass {
+		return loadOrStore
+	}
+	if cls == ALU64Class || cls == ALUClass || cls == JumpClass {
+		return jumpOrALU
+	}
+	return unknownEncoding
+}
+
+// OpCode is a packed eBPF opcode.
+//
+// Its encoding is defined by a Class value:
+//
+//    msb      lsb
+//    +----+-+---+
+//    | ???? |CLS|
+//    +----+-+---+
+type OpCode uint8
+
+// InvalidOpCode is returned by setters on OpCode when the opcode is of
+// the wrong class for the field being set, or when the new value has bits
+// outside of that field.
+const InvalidOpCode OpCode = 0xff
+
+// marshalledInstructions returns the number of BPF instructions required
+// to encode this opcode: two for a DWord immediate load, one otherwise.
+func (op OpCode) marshalledInstructions() int {
+	count := 1
+	if op == LoadImmOp(DWord) {
+		count = 2
+	}
+	return count
+}
+
+// isDWordLoad reports whether op is a DWord immediate load.
+func (op OpCode) isDWordLoad() bool {
+	dwordLoad := LoadImmOp(DWord)
+	return op == dwordLoad
+}
+
+// Class returns the class of operation, which is stored in the bits
+// selected by classMask.
+func (op OpCode) Class() Class {
+	bits := op & classMask
+	return Class(bits)
+}
+
+// Mode returns the mode for load and store operations, and InvalidMode
+// for all other operations.
+func (op OpCode) Mode() Mode {
+	if op.Class().encoding() == loadOrStore {
+		return Mode(op & modeMask)
+	}
+	return InvalidMode
+}
+
+// Size returns the size for load and store operations, and InvalidSize
+// for all other operations.
+func (op OpCode) Size() Size {
+	if op.Class().encoding() == loadOrStore {
+		return Size(op & sizeMask)
+	}
+	return InvalidSize
+}
+
+// Source returns the source for branch and ALU operations.
+//
+// InvalidSource is returned for other classes, and for opcodes whose
+// ALUOp() is Swap.
+func (op OpCode) Source() Source {
+	switch {
+	case op.Class().encoding() != jumpOrALU:
+		return InvalidSource
+	case op.ALUOp() == Swap:
+		return InvalidSource
+	}
+	return Source(op & sourceMask)
+}
+
+// ALUOp returns the ALUOp.
+//
+// Returns InvalidALUOp unless op is of class ALUClass or ALU64Class, which
+// are the classes SetALUOp accepts. Jump opcodes keep their JumpOp in the
+// very same bits (jumpMask is an alias of aluMask), so those bits must not
+// be interpreted as an ALU operation: doing so makes a jump look like
+// whatever ALU operation happens to share its numeric value.
+func (op OpCode) ALUOp() ALUOp {
+	if class := op.Class(); class != ALUClass && class != ALU64Class {
+		return InvalidALUOp
+	}
+	return ALUOp(op & aluMask)
+}
+
+// Endianness returns the Endianness for a byte swap instruction, and
+// InvalidEndian for everything else.
+func (op OpCode) Endianness() Endianness {
+	if op.ALUOp() == Swap {
+		return Endianness(op & endianMask)
+	}
+	return InvalidEndian
+}
+
+// JumpOp returns the JumpOp.
+//
+// Returns InvalidJumpOp unless op is of class JumpClass, which is the only
+// class SetJumpOp accepts. ALU opcodes keep their ALUOp in the very same
+// bits (jumpMask is an alias of aluMask), so those bits must not be
+// interpreted as a jump: otherwise an arithmetic instruction can compare
+// equal to Call or Exit.
+func (op OpCode) JumpOp() JumpOp {
+	if op.Class() != JumpClass {
+		return InvalidJumpOp
+	}
+	return JumpOp(op & jumpMask)
+}
+
+// SetMode sets the mode on load and store operations.
+//
+// Returns InvalidOpCode if op is of the wrong class, or if mode has bits
+// outside of modeMask.
+func (op OpCode) SetMode(mode Mode) OpCode {
+	if op.Class().encoding() != loadOrStore {
+		return InvalidOpCode
+	}
+	if !valid(OpCode(mode), modeMask) {
+		return InvalidOpCode
+	}
+	cleared := op &^ modeMask
+	return cleared | OpCode(mode)
+}
+
+// SetSize sets the size on load and store operations.
+//
+// Returns InvalidOpCode if op is of the wrong class, or if size has bits
+// outside of sizeMask.
+func (op OpCode) SetSize(size Size) OpCode {
+	if op.Class().encoding() != loadOrStore {
+		return InvalidOpCode
+	}
+	if !valid(OpCode(size), sizeMask) {
+		return InvalidOpCode
+	}
+	cleared := op &^ sizeMask
+	return cleared | OpCode(size)
+}
+
+// SetSource sets the source on jump and ALU operations.
+//
+// Returns InvalidOpCode if op is of the wrong class, or if source has
+// bits outside of sourceMask.
+func (op OpCode) SetSource(source Source) OpCode {
+	if op.Class().encoding() != jumpOrALU {
+		return InvalidOpCode
+	}
+	if !valid(OpCode(source), sourceMask) {
+		return InvalidOpCode
+	}
+	cleared := op &^ sourceMask
+	return cleared | OpCode(source)
+}
+
+// SetALUOp sets the ALUOp on ALU operations.
+//
+// Returns InvalidOpCode if op is of the wrong class, or if alu has bits
+// outside of aluMask.
+func (op OpCode) SetALUOp(alu ALUOp) OpCode {
+	switch op.Class() {
+	case ALUClass, ALU64Class:
+		// Supported, carry on.
+	default:
+		return InvalidOpCode
+	}
+
+	if !valid(OpCode(alu), aluMask) {
+		return InvalidOpCode
+	}
+	return (op &^ aluMask) | OpCode(alu)
+}
+
+// SetJumpOp sets the JumpOp on jump operations.
+//
+// Returns InvalidOpCode if op is of the wrong class, or if jump has bits
+// outside of jumpMask.
+func (op OpCode) SetJumpOp(jump JumpOp) OpCode {
+	if op.Class() != JumpClass {
+		return InvalidOpCode
+	}
+	if !valid(OpCode(jump), jumpMask) {
+		return InvalidOpCode
+	}
+	cleared := op &^ jumpMask
+	return cleared | OpCode(jump)
+}
+
+// String returns a mnemonic for op, assembled from the names of its parts.
+//
+// Load and store opcodes are rendered as class, mode and a size suffix
+// (DW, W, H or B). ALU opcodes are rendered as the ALU operation followed
+// either by the endianness (for Swap) or by an optional "32" for ALUClass
+// plus the source. Jump opcodes are rendered as the jump operation
+// followed by the source, except for Exit and Call. Opcodes of any other
+// class are printed as a hexadecimal number.
+func (op OpCode) String() string {
+	var f strings.Builder
+
+	switch class := op.Class(); class {
+	case LdClass, LdXClass, StClass, StXClass:
+		// The "Class" and "Mode" suffixes of the generated names
+		// carry no information here, drop them.
+		f.WriteString(strings.TrimSuffix(class.String(), "Class"))
+
+		mode := op.Mode()
+		f.WriteString(strings.TrimSuffix(mode.String(), "Mode"))
+
+		switch op.Size() {
+		case DWord:
+			f.WriteString("DW")
+		case Word:
+			f.WriteString("W")
+		case Half:
+			f.WriteString("H")
+		case Byte:
+			f.WriteString("B")
+		}
+
+	case ALU64Class, ALUClass:
+		f.WriteString(op.ALUOp().String())
+
+		if op.ALUOp() == Swap {
+			// Width for Endian is controlled by Constant
+			f.WriteString(op.Endianness().String())
+		} else {
+			// Only the 32 bit variant is marked explicitly.
+			if class == ALUClass {
+				f.WriteString("32")
+			}
+
+			f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
+		}
+
+	case JumpClass:
+		f.WriteString(op.JumpOp().String())
+		// No source is printed for Exit and Call.
+		if jop := op.JumpOp(); jop != Exit && jop != Call {
+			f.WriteString(strings.TrimSuffix(op.Source().String(), "Source"))
+		}
+
+	default:
+		fmt.Fprintf(&f, "%#x", op)
+	}
+
+	return f.String()
+}
+
+// valid returns true if all bits in value are covered by mask.
+func valid(value, mask OpCode) bool {
+	uncovered := value &^ mask
+	return uncovered == 0
+}

+ 38 - 0
vendor/github.com/cilium/ebpf/asm/opcode_string.go

@@ -0,0 +1,38 @@
+// Code generated by "stringer -output opcode_string.go -type=Class"; DO NOT EDIT.
+
+package asm
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[LdClass-0]
+	_ = x[LdXClass-1]
+	_ = x[StClass-2]
+	_ = x[StXClass-3]
+	_ = x[ALUClass-4]
+	_ = x[JumpClass-5]
+	_ = x[ALU64Class-7]
+}
+
+const (
+	_Class_name_0 = "LdClassLdXClassStClassStXClassALUClassJumpClass"
+	_Class_name_1 = "ALU64Class"
+)
+
+var (
+	_Class_index_0 = [...]uint8{0, 7, 15, 22, 30, 38, 47}
+)
+
+func (i Class) String() string {
+	switch {
+	case 0 <= i && i <= 5:
+		return _Class_name_0[_Class_index_0[i]:_Class_index_0[i+1]]
+	case i == 7:
+		return _Class_name_1
+	default:
+		return "Class(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+}

+ 42 - 0
vendor/github.com/cilium/ebpf/asm/register.go

@@ -0,0 +1,42 @@
+package asm
+
+import (
+	"fmt"
+)
+
+// Register is the source or destination of most operations.
+type Register uint8
+
+// R0 contains return values.
+const R0 Register = 0
+
+// Registers for function arguments.
+const (
+	R1 Register = iota + 1
+	R2
+	R3
+	R4
+	R5
+)
+
+// Callee saved registers preserved by function calls.
+const (
+	R6 Register = iota + 6
+	R7
+	R8
+	R9
+)
+
+// Read-only frame pointer to access stack.
+const (
+	R10 Register = 10
+	RFP          = R10
+)
+
+// String returns "rfp" for the frame pointer and "r<number>" for every
+// other register.
+func (r Register) String() string {
+	if r == RFP {
+		return "rfp"
+	}
+	return fmt.Sprintf("r%d", uint8(r))
+}

+ 148 - 0
vendor/github.com/cilium/ebpf/collection.go

@@ -0,0 +1,148 @@
+package ebpf
+
+import (
+	"github.com/cilium/ebpf/asm"
+	"github.com/pkg/errors"
+)
+
+// CollectionOptions control loading a collection into the kernel.
+type CollectionOptions struct {
+	// Programs is passed on to NewProgramWithOptions for every program
+	// of the collection.
+	Programs ProgramOptions
+}
+
+// CollectionSpec describes a collection.
+//
+// Maps and programs are keyed by name.
+type CollectionSpec struct {
+	Maps     map[string]*MapSpec
+	Programs map[string]*ProgramSpec
+}
+
+// Copy returns a recursive copy of the spec.
+//
+// Every map and program spec is copied as well. Copying a nil spec
+// yields nil.
+func (cs *CollectionSpec) Copy() *CollectionSpec {
+	if cs == nil {
+		return nil
+	}
+
+	mapSpecs := make(map[string]*MapSpec, len(cs.Maps))
+	for name, mapSpec := range cs.Maps {
+		mapSpecs[name] = mapSpec.Copy()
+	}
+
+	progSpecs := make(map[string]*ProgramSpec, len(cs.Programs))
+	for name, progSpec := range cs.Programs {
+		progSpecs[name] = progSpec.Copy()
+	}
+
+	return &CollectionSpec{
+		Maps:     mapSpecs,
+		Programs: progSpecs,
+	}
+}
+
+// Collection is a collection of Programs and Maps associated
+// with their symbols.
+//
+// Both are keyed by name. Close releases everything that is still part
+// of the collection, see DetachMap and DetachProgram to take ownership
+// of individual entries.
+type Collection struct {
+	Programs map[string]*Program
+	Maps     map[string]*Map
+}
+
+// NewCollection creates a Collection from a specification, using default
+// CollectionOptions.
+//
+// Every map in the spec is created, whether or not a program refers to it.
+func NewCollection(spec *CollectionSpec) (*Collection, error) {
+	return NewCollectionWithOptions(spec, CollectionOptions{})
+}
+
+// NewCollectionWithOptions creates a Collection from a specification.
+//
+// Every map in the spec is created, whether or not a program refers to it.
+// Programs are then loaded from a copy of their spec in which instructions
+// that reference one of those maps by name have been rewritten to use the
+// file descriptor of the map. opts.Programs is used for every program.
+//
+// If anything fails, the maps and programs created up to that point are
+// closed before the error is returned.
+func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (coll *Collection, err error) {
+	var (
+		maps  = make(map[string]*Map)
+		progs = make(map[string]*Program)
+	)
+
+	// The results are named so that this cleanup can observe the error
+	// being returned. Without it every early return below would leak
+	// whatever has been created so far.
+	defer func() {
+		if err == nil {
+			return
+		}
+
+		for _, m := range maps {
+			m.Close()
+		}
+
+		for _, p := range progs {
+			p.Close()
+		}
+	}()
+
+	for mapName, mapSpec := range spec.Maps {
+		m, mapErr := NewMap(mapSpec)
+		if mapErr != nil {
+			return nil, errors.Wrapf(mapErr, "map %s", mapName)
+		}
+		maps[mapName] = m
+	}
+
+	for progName, origProgSpec := range spec.Programs {
+		// Work on a copy, the caller's spec must not be modified.
+		progSpec := origProgSpec.Copy()
+
+		// Rewrite any reference to a valid map.
+		for i := range progSpec.Instructions {
+			var (
+				ins = &progSpec.Instructions[i]
+				m   = maps[ins.Reference]
+			)
+
+			if ins.Reference == "" || m == nil {
+				continue
+			}
+
+			if ins.Src == asm.R1 {
+				// Don't overwrite maps already rewritten, users can
+				// rewrite programs in the spec themselves
+				continue
+			}
+
+			if rewriteErr := ins.RewriteMapPtr(m.FD()); rewriteErr != nil {
+				return nil, errors.Wrapf(rewriteErr, "program %s: map %s", progName, ins.Reference)
+			}
+		}
+
+		prog, progErr := NewProgramWithOptions(progSpec, opts.Programs)
+		if progErr != nil {
+			return nil, errors.Wrapf(progErr, "program %s", progName)
+		}
+		progs[progName] = prog
+	}
+
+	return &Collection{
+		Programs: progs,
+		Maps:     maps,
+	}, nil
+}
+
+// LoadCollection parses an object file and converts it to a collection.
+//
+// It is LoadCollectionSpec followed by NewCollection.
+func LoadCollection(file string) (*Collection, error) {
+	spec, loadErr := LoadCollectionSpec(file)
+	if loadErr == nil {
+		return NewCollection(spec)
+	}
+	return nil, loadErr
+}
+
+// Close frees all maps and programs associated with the collection.
+//
+// Programs are closed before maps. The collection mustn't be used
+// afterwards.
+func (coll *Collection) Close() {
+	for name := range coll.Programs {
+		coll.Programs[name].Close()
+	}
+	for name := range coll.Maps {
+		coll.Maps[name].Close()
+	}
+}
+
+// DetachMap removes the named map from the Collection.
+//
+// This means that a later call to Close() will not affect this map.
+//
+// Returns nil if no map of that name exists.
+func (coll *Collection) DetachMap(name string) *Map {
+	detached, found := coll.Maps[name]
+	if !found {
+		return nil
+	}
+	delete(coll.Maps, name)
+	return detached
+}
+
+// DetachProgram removes the named program from the Collection.
+//
+// This means that a later call to Close() will not affect this program.
+//
+// Returns nil if no program of that name exists.
+func (coll *Collection) DetachProgram(name string) *Program {
+	detached, found := coll.Programs[name]
+	if !found {
+		return nil
+	}
+	delete(coll.Programs, name)
+	return detached
+}

+ 17 - 0
vendor/github.com/cilium/ebpf/doc.go

@@ -0,0 +1,17 @@
+// Package ebpf is a toolkit for working with eBPF programs.
+//
+// eBPF programs are small snippets of code which are executed directly
+// in a VM in the Linux kernel, which makes them very fast and flexible.
+// Many Linux subsystems now accept eBPF programs. This makes it possible
+// to implement highly application specific logic inside the kernel,
+// without having to modify the actual kernel itself.
+//
+// This package is designed for long-running processes which
+// want to use eBPF to implement part of their application logic. It has no
+// run-time dependencies outside of the library and the Linux kernel itself.
+// eBPF code should be compiled ahead of time using clang, and shipped with
+// your application as any other resource.
+//
+// This package doesn't include code required to attach eBPF to Linux
+// subsystems, since this varies per subsystem.
+package ebpf

+ 392 - 0
vendor/github.com/cilium/ebpf/elf_reader.go

@@ -0,0 +1,392 @@
+package ebpf
+
+import (
+	"bytes"
+	"debug/elf"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	"github.com/cilium/ebpf/asm"
+
+	"github.com/pkg/errors"
+)
+
+// elfCode is an ELF file together with its symbols.
+//
+// symbols holds the symbol table as returned by elf.File.Symbols.
+// symbolsPerSection maps a section index to the named symbols of that
+// section, keyed by symbol value.
+type elfCode struct {
+	*elf.File
+	symbols           []elf.Symbol
+	symbolsPerSection map[elf.SectionIndex]map[uint64]string
+}
+
+// LoadCollectionSpec parses an ELF file into a CollectionSpec.
+//
+// Errors from parsing are annotated with the name of the file.
+func LoadCollectionSpec(file string) (*CollectionSpec, error) {
+	fh, openErr := os.Open(file)
+	if openErr != nil {
+		return nil, openErr
+	}
+	defer fh.Close()
+
+	spec, parseErr := LoadCollectionSpecFromReader(fh)
+	return spec, errors.Wrapf(parseErr, "file %s", file)
+}
+
+// LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec.
+//
+// Sections are classified by name prefix ("license", "version", "maps"),
+// by being a relocation section, or by being a non-empty executable
+// section. A license section is required, a version section is optional.
+// Program sections for which no program type can be derived from the
+// section name are treated as libraries and linked into every program.
+func LoadCollectionSpecFromReader(code io.ReaderAt) (*CollectionSpec, error) {
+	f, err := elf.NewFile(code)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	symbols, err := f.Symbols()
+	if err != nil {
+		return nil, errors.Wrap(err, "load symbols")
+	}
+
+	ec := &elfCode{f, symbols, symbolsPerSection(symbols)}
+
+	// Sort the sections into buckets. The order of the cases matters:
+	// name prefixes take precedence over the section type.
+	var licenseSection, versionSection *elf.Section
+	progSections := make(map[elf.SectionIndex]*elf.Section)
+	relSections := make(map[elf.SectionIndex]*elf.Section)
+	mapSections := make(map[elf.SectionIndex]*elf.Section)
+	for i, sec := range ec.Sections {
+		switch {
+		case strings.HasPrefix(sec.Name, "license"):
+			licenseSection = sec
+		case strings.HasPrefix(sec.Name, "version"):
+			versionSection = sec
+		case strings.HasPrefix(sec.Name, "maps"):
+			mapSections[elf.SectionIndex(i)] = sec
+		case sec.Type == elf.SHT_REL:
+			// sec.Info is the index of the section the relocations
+			// apply to.
+			if int(sec.Info) >= len(ec.Sections) {
+				return nil, errors.Errorf("found relocation section %v for missing section %v", i, sec.Info)
+			}
+
+			// Store relocations under the section index of the target
+			idx := elf.SectionIndex(sec.Info)
+			if relSections[idx] != nil {
+				return nil, errors.Errorf("section %d has multiple relocation sections", idx)
+			}
+			relSections[idx] = sec
+		case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0:
+			progSections[elf.SectionIndex(i)] = sec
+		}
+	}
+
+	license, err := loadLicense(licenseSection)
+	if err != nil {
+		return nil, errors.Wrap(err, "load license")
+	}
+
+	version, err := loadVersion(versionSection, ec.ByteOrder)
+	if err != nil {
+		return nil, errors.Wrap(err, "load version")
+	}
+
+	maps, err := ec.loadMaps(mapSections)
+	if err != nil {
+		return nil, errors.Wrap(err, "load maps")
+	}
+
+	progs, libs, err := ec.loadPrograms(progSections, relSections, license, version)
+	if err != nil {
+		return nil, errors.Wrap(err, "load programs")
+	}
+
+	// Every library is offered to every program, link() decides
+	// what ends up in the program.
+	if len(libs) > 0 {
+		for name, prog := range progs {
+			prog.Instructions, err = link(prog.Instructions, libs...)
+			if err != nil {
+				return nil, errors.Wrapf(err, "program %s", name)
+			}
+		}
+	}
+
+	return &CollectionSpec{maps, progs}, nil
+}
+
+// loadLicense returns the contents of the license section with trailing
+// NUL bytes removed. A missing section is an error.
+func loadLicense(sec *elf.Section) (string, error) {
+	if sec == nil {
+		return "", errors.Errorf("missing license section")
+	}
+
+	raw, err := sec.Data()
+	if err != nil {
+		return "", errors.Wrapf(err, "section %s", sec.Name)
+	}
+
+	trimmed := bytes.TrimRight(raw, "\000")
+	return string(trimmed), nil
+}
+
+// loadVersion reads a 32 bit version from the start of the version
+// section using byte order bo. A missing section yields version 0.
+func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) {
+	var version uint32
+	if sec == nil {
+		return version, nil
+	}
+
+	readErr := binary.Read(sec.Open(), bo, &version)
+	return version, errors.Wrapf(readErr, "section %s", sec.Name)
+}
+
+// loadPrograms decodes all program sections.
+//
+// Sections whose name maps to a program type are returned as ProgramSpec,
+// keyed by the name of the symbol at the start of the section. All other
+// sections are returned as libraries, i.e. plain instruction streams.
+// license and version are copied into every ProgramSpec.
+func (ec *elfCode) loadPrograms(progSections, relSections map[elf.SectionIndex]*elf.Section, license string, version uint32) (map[string]*ProgramSpec, []asm.Instructions, error) {
+	var (
+		progs = make(map[string]*ProgramSpec)
+		libs  []asm.Instructions
+	)
+	for idx, prog := range progSections {
+		syms := ec.symbolsPerSection[idx]
+		if len(syms) == 0 {
+			return nil, nil, errors.Errorf("section %v: missing symbols", prog.Name)
+		}
+
+		// The symbol at offset 0 names the section's entry point.
+		funcSym := syms[0]
+		if funcSym == "" {
+			return nil, nil, errors.Errorf("section %v: no label at start", prog.Name)
+		}
+
+		// relSections is keyed by the index of the section the
+		// relocations apply to. A missing entry yields a nil section.
+		rels, err := ec.loadRelocations(relSections[idx])
+		if err != nil {
+			return nil, nil, errors.Wrapf(err, "program %s: can't load relocations", funcSym)
+		}
+
+		insns, err := ec.loadInstructions(prog, syms, rels)
+		if err != nil {
+			return nil, nil, errors.Wrapf(err, "program %s: can't unmarshal instructions", funcSym)
+		}
+
+		if progType, attachType := getProgType(prog.Name); progType == UnspecifiedProgram {
+			// There is no single name we can use for "library" sections,
+			// since they may contain multiple functions. We'll decode the
+			// labels they contain later on, and then link sections that way.
+			libs = append(libs, insns)
+		} else {
+			progs[funcSym] = &ProgramSpec{
+				Name:          funcSym,
+				Type:          progType,
+				AttachType:    attachType,
+				License:       license,
+				KernelVersion: version,
+				Instructions:  insns,
+			}
+		}
+	}
+	return progs, libs, nil
+}
+
+// loadInstructions decodes the instructions of a section.
+//
+// symbols and relocations are keyed by offset into the section. An
+// instruction gets the symbol found at its offset as Symbol, and the
+// relocation found at its offset as Reference.
+func (ec *elfCode) loadInstructions(section *elf.Section, symbols, relocations map[uint64]string) (asm.Instructions, error) {
+	var (
+		r      = section.Open()
+		insns  asm.Instructions
+		ins    asm.Instruction
+		offset uint64
+	)
+	for {
+		n, err := ins.Unmarshal(r, ec.ByteOrder)
+		// io.EOF marks the regular end of the section.
+		if err == io.EOF {
+			return insns, nil
+		}
+		if err != nil {
+			return nil, errors.Wrapf(err, "offset %d", offset)
+		}
+
+		// Lookups of offsets without an entry yield "", which
+		// clears whatever the previous iteration left in ins.
+		ins.Symbol = symbols[offset]
+		ins.Reference = relocations[offset]
+
+		insns = append(insns, ins)
+		// Unmarshal reports how far it advanced in the section.
+		offset += n
+	}
+}
+
+// loadMaps decodes the map definitions of all map sections.
+//
+// A section is split evenly between its symbols, so all definitions of a
+// section must be of equal size and there must be a symbol at the start
+// of each of them. A definition consists of type, key size, value size,
+// max entries and flags, in that order. Trailing bytes are tolerated as
+// long as they are zero. The result is keyed by symbol name.
+func (ec *elfCode) loadMaps(mapSections map[elf.SectionIndex]*elf.Section) (map[string]*MapSpec, error) {
+	var (
+		maps = make(map[string]*MapSpec)
+		b    = make([]byte, 1)
+	)
+	for idx, sec := range mapSections {
+		syms := ec.symbolsPerSection[idx]
+		if len(syms) == 0 {
+			return nil, errors.Errorf("section %v: no symbols", sec.Name)
+		}
+
+		if sec.Size%uint64(len(syms)) != 0 {
+			return nil, errors.Errorf("section %v: map descriptors are not of equal size", sec.Name)
+		}
+
+		var (
+			r    = sec.Open()
+			size = sec.Size / uint64(len(syms))
+		)
+		for i, offset := 0, uint64(0); i < len(syms); i, offset = i+1, offset+size {
+			mapSym := syms[offset]
+			if mapSym == "" {
+				// NOTE(review): this prints to stdout from library
+				// code and looks like a debugging leftover. It is the
+				// only use of fmt in this file, so removing it means
+				// dropping the import as well.
+				fmt.Println(syms)
+				return nil, errors.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset)
+			}
+
+			if maps[mapSym] != nil {
+				return nil, errors.Errorf("section %v: map %v already exists", sec.Name, mapSym)
+			}
+
+			// Confine reads to the current definition.
+			lr := io.LimitReader(r, int64(size))
+
+			var spec MapSpec
+			// The cases are evaluated in order and stop at the first
+			// read that fails, which decodes the fields one by one.
+			switch {
+			case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil:
+				return nil, errors.Errorf("map %v: missing type", mapSym)
+			case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil:
+				return nil, errors.Errorf("map %v: missing key size", mapSym)
+			case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil:
+				return nil, errors.Errorf("map %v: missing value size", mapSym)
+			case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil:
+				return nil, errors.Errorf("map %v: missing max entries", mapSym)
+			case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil:
+				return nil, errors.Errorf("map %v: missing flags", mapSym)
+			}
+
+			// Consume the rest of the definition byte by byte, and
+			// reject fields we don't understand unless they are zero.
+			for {
+				_, err := lr.Read(b)
+				if err == io.EOF {
+					break
+				}
+				if err != nil {
+					return nil, err
+				}
+				if b[0] != 0 {
+					return nil, errors.Errorf("map %v: unknown and non-zero fields in definition", mapSym)
+				}
+			}
+
+			maps[mapSym] = &spec
+		}
+	}
+	return maps, nil
+}
+
+// getProgType derives the program and attach type from a section name.
+//
+// v matches an entry if it starts with the key of that entry. Returns
+// UnspecifiedProgram and AttachNone if no program type matches, even if
+// an attach type did match.
+//
+// Both tables are iterated in map order. This is only deterministic as
+// long as no key of a table is a prefix of another key of the same table,
+// which holds for the entries below - keep it that way when adding more.
+func getProgType(v string) (ProgramType, AttachType) {
+	types := map[string]ProgramType{
+		// From https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c#n3568
+		"socket":         SocketFilter,
+		"seccomp":        SocketFilter,
+		"kprobe/":        Kprobe,
+		"kretprobe/":     Kprobe,
+		"tracepoint/":    TracePoint,
+		"xdp":            XDP,
+		"perf_event":     PerfEvent,
+		"sockops":        SockOps,
+		"sk_skb":         SkSKB,
+		"sk_msg":         SkMsg,
+		"lirc_mode2":     LircMode2,
+		"flow_dissector": FlowDissector,
+
+		"cgroup_skb/":       CGroupSKB,
+		"cgroup/dev":        CGroupDevice,
+		"cgroup/skb":        CGroupSKB,
+		"cgroup/sock":       CGroupSock,
+		"cgroup/post_bind":  CGroupSock,
+		"cgroup/bind":       CGroupSockAddr,
+		"cgroup/connect":    CGroupSockAddr,
+		"cgroup/sendmsg":    CGroupSockAddr,
+		"cgroup/recvmsg":    CGroupSockAddr,
+		"cgroup/sysctl":     CGroupSysctl,
+		"cgroup/getsockopt": CGroupSockopt,
+		"cgroup/setsockopt": CGroupSockopt,
+		"classifier":        SchedCLS,
+		"action":            SchedACT,
+	}
+	attachTypes := map[string]AttachType{
+		"cgroup_skb/ingress":    AttachCGroupInetIngress,
+		"cgroup_skb/egress":     AttachCGroupInetEgress,
+		"cgroup/sock":           AttachCGroupInetSockCreate,
+		"cgroup/post_bind4":     AttachCGroupInet4PostBind,
+		"cgroup/post_bind6":     AttachCGroupInet6PostBind,
+		"cgroup/dev":            AttachCGroupDevice,
+		"sockops":               AttachCGroupSockOps,
+		"sk_skb/stream_parser":  AttachSkSKBStreamParser,
+		"sk_skb/stream_verdict": AttachSkSKBStreamVerdict,
+		"sk_msg":                AttachSkSKBStreamVerdict,
+		"lirc_mode2":            AttachLircMode2,
+		"flow_dissector":        AttachFlowDissector,
+		"cgroup/bind4":          AttachCGroupInet4Bind,
+		"cgroup/bind6":          AttachCGroupInet6Bind,
+		"cgroup/connect4":       AttachCGroupInet4Connect,
+		"cgroup/connect6":       AttachCGroupInet6Connect,
+		"cgroup/sendmsg4":       AttachCGroupUDP4Sendmsg,
+		"cgroup/sendmsg6":       AttachCGroupUDP6Sendmsg,
+		"cgroup/recvmsg4":       AttachCGroupUDP4Recvmsg,
+		"cgroup/recvmsg6":       AttachCGroupUDP6Recvmsg,
+		"cgroup/sysctl":         AttachCGroupSysctl,
+		"cgroup/getsockopt":     AttachCGroupGetsockopt,
+		"cgroup/setsockopt":     AttachCGroupSetsockopt,
+	}
+	// The attach type is optional, it stays AttachNone without a match.
+	attachType := AttachNone
+	for k, t := range attachTypes {
+		if strings.HasPrefix(v, k) {
+			attachType = t
+		}
+	}
+
+	for k, t := range types {
+		if strings.HasPrefix(v, k) {
+			return t, attachType
+		}
+	}
+	return UnspecifiedProgram, AttachNone
+}
+
+// loadRelocations parses a relocation section into a map from the offset
+// being relocated to the name of the symbol the relocation refers to.
+//
+// A nil section yields an empty map. Returns an error if the entries of
+// the section are smaller than 16 bytes, if an entry can't be decoded, or
+// if an entry refers to a symbol that isn't part of ec.symbols.
+func (ec *elfCode) loadRelocations(sec *elf.Section) (map[uint64]string, error) {
+	rels := make(map[uint64]string)
+	if sec == nil {
+		return rels, nil
+	}
+
+	if sec.Entsize < 16 {
+		return nil, errors.New("rels are less than 16 bytes")
+	}
+
+	r := sec.Open()
+	for off := uint64(0); off < sec.Size; off += sec.Entsize {
+		ent := io.LimitReader(r, int64(sec.Entsize))
+
+		var rel elf.Rel64
+		if binary.Read(ent, ec.ByteOrder, &rel) != nil {
+			return nil, errors.Errorf("can't parse relocation at offset %v", off)
+		}
+
+		// ec.symbols lacks the null symbol at index 0 of the ELF symbol
+		// table, so ELF symbol number N lives at ec.symbols[N-1]. Check
+		// the number before subtracting: symbol number 0 would wrap
+		// around, and on platforms with a 32 bit int end up as index -1.
+		symIdx := uint64(elf.R_SYM64(rel.Info))
+		if symIdx == 0 || symIdx > uint64(len(ec.symbols)) {
+			return nil, errors.Errorf("relocation at offset %d: symbol %v doesnt exist", off, int64(symIdx)-1)
+		}
+
+		rels[rel.Off] = ec.symbols[symIdx-1].Name
+	}
+	return rels, nil
+}
+
+// symbolsPerSection groups the names of symbols by the section they
+// belong to, keyed by the value of the symbol.
+//
+// Only named symbols of type NOTYPE, OBJECT or FUNC are considered. If
+// several symbols of a section share a value the last one wins.
+func symbolsPerSection(symbols []elf.Symbol) map[elf.SectionIndex]map[uint64]string {
+	perSection := make(map[elf.SectionIndex]map[uint64]string)
+	for _, sym := range symbols {
+		// Older versions of LLVM don't tag symbols correctly,
+		// which is why untyped symbols are accepted as well.
+		typ := elf.ST_TYPE(sym.Info)
+		if typ != elf.STT_NOTYPE && typ != elf.STT_OBJECT && typ != elf.STT_FUNC {
+			continue
+		}
+
+		if sym.Name == "" {
+			continue
+		}
+
+		names := perSection[sym.Section]
+		if names == nil {
+			names = make(map[uint64]string)
+			perSection[sym.Section] = names
+		}
+		names[sym.Value] = sym.Name
+	}
+	return perSection
+}

+ 8 - 0
vendor/github.com/cilium/ebpf/go.mod

@@ -0,0 +1,8 @@
+module github.com/cilium/ebpf
+
+go 1.12
+
+require (
+	github.com/pkg/errors v0.8.1
+	golang.org/x/sys v0.0.0-20191022100944-742c48ecaeb7
+)

+ 64 - 0
vendor/github.com/cilium/ebpf/internal/cpu.go

@@ -0,0 +1,64 @@
+package internal
+
+import (
+	"fmt"
+	"os"
+	"sync"
+
+	"github.com/pkg/errors"
+)
+
+// sysCPU caches the outcome of the first PossibleCPUs call.
+var sysCPU struct {
+	once sync.Once
+	err  error
+	num  int
+}
+
+// PossibleCPUs returns the max number of CPUs a system may possibly have.
+// Logical CPU numbers must be of the form 0-n.
+//
+// The sysfs file is parsed once; later calls return the cached result,
+// including a cached error.
+func PossibleCPUs() (int, error) {
+	load := func() {
+		sysCPU.num, sysCPU.err = parseCPUs("/sys/devices/system/cpu/possible")
+	}
+	sysCPU.once.Do(load)
+
+	return sysCPU.num, sysCPU.err
+}
+
+// onlineCPU caches the outcome of the first OnlineCPUs call.
+var onlineCPU struct {
+	once sync.Once
+	err  error
+	num  int
+}
+
+// OnlineCPUs returns the number of currently online CPUs.
+// Logical CPU numbers must be of the form 0-n.
+//
+// The sysfs file is parsed once; later calls return the cached result,
+// including a cached error.
+func OnlineCPUs() (int, error) {
+	load := func() {
+		onlineCPU.num, onlineCPU.err = parseCPUs("/sys/devices/system/cpu/online")
+	}
+	onlineCPU.once.Do(load)
+
+	return onlineCPU.num, onlineCPU.err
+}
+
+// parseCPUs parses the number of cpus from sysfs,
+// in the format of "/sys/devices/system/cpu/{possible,online,..}".
+// Logical CPU numbers must be of the form 0-n. A file holding only "0"
+// is accepted as well and yields one CPU.
+//
+// An error is returned if the file can't be opened, if no number can be
+// parsed, or if the range doesn't start at zero.
+func parseCPUs(path string) (int, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return 0, err
+	}
+	defer file.Close()
+
+	var low, high int
+	// The scan error is deliberately not inspected: a single CPU number
+	// makes Fscanf fail on the missing "-n" part although the input is
+	// acceptable. The count of parsed items is checked instead.
+	n, _ := fmt.Fscanf(file, "%d-%d", &low, &high)
+	if n < 1 || low != 0 {
+		// Build a fresh error. err is nil at this point, and wrapping
+		// a nil error yields nil, which would report success.
+		return 0, errors.Errorf("%s has unknown format", path)
+	}
+	if n == 1 {
+		high = low
+	}
+
+	// cpus is 0 indexed
+	return high + 1, nil
+}

+ 24 - 0
vendor/github.com/cilium/ebpf/internal/endian.go

@@ -0,0 +1,24 @@
+package internal
+
+import (
+	"encoding/binary"
+	"unsafe"
+)
+
+// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
+// depending on the host's endianness.
+var NativeEndian binary.ByteOrder
+
+func init() {
+	NativeEndian = binary.LittleEndian
+	if isBigEndian() {
+		NativeEndian = binary.BigEndian
+	}
+}
+
+// isBigEndian reports whether the first byte in memory of an int holding
+// the value 1 is zero.
+func isBigEndian() (ret bool) {
+	one := int(0x1)
+	first := *(*byte)(unsafe.Pointer(&one))
+	return first == 0
+}

+ 85 - 0
vendor/github.com/cilium/ebpf/internal/feature.go

@@ -0,0 +1,85 @@
+package internal
+
+import (
+	"fmt"
+	"sync"
+
+	"github.com/pkg/errors"
+)
+
+// UnsupportedFeatureError is the error type produced by the functions
+// that FeatureTest() returns.
+type UnsupportedFeatureError struct {
+	// The minimum Linux mainline version required for this feature.
+	// Used for the error string, and for sanity checking during testing.
+	MinimumVersion Version
+
+	// The name of the feature that isn't supported.
+	Name string
+}
+
+// Error names the unsupported feature and the minimum version it requires.
+func (ufe *UnsupportedFeatureError) Error() string {
+	const format = "%s not supported (requires >= %s)"
+	return fmt.Sprintf(format, ufe.Name, ufe.MinimumVersion)
+}
+
+// FeatureTest wraps a function so that it is run at most once.
+//
+// name should identify the tested feature, while version must be in the
+// form Major.Minor[.Patch].
+//
+// The returned function yields a descriptive UnsupportedFeatureError if
+// fn reports false, and nil otherwise. If version can't be parsed, the
+// returned function always yields the parse error and fn is never run.
+func FeatureTest(name, version string, fn func() bool) func() error {
+	minVersion, parseErr := NewVersion(version)
+	if parseErr != nil {
+		return func() error { return parseErr }
+	}
+
+	var (
+		once   sync.Once
+		result error
+	)
+
+	// probe stores the outcome of fn; it only ever runs through once.
+	probe := func() {
+		if fn() {
+			return
+		}
+		result = &UnsupportedFeatureError{
+			MinimumVersion: minVersion,
+			Name:           name,
+		}
+	}
+
+	return func() error {
+		once.Do(probe)
+		return result
+	}
+}
+
+// A Version in the form Major.Minor.Patch.
+type Version [3]uint16
+
+// NewVersion creates a version from a string like "Major.Minor.Patch".
+//
+// Patch is optional and defaults to zero. An error is returned when fewer
+// than two components can be parsed.
+func NewVersion(ver string) (Version, error) {
+	var v Version
+	parsed, _ := fmt.Sscanf(ver, "%d.%d.%d", &v[0], &v[1], &v[2])
+	if parsed < 2 {
+		return Version{}, errors.Errorf("invalid version: %s", ver)
+	}
+	return v, nil
+}
+
+// String formats the version as vMajor.Minor, followed by .Patch when
+// the patch component is not zero.
+func (v Version) String() string {
+	major, minor, patch := v[0], v[1], v[2]
+	if patch != 0 {
+		return fmt.Sprintf("v%d.%d.%d", major, minor, patch)
+	}
+	return fmt.Sprintf("v%d.%d", major, minor)
+}
+
+// Less returns true if the version is less than another version.
+//
+// Components are compared in order; the first one that differs decides.
+func (v Version) Less(other Version) bool {
+	for i := range v {
+		switch {
+		case v[i] < other[i]:
+			return true
+		case v[i] > other[i]:
+			return false
+		}
+	}
+	return false
+}

+ 127 - 0
vendor/github.com/cilium/ebpf/internal/unix/types_linux.go

@@ -0,0 +1,127 @@
+// +build linux
+
+package unix
+
+import (
+	"syscall"
+
+	linux "golang.org/x/sys/unix"
+)
+
+// Constants re-exported from golang.org/x/sys/unix (imported as linux).
+const (
+	ENOENT                   = linux.ENOENT
+	EAGAIN                   = linux.EAGAIN
+	ENOSPC                   = linux.ENOSPC
+	EINVAL                   = linux.EINVAL
+	EPOLLIN                  = linux.EPOLLIN
+	BPF_OBJ_NAME_LEN         = linux.BPF_OBJ_NAME_LEN
+	BPF_TAG_SIZE             = linux.BPF_TAG_SIZE
+	SYS_BPF                  = linux.SYS_BPF
+	F_DUPFD_CLOEXEC          = linux.F_DUPFD_CLOEXEC
+	EPOLL_CTL_ADD            = linux.EPOLL_CTL_ADD
+	EPOLL_CLOEXEC            = linux.EPOLL_CLOEXEC
+	O_CLOEXEC                = linux.O_CLOEXEC
+	O_NONBLOCK               = linux.O_NONBLOCK
+	PROT_READ                = linux.PROT_READ
+	PROT_WRITE               = linux.PROT_WRITE
+	MAP_SHARED               = linux.MAP_SHARED
+	PERF_TYPE_SOFTWARE       = linux.PERF_TYPE_SOFTWARE
+	PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
+	PerfBitWatermark         = linux.PerfBitWatermark
+	PERF_SAMPLE_RAW          = linux.PERF_SAMPLE_RAW
+	PERF_FLAG_FD_CLOEXEC     = linux.PERF_FLAG_FD_CLOEXEC
+	RLIM_INFINITY            = linux.RLIM_INFINITY
+)
+
+// Statfs_t is an alias of linux.Statfs_t.
+type Statfs_t = linux.Statfs_t
+
+// Rlimit is an alias of linux.Rlimit.
+type Rlimit = linux.Rlimit
+
+// Setrlimit forwards to linux.Setrlimit.
+func Setrlimit(resource int, rlim *Rlimit) (err error) {
+	return linux.Setrlimit(resource, rlim)
+}
+
+// Syscall forwards to linux.Syscall.
+func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
+	return linux.Syscall(trap, a1, a2, a3)
+}
+
+// FcntlInt forwards to linux.FcntlInt.
+func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
+	return linux.FcntlInt(fd, cmd, arg)
+}
+
+// Statfs forwards to linux.Statfs.
+func Statfs(path string, buf *Statfs_t) (err error) {
+	return linux.Statfs(path, buf)
+}
+
+// Close forwards to linux.Close.
+func Close(fd int) (err error) {
+	return linux.Close(fd)
+}
+
+// EpollEvent is an alias of linux.EpollEvent.
+type EpollEvent = linux.EpollEvent
+
+// EpollWait forwards to linux.EpollWait.
+func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
+	return linux.EpollWait(epfd, events, msec)
+}
+
+// EpollCtl forwards to linux.EpollCtl.
+func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
+	return linux.EpollCtl(epfd, op, fd, event)
+}
+
+// Eventfd forwards to linux.Eventfd.
+func Eventfd(initval uint, flags int) (fd int, err error) {
+	return linux.Eventfd(initval, flags)
+}
+
+// Write forwards to linux.Write.
+func Write(fd int, p []byte) (n int, err error) {
+	return linux.Write(fd, p)
+}
+
+// EpollCreate1 forwards to linux.EpollCreate1.
+func EpollCreate1(flag int) (fd int, err error) {
+	return linux.EpollCreate1(flag)
+}
+
+// PerfEventMmapPage is a defined type whose underlying type is
+// linux.PerfEventMmapPage. Unlike the other types in this file it is
+// not declared as an alias.
+type PerfEventMmapPage linux.PerfEventMmapPage
+
+// SetNonblock forwards to linux.SetNonblock.
+func SetNonblock(fd int, nonblocking bool) (err error) {
+	return linux.SetNonblock(fd, nonblocking)
+}
+
+// Mmap forwards to linux.Mmap.
+func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
+	return linux.Mmap(fd, offset, length, prot, flags)
+}
+
+// Munmap forwards to linux.Munmap.
+func Munmap(b []byte) (err error) {
+	return linux.Munmap(b)
+}
+
+// PerfEventAttr is an alias of linux.PerfEventAttr.
+type PerfEventAttr = linux.PerfEventAttr
+
+// PerfEventOpen forwards to linux.PerfEventOpen.
+func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
+	return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
+}
+
+// Utsname is an alias of linux.Utsname.
+type Utsname = linux.Utsname
+
+// Uname forwards to linux.Uname.
+func Uname(buf *Utsname) (err error) {
+	return linux.Uname(buf)
+}

+ 193 - 0
vendor/github.com/cilium/ebpf/internal/unix/types_other.go

@@ -0,0 +1,193 @@
+// +build !linux
+
+package unix
+
+import (
+	"fmt"
+	"runtime"
+	"syscall"
+)
+
+// errNonLinux is returned by every function in this file. It names the
+// platform the binary was built for.
+var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
+
+// Placeholder values for the constants that the Linux build takes from
+// golang.org/x/sys/unix, so that code referring to them compiles on other
+// platforms. The set of names is kept in step with the Linux file.
+const (
+	ENOENT                   = syscall.ENOENT
+	EAGAIN                   = syscall.EAGAIN
+	ENOSPC                   = syscall.ENOSPC
+	EINVAL                   = syscall.EINVAL
+	BPF_OBJ_NAME_LEN         = 0x10
+	BPF_TAG_SIZE             = 0x8
+	SYS_BPF                  = 321
+	F_DUPFD_CLOEXEC          = 0x406
+	EPOLLIN                  = 0x1
+	EPOLL_CTL_ADD            = 0x1
+	EPOLL_CLOEXEC            = 0x80000
+	O_CLOEXEC                = 0x80000
+	O_NONBLOCK               = 0x800
+	PROT_READ                = 0x1
+	PROT_WRITE               = 0x2
+	MAP_SHARED               = 0x1
+	PERF_TYPE_SOFTWARE       = 0x1
+	PERF_COUNT_SW_BPF_OUTPUT = 0xa
+	PerfBitWatermark         = 0x4000
+	PERF_SAMPLE_RAW          = 0x400
+	PERF_FLAG_FD_CLOEXEC     = 0x8
+	// RLIM_INFINITY is exported by the Linux file as well; without it,
+	// code using the constant would not build for other platforms.
+	RLIM_INFINITY = 0x7fffffffffffffff
+)
+
+// Statfs_t is the placeholder type declared for non-Linux builds.
+type Statfs_t struct {
+	Type    int64
+	Bsize   int64
+	Blocks  uint64
+	Bfree   uint64
+	Bavail  uint64
+	Files   uint64
+	Ffree   uint64
+	Fsid    [2]int32
+	Namelen int64
+	Frsize  int64
+	Flags   int64
+	Spare   [4]int64
+}
+
+// Rlimit is the placeholder type declared for non-Linux builds.
+type Rlimit struct {
+	Cur uint64
+	Max uint64
+}
+
+// Setrlimit is unsupported here and always returns errNonLinux.
+func Setrlimit(resource int, rlim *Rlimit) (err error) {
+	return errNonLinux
+}
+
+// Syscall is unsupported here. It always returns zero results together
+// with errno 1.
+func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
+	return 0, 0, syscall.Errno(1)
+}
+
+// FcntlInt is unsupported here and always returns -1 and errNonLinux.
+func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
+	return -1, errNonLinux
+}
+
+// Statfs is unsupported here and always returns errNonLinux.
+func Statfs(path string, buf *Statfs_t) error {
+	return errNonLinux
+}
+
+// Close is unsupported here and always returns errNonLinux.
+func Close(fd int) (err error) {
+	return errNonLinux
+}
+
+// EpollEvent is the placeholder type declared for non-Linux builds.
+type EpollEvent struct {
+	Events uint32
+	Fd     int32
+	Pad    int32
+}
+
+// EpollWait is unsupported here and always returns 0 and errNonLinux.
+func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
+	return 0, errNonLinux
+}
+
+// EpollCtl is unsupported here and always returns errNonLinux.
+func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
+	return errNonLinux
+}
+
+// Eventfd is unsupported here and always returns 0 and errNonLinux.
+func Eventfd(initval uint, flags int) (fd int, err error) {
+	return 0, errNonLinux
+}
+
+// Write is unsupported here and always returns 0 and errNonLinux.
+func Write(fd int, p []byte) (n int, err error) {
+	return 0, errNonLinux
+}
+
+// EpollCreate1 is unsupported here and always returns 0 and errNonLinux.
+func EpollCreate1(flag int) (fd int, err error) {
+	return 0, errNonLinux
+}
+
+// PerfEventMmapPage is the placeholder type declared for non-Linux builds.
+type PerfEventMmapPage struct {
+	Version        uint32
+	Compat_version uint32
+	Lock           uint32
+	Index          uint32
+	Offset         int64
+	Time_enabled   uint64
+	Time_running   uint64
+	Capabilities   uint64
+	Pmc_width      uint16
+	Time_shift     uint16
+	Time_mult      uint32
+	Time_offset    uint64
+	Time_zero      uint64
+	Size           uint32
+
+	Data_head   uint64
+	Data_tail   uint64
+	Data_offset uint64
+	Data_size   uint64
+	Aux_head    uint64
+	Aux_tail    uint64
+	Aux_offset  uint64
+	Aux_size    uint64
+}
+
+// SetNonblock is unsupported here and always returns errNonLinux.
+func SetNonblock(fd int, nonblocking bool) (err error) {
+	return errNonLinux
+}
+
+// Mmap is unsupported here. It always returns an empty slice and
+// errNonLinux.
+func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
+	return []byte{}, errNonLinux
+}
+
+// Munmap is unsupported here and always returns errNonLinux.
+func Munmap(b []byte) (err error) {
+	return errNonLinux
+}
+
+// PerfEventAttr is the placeholder type declared for non-Linux builds.
+type PerfEventAttr struct {
+	Type               uint32
+	Size               uint32
+	Config             uint64
+	Sample             uint64
+	Sample_type        uint64
+	Read_format        uint64
+	Bits               uint64
+	Wakeup             uint32
+	Bp_type            uint32
+	Ext1               uint64
+	Ext2               uint64
+	Branch_sample_type uint64
+	Sample_regs_user   uint64
+	Sample_stack_user  uint32
+	Clockid            int32
+	Sample_regs_intr   uint64
+	Aux_watermark      uint32
+	Sample_max_stack   uint16
+}
+
+// PerfEventOpen is unsupported here and always returns 0 and errNonLinux.
+func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
+	return 0, errNonLinux
+}
+
+// Utsname is the placeholder type declared for non-Linux builds. It only
+// carries the Release field.
+type Utsname struct {
+	Release [65]byte
+}
+
+// Uname is unsupported here and always returns errNonLinux.
+func Uname(buf *Utsname) (err error) {
+	return errNonLinux
+}

+ 58 - 0
vendor/github.com/cilium/ebpf/linker.go

@@ -0,0 +1,58 @@
+package ebpf
+
+import (
+	"github.com/cilium/ebpf/asm"
+)
+
+// link resolves bpf-to-bpf calls.
+//
+// Each section may contain multiple functions / labels, and is only linked
+// if the program being edited references one of these functions.
+//
+// Sections must not require linking themselves.
+//
+// The sections are processed in order, each one against the result of the
+// previous step. The first error aborts linking.
+func link(insns asm.Instructions, sections ...asm.Instructions) (asm.Instructions, error) {
+	linked := insns
+	for i := range sections {
+		merged, err := linkSection(linked, sections[i])
+		if err != nil {
+			return nil, err
+		}
+		linked = merged
+	}
+	return linked, nil
+}
+
+// linkSection appends section to insns if insns contains a call that
+// still needs linking and whose reference is one of section's symbols.
+// Otherwise insns is returned unchanged.
+func linkSection(insns, section asm.Instructions) (asm.Instructions, error) {
+	// The symbols which section makes available.
+	provided, err := section.SymbolOffsets()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, ins := range insns {
+		// Only calls with a reference, R1 as source and a constant of -1
+		// are candidates. Any other constant means the call is already
+		// valid and doesn't need to be linked again.
+		candidate := ins.Reference != "" &&
+			ins.OpCode.JumpOp() == asm.Call &&
+			ins.Src == asm.R1 &&
+			ins.Constant == -1
+		if !candidate {
+			continue
+		}
+
+		if _, found := provided[ins.Reference]; found {
+			// At least one function in the section is called from
+			// insns. Merge the two sections. The rewrite of
+			// ins.Constant happens in asm.Instruction.Marshal.
+			return append(insns, section...), nil
+		}
+		// Symbol isn't available in this section, keep looking.
+	}
+
+	// None of the functions in the section are called. Do nothing.
+	return insns, nil
+}

+ 604 - 0
vendor/github.com/cilium/ebpf/map.go

@@ -0,0 +1,604 @@
+package ebpf
+
+import (
+	"fmt"
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+
+	"github.com/pkg/errors"
+)
+
+// MapSpec defines a Map.
+type MapSpec struct {
+	// Name is passed to the kernel as a debug aid. Must only contain
+	// alpha numeric and '_' characters.
+	Name       string
+	Type       MapType
+	KeySize    uint32
+	ValueSize  uint32
+	MaxEntries uint32
+	Flags      uint32
+	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
+	InnerMap *MapSpec
+}
+
+// String renders the type together with the sizes, entry limit and flags.
+func (ms *MapSpec) String() string {
+	const format = "%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)"
+	return fmt.Sprintf(format, ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
+}
+
+// Copy returns a copy of the spec.
+//
+// InnerMap is copied recursively. Copying a nil spec returns nil.
+func (ms *MapSpec) Copy() *MapSpec {
+	if ms == nil {
+		return nil
+	}
+
+	dup := new(MapSpec)
+	*dup = *ms
+	dup.InnerMap = ms.InnerMap.Copy()
+	return dup
+}
+
+// Map represents a Map file descriptor.
+//
+// It is not safe to close a map which is used by other goroutines.
+//
+// Methods which take interface{} arguments by default encode
+// them using binary.Read/Write in the machine's native endianness.
+//
+// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
+// if you require custom encoding.
+type Map struct {
+	// name is only used when formatting the map in String.
+	name string
+	fd   *bpfFD
+	abi  MapABI
+	// Per CPU maps return values larger than the size in the spec.
+	// newMap sets this to abi.ValueSize, or for per-CPU map types to the
+	// value size rounded up to a multiple of 8, times the number of
+	// possible CPUs.
+	fullValueSize int
+}
+
+// NewMapFromFD creates a map from a raw fd.
+//
+// You should not use fd after calling this function.
+//
+// Negative descriptors are rejected with an error.
+func NewMapFromFD(fd int) (*Map, error) {
+	if fd < 0 {
+		return nil, errors.New("invalid fd")
+	}
+
+	wrapped := newBPFFD(uint32(fd))
+	name, abi, err := newMapABIFromFd(wrapped)
+	if err == nil {
+		return newMap(wrapped, name, abi)
+	}
+
+	// NOTE(review): forget presumably detaches the wrapper from fd so
+	// that the caller's descriptor stays open - confirm against bpfFD.
+	wrapped.forget()
+	return nil, err
+}
+
+// NewMap creates a new Map.
+//
+// Creating a map for the first time will perform feature detection
+// by creating small, temporary maps.
+//
+// For ArrayOfMaps and HashOfMaps a temporary map is created from
+// spec.InnerMap to serve as the template, and closed again before
+// NewMap returns.
+func NewMap(spec *MapSpec) (*Map, error) {
+	switch spec.Type {
+	case ArrayOfMaps, HashOfMaps:
+		// Needs an inner map template, handled below.
+	default:
+		return createMap(spec, nil)
+	}
+
+	innerSpec := spec.InnerMap
+	if innerSpec == nil {
+		return nil, errors.Errorf("%s requires InnerMap", spec.Type)
+	}
+
+	template, err := createMap(innerSpec, nil)
+	if err != nil {
+		return nil, err
+	}
+	defer template.Close()
+
+	return createMap(spec, template.fd)
+}
+
+// createMap creates a map in the kernel from a copy of spec.
+//
+// inner is the descriptor of the template map for map-in-map types and
+// may be nil otherwise. The copy of spec is adjusted before the map is
+// created:
+//   - ArrayOfMaps / HashOfMaps require nested map support; ValueSize must
+//     be zero or four and is set to four.
+//   - PerfEventArray requires KeySize and ValueSize to be zero, both are
+//     set to four. A zero MaxEntries is replaced by the number of online
+//     CPUs.
+//
+// The map name is only passed on if haveObjName reports no error.
+func createMap(spec *MapSpec, inner *bpfFD) (*Map, error) {
+	spec = spec.Copy()
+
+	switch spec.Type {
+	case ArrayOfMaps, HashOfMaps:
+		if err := haveNestedMaps(); err != nil {
+			return nil, err
+		}
+
+		if spec.ValueSize != 0 && spec.ValueSize != 4 {
+			return nil, errors.Errorf("ValueSize must be zero or four for map of map")
+		}
+		spec.ValueSize = 4
+
+	case PerfEventArray:
+		if spec.KeySize != 0 {
+			return nil, errors.Errorf("KeySize must be zero for perf event array")
+		}
+		if spec.ValueSize != 0 {
+			return nil, errors.Errorf("ValueSize must be zero for perf event array")
+		}
+		if spec.MaxEntries == 0 {
+			n, err := internal.OnlineCPUs()
+			if err != nil {
+				return nil, errors.Wrap(err, "perf event array")
+			}
+			spec.MaxEntries = uint32(n)
+		}
+
+		spec.KeySize = 4
+		spec.ValueSize = 4
+	}
+
+	attr := bpfMapCreateAttr{
+		mapType:    spec.Type,
+		keySize:    spec.KeySize,
+		valueSize:  spec.ValueSize,
+		maxEntries: spec.MaxEntries,
+		flags:      spec.Flags,
+	}
+
+	if inner != nil {
+		var err error
+		attr.innerMapFd, err = inner.value()
+		if err != nil {
+			return nil, errors.Wrap(err, "map create")
+		}
+	}
+
+	name, err := newBPFObjName(spec.Name)
+	if err != nil {
+		return nil, errors.Wrap(err, "map create")
+	}
+
+	if haveObjName() == nil {
+		attr.mapName = name
+	}
+
+	fd, err := bpfMapCreate(&attr)
+	if err != nil {
+		return nil, errors.Wrap(err, "map create")
+	}
+
+	m, err := newMap(fd, spec.Name, newMapABIFromSpec(spec))
+	if err != nil {
+		// The descriptor was created above and has no other owner yet,
+		// so release it here instead of leaving it open.
+		_ = fd.close()
+		return nil, err
+	}
+	return m, nil
+}
+
+// newMap wraps fd in a Map carrying name and a copy of abi.
+//
+// For map types with per-CPU values the full value size is the value
+// size rounded up to a multiple of 8, times the number of possible CPUs.
+// Determining that number is the only way this function can fail.
+func newMap(fd *bpfFD, name string, abi *MapABI) (*Map, error) {
+	valueSize := int(abi.ValueSize)
+	m := &Map{
+		name:          name,
+		fd:            fd,
+		abi:           *abi,
+		fullValueSize: valueSize,
+	}
+
+	if abi.Type.hasPerCPUValue() {
+		cpus, err := internal.PossibleCPUs()
+		if err != nil {
+			return nil, err
+		}
+		m.fullValueSize = align(valueSize, 8) * cpus
+	}
+
+	return m, nil
+}
+
+// String formats the map as type and descriptor. The name is included
+// when the map has one.
+func (m *Map) String() string {
+	if m.name == "" {
+		return fmt.Sprintf("%s#%v", m.abi.Type, m.fd)
+	}
+	return fmt.Sprintf("%s(%s)#%v", m.abi.Type, m.name, m.fd)
+}
+
+// ABI gets the ABI of the Map. The result is a copy.
+func (m *Map) ABI() MapABI {
+	abi := m.abi
+	return abi
+}
+
+// Lookup retrieves a value from a Map.
+//
+// Calls Close() on valueOut if it is of type **Map or **Program,
+// and *valueOut is not nil.
+//
+// Returns an error if the key doesn't exist, see IsNotExist.
+func (m *Map) Lookup(key, valueOut interface{}) error {
+	ptr, raw := makeBuffer(valueOut, m.fullValueSize)
+
+	err := m.lookup(key, ptr)
+	switch {
+	case err != nil:
+		return err
+	case raw == nil:
+		// No intermediate buffer was allocated: the kernel wrote
+		// straight to the unsafe.Pointer passed as valueOut.
+		return nil
+	case m.abi.Type.hasPerCPUValue():
+		return unmarshalPerCPUValue(valueOut, int(m.abi.ValueSize), raw)
+	}
+
+	switch out := valueOut.(type) {
+	case **Map:
+		inner, err := unmarshalMap(raw)
+		if err != nil {
+			return err
+		}
+
+		// Release whatever the caller's variable held before.
+		(*out).Close()
+		*out = inner
+		return nil
+	case *Map, Map:
+		return errors.Errorf("can't unmarshal into %T, need %T", out, (**Map)(nil))
+
+	case **Program:
+		prog, err := unmarshalProgram(raw)
+		if err != nil {
+			return err
+		}
+
+		// Release whatever the caller's variable held before.
+		(*out).Close()
+		*out = prog
+		return nil
+	case *Program, Program:
+		return errors.Errorf("can't unmarshal into %T, need %T", out, (**Program)(nil))
+	}
+
+	return unmarshalBytes(valueOut, raw)
+}
+
+// LookupBytes gets a value from Map.
+//
+// Returns a nil value if a key doesn't exist.
+func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
+	buf := make([]byte, m.fullValueSize)
+
+	err := m.lookup(key, newPtr(unsafe.Pointer(&buf[0])))
+	if IsNotExist(err) {
+		// A missing key is reported as a nil value, not as an error.
+		return nil, nil
+	}
+
+	return buf, err
+}
+
+// lookup marshals key and asks the kernel to write the matching value
+// to valueOut.
+func (m *Map) lookup(key interface{}, valueOut syscallPtr) error {
+	keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
+	if err != nil {
+		return errors.WithMessage(err, "can't marshal key")
+	}
+
+	if err := bpfMapLookupElem(m.fd, keyPtr, valueOut); err != nil {
+		return errors.WithMessage(err, "lookup failed")
+	}
+	return nil
+}
+
+// MapUpdateFlags controls the behaviour of the Map.Update call.
+//
+// The exact semantics depend on the specific MapType.
+type MapUpdateFlags uint64
+
+const (
+	// UpdateAny creates a new element or update an existing one.
+	UpdateAny MapUpdateFlags = 0
+	// UpdateNoExist creates a new element.
+	UpdateNoExist MapUpdateFlags = 1
+	// UpdateExist updates an existing element.
+	UpdateExist MapUpdateFlags = 2
+)
+
+// Put replaces or creates a value in map.
+//
+// It is equivalent to calling Update with UpdateAny, and returns the
+// error of that call.
+func (m *Map) Put(key, value interface{}) error {
+	return m.Update(key, value, UpdateAny)
+}
+
+// Update changes the value of a key.
+//
+// For map types with per-CPU values, value is marshaled as a slice
+// holding one element per CPU.
+func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
+	keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
+	if err != nil {
+		return errors.WithMessage(err, "can't marshal key")
+	}
+
+	marshalValue := marshalPtr
+	if m.abi.Type.hasPerCPUValue() {
+		marshalValue = marshalPerCPUValue
+	}
+
+	valuePtr, err := marshalValue(value, int(m.abi.ValueSize))
+	if err != nil {
+		return errors.WithMessage(err, "can't marshal value")
+	}
+
+	return bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags))
+}
+
+// Delete removes a value.
+//
+// Returns an error if the key does not exist, see IsNotExist.
+func (m *Map) Delete(key interface{}) error {
+	keyPtr, err := marshalPtr(key, int(m.abi.KeySize))
+	if err != nil {
+		return errors.WithMessage(err, "can't marshal key")
+	}
+
+	if err := bpfMapDeleteElem(m.fd, keyPtr); err != nil {
+		return errors.WithMessage(err, "can't delete key")
+	}
+	return nil
+}
+
+// NextKey finds the key following an initial key.
+//
+// See NextKeyBytes for details.
+func (m *Map) NextKey(key, nextKeyOut interface{}) error {
+	outPtr, outBytes := makeBuffer(nextKeyOut, int(m.abi.KeySize))
+
+	err := m.nextKey(key, outPtr)
+	switch {
+	case err != nil:
+		return err
+	case outBytes == nil:
+		// nextKeyOut is an unsafe.Pointer and was written to directly.
+		return nil
+	}
+
+	decodeErr := unmarshalBytes(nextKeyOut, outBytes)
+	return errors.WithMessage(decodeErr, "can't unmarshal next key")
+}
+
+// NextKeyBytes returns the key following an initial key as a byte slice.
+//
+// Passing nil will return the first key.
+//
+// A nil slice and a nil error are returned when there is no further key.
+//
+// Use Iterate if you want to traverse all entries in the map.
+func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
+	buf := make([]byte, m.abi.KeySize)
+
+	err := m.nextKey(key, newPtr(unsafe.Pointer(&buf[0])))
+	if IsNotExist(err) {
+		return nil, nil
+	}
+
+	return buf, err
+}
+
+// nextKey asks the kernel for the key following key and has it written
+// to nextKeyOut. A nil key is passed on as the zero syscallPtr.
+func (m *Map) nextKey(key interface{}, nextKeyOut syscallPtr) error {
+	var keyPtr syscallPtr
+	if key != nil {
+		ptr, err := marshalPtr(key, int(m.abi.KeySize))
+		if err != nil {
+			return errors.WithMessage(err, "can't marshal key")
+		}
+		keyPtr = ptr
+	}
+
+	if err := bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut); err != nil {
+		return errors.WithMessage(err, "can't get next key")
+	}
+	return nil
+}
+
+// Iterate traverses a map.
+//
+// It's safe to create multiple iterators at the same time.
+//
+// It's not possible to guarantee that all keys in a map will be
+// returned if there are concurrent modifications to the map.
+func (m *Map) Iterate() *MapIterator {
+	return newMapIterator(m)
+}
+
+// Close closes the file descriptor of the Map.
+//
+// Calling Close on a nil Map is allowed and does nothing.
+func (m *Map) Close() error {
+	if m == nil {
+		// This makes it easier to clean up when iterating maps
+		// of maps / programs.
+		return nil
+	}
+
+	return m.fd.close()
+}
+
+// FD gets the file descriptor of the Map.
+//
+// Calling this function is invalid after Close has been called.
+// If the descriptor can't be obtained, -1 is returned.
+func (m *Map) FD() int {
+	if fd, err := m.fd.value(); err == nil {
+		return int(fd)
+	}
+
+	// Best effort: -1 is the number most likely to be an
+	// invalid file descriptor.
+	return -1
+}
+
+// Clone creates a duplicate of the Map.
+//
+// Closing the duplicate does not affect the original, and vice versa.
+// Changes made to the map are reflected by both instances however.
+//
+// Cloning a nil Map returns nil.
+func (m *Map) Clone() (*Map, error) {
+	if m == nil {
+		return nil, nil
+	}
+
+	dup, err := m.fd.dup()
+	if err != nil {
+		return nil, errors.Wrap(err, "can't clone map")
+	}
+
+	clone, err := newMap(dup, m.name, &m.abi)
+	if err != nil {
+		// The duplicated descriptor has no owner if no Map could be
+		// built around it, so close it before reporting the error.
+		_ = dup.close()
+		return nil, err
+	}
+	return clone, nil
+}
+
+// Pin persists the map past the lifetime of the process that created it.
+//
+// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
+//
+// The error from pinning the map's descriptor at fileName is returned
+// unchanged.
+func (m *Map) Pin(fileName string) error {
+	return bpfPinObject(fileName, m.fd)
+}
+
+// LoadPinnedMap loads a Map from a BPF file.
+//
+// The function is not compatible with nested maps.
+// Use LoadPinnedMapExplicit in these situations.
+//
+// The descriptor obtained from fileName is closed again if the map
+// can't be set up.
+func LoadPinnedMap(fileName string) (*Map, error) {
+	fd, err := bpfGetObject(fileName)
+	if err != nil {
+		return nil, err
+	}
+
+	name, abi, err := newMapABIFromFd(fd)
+	if err != nil {
+		_ = fd.close()
+		return nil, err
+	}
+
+	m, err := newMap(fd, name, abi)
+	if err != nil {
+		// Same cleanup as for the ABI error above.
+		_ = fd.close()
+		return nil, err
+	}
+	return m, nil
+}
+
+// LoadPinnedMapExplicit loads a map with explicit parameters.
+//
+// abi is taken as given instead of being queried from the descriptor,
+// and the resulting Map has no name. The descriptor obtained from
+// fileName is closed again if the map can't be set up.
+func LoadPinnedMapExplicit(fileName string, abi *MapABI) (*Map, error) {
+	fd, err := bpfGetObject(fileName)
+	if err != nil {
+		return nil, err
+	}
+
+	m, err := newMap(fd, "", abi)
+	if err != nil {
+		// Nothing else owns the descriptor yet, release it here.
+		_ = fd.close()
+		return nil, err
+	}
+	return m, nil
+}
+
+// unmarshalMap turns a map id, stored in buf in native endianness, into
+// a Map.
+//
+// buf must be exactly 4 bytes long. The descriptor obtained for the id is
+// closed again if the map can't be set up.
+func unmarshalMap(buf []byte) (*Map, error) {
+	if len(buf) != 4 {
+		return nil, errors.New("map id requires 4 byte value")
+	}
+
+	// Looking up an entry in a nested map or prog array returns an id,
+	// not an fd.
+	id := internal.NativeEndian.Uint32(buf)
+	fd, err := bpfGetMapFDByID(id)
+	if err != nil {
+		return nil, err
+	}
+
+	name, abi, err := newMapABIFromFd(fd)
+	if err != nil {
+		_ = fd.close()
+		return nil, err
+	}
+
+	m, err := newMap(fd, name, abi)
+	if err != nil {
+		// Same cleanup as for the ABI error above.
+		_ = fd.close()
+		return nil, err
+	}
+	return m, nil
+}
+
+// MarshalBinary implements BinaryMarshaler.
+//
+// The result is the map's file descriptor as 4 bytes in native
+// endianness.
+func (m *Map) MarshalBinary() ([]byte, error) {
+	fd, err := m.fd.value()
+	if err != nil {
+		return nil, err
+	}
+
+	var encoded [4]byte
+	internal.NativeEndian.PutUint32(encoded[:], fd)
+	return encoded[:], nil
+}
+
+// MapIterator iterates a Map.
+//
+// See Map.Iterate.
+type MapIterator struct {
+	target            *Map
+	prevKey           interface{}
+	prevBytes         []byte
+	count, maxEntries uint32
+	done              bool
+	err               error
+}
+
+// newMapIterator prepares an iterator over target. The iteration limit
+// is the map's MaxEntries, and the scratch buffer for the previous key
+// has the map's key size.
+func newMapIterator(target *Map) *MapIterator {
+	it := new(MapIterator)
+	it.target = target
+	it.maxEntries = target.abi.MaxEntries
+	it.prevBytes = make([]byte, int(target.abi.KeySize))
+	return it
+}
+
+// errIterationAborted is stored by Next once it has made MaxEntries
+// attempts without reaching the end of the map.
+var errIterationAborted = errors.New("iteration aborted")
+
+// Next decodes the next key and value.
+//
+// Iterating a hash map from which keys are being deleted is not
+// safe. You may see the same key multiple times. Iteration may
+// also abort with an error, see IsIterationAborted.
+//
+// Returns false if there are no more entries. You must check
+// the result of Err afterwards.
+//
+// See Map.Lookup for further caveats around valueOut.
+func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
+	if mi.err != nil || mi.done {
+		return false
+	}
+
+	// count persists across calls, so the total number of keys fetched
+	// over the iterator's lifetime is bounded by maxEntries.
+	for ; mi.count < mi.maxEntries; mi.count++ {
+		var nextBytes []byte
+		nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey)
+		if mi.err != nil {
+			return false
+		}
+
+		if nextBytes == nil {
+			mi.done = true
+			return false
+		}
+
+		// The user can get access to nextBytes since unmarshalBytes
+		// does not copy when unmarshaling into a []byte.
+		// Make a copy to prevent accidental corruption of
+		// iterator state.
+		copy(mi.prevBytes, nextBytes)
+		mi.prevKey = mi.prevBytes
+
+		mi.err = mi.target.Lookup(nextBytes, valueOut)
+		if IsNotExist(mi.err) {
+			// Even though the key should be valid, we couldn't look up
+			// its value. If we're iterating a hash map this is probably
+			// because a concurrent delete removed the value before we
+			// could get it. This means that the next call to NextKeyBytes
+			// is very likely to restart iteration.
+			// If we're iterating one of the fd maps like
+			// ProgramArray it means that a given slot doesn't have
+			// a valid fd associated. It's OK to continue to the next slot.
+			continue
+		}
+		if mi.err != nil {
+			return false
+		}
+
+		mi.err = unmarshalBytes(keyOut, nextBytes)
+		return mi.err == nil
+	}
+
+	// The attempt budget is used up without having seen the last key.
+	mi.err = errIterationAborted
+	return false
+}
+
+// Err returns any encountered error.
+//
+// The method must be called after Next returns false.
+func (mi *MapIterator) Err() error {
+	return mi.err
+}
+
+// IsNotExist returns true if the error indicates that a
+// key doesn't exist.
+//
+// The cause of err is compared against unix.ENOENT, so wrapped errors
+// are recognised as well.
+func IsNotExist(err error) bool {
+	return errors.Cause(err) == unix.ENOENT
+}
+
+// IsIterationAborted returns true if the iteration was aborted.
+//
+// This occurs when keys are deleted from a hash map during iteration.
+func IsIterationAborted(err error) bool {
+	return errors.Cause(err) == errIterationAborted
+}

+ 192 - 0
vendor/github.com/cilium/ebpf/marshalers.go

@@ -0,0 +1,192 @@
+package ebpf
+
+import (
+	"bytes"
+	"encoding"
+	"encoding/binary"
+	"reflect"
+	"runtime"
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal"
+
+	"github.com/pkg/errors"
+)
+
+// marshalPtr turns data into a pointer suitable for a syscall.
+//
+// An unsafe.Pointer is passed through as is. Anything else is encoded
+// with marshalBytes and must come out at exactly length bytes.
+func marshalPtr(data interface{}, length int) (syscallPtr, error) {
+	switch v := data.(type) {
+	case unsafe.Pointer:
+		return newPtr(v), nil
+	default:
+		encoded, err := marshalBytes(data, length)
+		if err != nil {
+			return syscallPtr{}, err
+		}
+		return newPtr(unsafe.Pointer(&encoded[0])), nil
+	}
+}
+
+// marshalBytes encodes data into exactly length bytes.
+//
+// encoding.BinaryMarshaler values encode themselves, strings and byte
+// slices are used as they are, and everything else goes through
+// binary.Write in native endianness. unsafe.Pointer is rejected. An
+// error is returned if the encoded size differs from length.
+func marshalBytes(data interface{}, length int) (buf []byte, err error) {
+	switch value := data.(type) {
+	case encoding.BinaryMarshaler:
+		buf, err = value.MarshalBinary()
+	case string:
+		buf = []byte(value)
+	case []byte:
+		buf = value
+	case unsafe.Pointer:
+		err = errors.New("can't marshal from unsafe.Pointer")
+	default:
+		var wr bytes.Buffer
+		err = errors.Wrapf(binary.Write(&wr, internal.NativeEndian, value), "encoding %T", value)
+		buf = wr.Bytes()
+	}
+
+	switch {
+	case err != nil:
+		return nil, err
+	case len(buf) != length:
+		return nil, errors.Errorf("%T doesn't marshal to %d bytes", data, length)
+	}
+	return buf, nil
+}
+
+// makeBuffer returns the pointer a syscall should write its output to.
+//
+// If dst is an unsafe.Pointer it is used directly and no buffer is
+// returned. Otherwise a fresh buffer of length bytes is allocated and
+// returned next to the pointer to its first byte.
+func makeBuffer(dst interface{}, length int) (syscallPtr, []byte) {
+	direct, ok := dst.(unsafe.Pointer)
+	if ok {
+		return newPtr(direct), nil
+	}
+
+	scratch := make([]byte, length)
+	return newPtr(unsafe.Pointer(&scratch[0])), scratch
+}
+
+// unmarshalBytes decodes buf into data.
+//
+// The destination type decides how:
+//   - unsafe.Pointer: len(buf) bytes are copied to the pointed-to memory.
+//   - encoding.BinaryUnmarshaler: UnmarshalBinary is called with buf.
+//   - *string: receives a copy of buf as a string.
+//   - *[]byte: is set to buf itself, without copying.
+//   - string and []byte values are rejected, a pointer is required.
+//   - anything else is decoded by binary.Read in native endianness.
+func unmarshalBytes(data interface{}, buf []byte) error {
+	switch value := data.(type) {
+	case unsafe.Pointer:
+		// Build a byte slice header on top of the destination memory
+		// so that the builtin copy can be used.
+		sh := &reflect.SliceHeader{
+			Data: uintptr(value),
+			Len:  len(buf),
+			Cap:  len(buf),
+		}
+
+		dst := *(*[]byte)(unsafe.Pointer(sh))
+		copy(dst, buf)
+		// The slice header only holds a uintptr, so keep value alive
+		// explicitly until the copy is done.
+		runtime.KeepAlive(value)
+		return nil
+	case encoding.BinaryUnmarshaler:
+		return value.UnmarshalBinary(buf)
+	case *string:
+		*value = string(buf)
+		return nil
+	case *[]byte:
+		*value = buf
+		return nil
+	case string:
+		return errors.New("require pointer to string")
+	case []byte:
+		return errors.New("require pointer to []byte")
+	default:
+		rd := bytes.NewReader(buf)
+		err := binary.Read(rd, internal.NativeEndian, value)
+		return errors.Wrapf(err, "decoding %T", value)
+	}
+}
+
+// marshalPerCPUValue encodes a slice containing one value per
+// possible CPU into a buffer of bytes.
+//
+// Values are initialized to zero if the slice has less elements than CPUs.
+//
+// slice must have a type like []elementType. Passing nil or any other
+// non-slice value results in an error, as does a slice with more
+// elements than there are possible CPUs.
+//
+// Each element occupies elemLength bytes rounded up to a multiple of 8.
+func marshalPerCPUValue(slice interface{}, elemLength int) (syscallPtr, error) {
+	sliceType := reflect.TypeOf(slice)
+	// reflect.TypeOf returns nil for an untyped nil, and calling Kind
+	// on that would panic.
+	if sliceType == nil || sliceType.Kind() != reflect.Slice {
+		return syscallPtr{}, errors.New("per-CPU value requires slice")
+	}
+
+	possibleCPUs, err := internal.PossibleCPUs()
+	if err != nil {
+		return syscallPtr{}, err
+	}
+
+	sliceValue := reflect.ValueOf(slice)
+	sliceLen := sliceValue.Len()
+	if sliceLen > possibleCPUs {
+		return syscallPtr{}, errors.Errorf("per-CPU value exceeds number of CPUs")
+	}
+
+	alignedElemLength := align(elemLength, 8)
+	buf := make([]byte, alignedElemLength*possibleCPUs)
+
+	for i := 0; i < sliceLen; i++ {
+		elem := sliceValue.Index(i).Interface()
+		elemBytes, err := marshalBytes(elem, elemLength)
+		if err != nil {
+			return syscallPtr{}, err
+		}
+
+		// Only the first elemLength bytes of each aligned slot are
+		// written, the padding stays zero.
+		offset := i * alignedElemLength
+		copy(buf[offset:offset+elemLength], elemBytes)
+	}
+
+	return newPtr(unsafe.Pointer(&buf[0])), nil
+}
+
+// unmarshalPerCPUValue decodes a buffer into a slice containing one value per
+// possible CPU.
+//
+// slicePtr must be a non-nil pointer with a type like *[]elementType.
+// The elements may themselves be pointers, in which case a new value is
+// allocated for every CPU. The slice is only stored through slicePtr
+// once all elements have been decoded.
+func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error {
+	slicePtrType := reflect.TypeOf(slicePtr)
+	// reflect.TypeOf returns nil for an untyped nil, and calling Kind
+	// on that would panic.
+	if slicePtrType == nil || slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
+		return errors.Errorf("per-cpu value requires pointer to slice")
+	}
+
+	slicePtrValue := reflect.ValueOf(slicePtr)
+	if slicePtrValue.IsNil() {
+		// Storing the result through a nil pointer would panic.
+		return errors.Errorf("per-cpu value requires non-nil pointer to slice")
+	}
+
+	possibleCPUs, err := internal.PossibleCPUs()
+	if err != nil {
+		return err
+	}
+
+	sliceType := slicePtrType.Elem()
+	slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs)
+
+	sliceElemType := sliceType.Elem()
+	sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr
+	if sliceElemIsPointer {
+		sliceElemType = sliceElemType.Elem()
+	}
+
+	// step is the distance between the values of two CPUs in buf.
+	step := len(buf) / possibleCPUs
+	if step < elemLength {
+		return errors.Errorf("per-cpu element length is larger than available data")
+	}
+	for i := 0; i < possibleCPUs; i++ {
+		var elem interface{}
+		if sliceElemIsPointer {
+			newElem := reflect.New(sliceElemType)
+			slice.Index(i).Set(newElem)
+			elem = newElem.Interface()
+		} else {
+			elem = slice.Index(i).Addr().Interface()
+		}
+
+		// Make a copy, since unmarshal can hold on to itemBytes
+		elemBytes := make([]byte, elemLength)
+		copy(elemBytes, buf[:elemLength])
+
+		err := unmarshalBytes(elem, elemBytes)
+		if err != nil {
+			return errors.Wrapf(err, "cpu %d", i)
+		}
+
+		buf = buf[step:]
+	}
+
+	slicePtrValue.Elem().Set(slice)
+	return nil
+}
+
+// align rounds n up to the next multiple of alignment.
+func align(n, alignment int) int {
+	blocks := (n + alignment - 1) / alignment
+	return blocks * alignment
+}

+ 504 - 0
vendor/github.com/cilium/ebpf/prog.go

@@ -0,0 +1,504 @@
+package ebpf
+
+import (
+	"bytes"
+	"fmt"
+	"math"
+	"strings"
+	"time"
+	"unsafe"
+
+	"github.com/cilium/ebpf/asm"
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+
+	"github.com/pkg/errors"
+)
+
+const (
+	// Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN.
+	// This is currently the maximum of spare space allocated for SKB
+	// and XDP programs, and equal to XDP_PACKET_HEADROOM + NET_IP_ALIGN.
+	outputPad = 256 + 2
+)
+
+// DefaultVerifierLogSize is the default number of bytes allocated for the
+// verifier log.
+const DefaultVerifierLogSize = 64 * 1024
+
+// ProgramOptions control loading a program into the kernel.
+type ProgramOptions struct {
+	// Controls the detail emitted by the kernel verifier. Set to non-zero
+	// to enable logging.
+	LogLevel uint32
+	// Controls the output buffer size for the verifier. Defaults to
+	// DefaultVerifierLogSize.
+	LogSize int
+}
+
+// ProgramSpec defines a Program.
+//
+// Instructions and License are mandatory: loading a spec where either of
+// them is empty fails. Type and AttachType are handed to the kernel as the
+// program type and the expected attach type.
+type ProgramSpec struct {
+	// Name is passed to the kernel as a debug aid. Must only contain
+	// alpha numeric and '_' characters.
+	Name          string
+	Type          ProgramType
+	AttachType    AttachType
+	Instructions  asm.Instructions
+	License       string
+	KernelVersion uint32
+}
+
+// Copy returns a copy of the spec. The instructions are duplicated, so the
+// copy does not share its Instructions backing array with ps.
+//
+// Copying a nil spec returns nil.
+func (ps *ProgramSpec) Copy() *ProgramSpec {
+	if ps == nil {
+		return nil
+	}
+
+	dup := new(ProgramSpec)
+	*dup = *ps
+	dup.Instructions = append(make(asm.Instructions, 0, len(ps.Instructions)), ps.Instructions...)
+	return dup
+}
+
+// Program represents a BPF program loaded into the kernel.
+//
+// It is not safe to close a Program which is used by other goroutines.
+type Program struct {
+	// Contains the output of the kernel verifier if enabled,
+	// otherwise it is empty.
+	VerifierLog string
+
+	// fd refers to the loaded program, name is its (possibly empty) name
+	// and abi records its program type.
+	fd   *bpfFD
+	name string
+	abi  ProgramABI
+}
+
+// NewProgram creates a new Program using the zero value of ProgramOptions.
+//
+// Loading a program for the first time will perform
+// feature detection by loading small, temporary programs.
+func NewProgram(spec *ProgramSpec) (*Program, error) {
+	return NewProgramWithOptions(spec, ProgramOptions{})
+}
+
+// NewProgramWithOptions creates a new Program.
+//
+// Loading a program for the first time will perform
+// feature detection by loading small, temporary programs.
+//
+// If loading fails, the returned error carries the original cause together
+// with the verifier log. When opts.LogLevel is zero, the load is attempted a
+// second time with logging enabled, solely to collect that log.
+func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
+	attr, err := convertProgramSpec(spec)
+	if err != nil {
+		return nil, err
+	}
+
+	logSize := DefaultVerifierLogSize
+	if opts.LogSize > 0 {
+		logSize = opts.LogSize
+	}
+
+	// Only hand a log buffer to the kernel if the caller asked for logging.
+	var logBuf []byte
+	if opts.LogLevel > 0 {
+		logBuf = make([]byte, logSize)
+		attr.logLevel = opts.LogLevel
+		attr.logSize = uint32(len(logBuf))
+		attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0]))
+	}
+
+	fd, err := bpfProgLoad(attr)
+	if err == nil {
+		prog := newProgram(fd, spec.Name, &ProgramABI{spec.Type})
+		prog.VerifierLog = convertCString(logBuf)
+		return prog, nil
+	}
+
+	// ENOSPC is taken to mean that the log did not fit into logBuf.
+	truncated := errors.Cause(err) == unix.ENOSPC
+	if opts.LogLevel == 0 {
+		// Re-run with the verifier enabled to get better error messages.
+		logBuf = make([]byte, logSize)
+		attr.logLevel = 1
+		attr.logSize = uint32(len(logBuf))
+		attr.logBuf = newPtr(unsafe.Pointer(&logBuf[0]))
+
+		// The error of the first attempt is the one reported to the caller;
+		// the second attempt only contributes the log and the truncation flag.
+		_, nerr := bpfProgLoad(attr)
+		truncated = errors.Cause(nerr) == unix.ENOSPC
+	}
+
+	logs := convertCString(logBuf)
+	if truncated {
+		logs += "\n(truncated...)"
+	}
+
+	return nil, &loadError{err, logs}
+}
+
+// NewProgramFromFD creates a program from a raw fd.
+//
+// You should not use fd after calling this function.
+//
+// Requires at least Linux 4.11.
+func NewProgramFromFD(fd int) (*Program, error) {
+	if fd < 0 {
+		return nil, errors.New("invalid fd")
+	}
+	bpfFd := newBPFFD(uint32(fd))
+
+	name, abi, err := newProgramABIFromFd(bpfFd)
+	if err != nil {
+		// Drop the finalizer instead of closing: on failure the raw fd is
+		// left open.
+		bpfFd.forget()
+		return nil, err
+	}
+
+	return newProgram(bpfFd, name, abi), nil
+}
+
+// newProgram wraps fd in a Program. The ABI is stored by value, so later
+// changes to *abi do not affect the returned Program.
+func newProgram(fd *bpfFD, name string, abi *ProgramABI) *Program {
+	return &Program{
+		name: name,
+		fd:   fd,
+		abi:  *abi,
+	}
+}
+
+// convertProgramSpec translates spec into the attribute structure that is
+// passed to the kernel when loading a program.
+//
+// It returns an error if spec has no instructions or no license, if the
+// instructions can't be marshalled, or if spec.Name contains invalid
+// characters. The program name is only set if the kernel supports object
+// names.
+func convertProgramSpec(spec *ProgramSpec) (*bpfProgLoadAttr, error) {
+	if len(spec.Instructions) == 0 {
+		return nil, errors.New("Instructions cannot be empty")
+	}
+
+	if len(spec.License) == 0 {
+		return nil, errors.New("License cannot be empty")
+	}
+
+	buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize))
+	err := spec.Instructions.Marshal(buf, internal.NativeEndian)
+	if err != nil {
+		return nil, err
+	}
+
+	bytecode := buf.Bytes()
+	insCount := uint32(len(bytecode) / asm.InstructionSize)
+	// The kernel reads the license as a C string, so the buffer has to be
+	// NUL terminated. A plain []byte(spec.License) carries no terminator.
+	lic := append([]byte(spec.License), 0)
+	attr := &bpfProgLoadAttr{
+		progType:           spec.Type,
+		expectedAttachType: spec.AttachType,
+		insCount:           insCount,
+		instructions:       newPtr(unsafe.Pointer(&bytecode[0])),
+		license:            newPtr(unsafe.Pointer(&lic[0])),
+		// Forward the version requested by the spec instead of silently
+		// dropping it.
+		kernelVersion: spec.KernelVersion,
+	}
+
+	name, err := newBPFObjName(spec.Name)
+	if err != nil {
+		return nil, err
+	}
+
+	if haveObjName() == nil {
+		attr.progName = name
+	}
+
+	return attr, nil
+}
+
+// String describes the Program by its type, its name in parentheses if it
+// has one, and its file descriptor.
+func (p *Program) String() string {
+	if p.name == "" {
+		return fmt.Sprintf("%s#%v", p.abi.Type, p.fd)
+	}
+	return fmt.Sprintf("%s(%s)#%v", p.abi.Type, p.name, p.fd)
+}
+
+// ABI gets the ABI of the Program. The value is returned as a copy.
+func (p *Program) ABI() ProgramABI {
+	return p.abi
+}
+
+// FD gets the file descriptor of the Program.
+//
+// It is invalid to call this function after Close has been called; in that
+// case -1 is returned.
+func (p *Program) FD() int {
+	if fd, err := p.fd.value(); err == nil {
+		return int(fd)
+	}
+
+	// Best effort: -1 is the number most likely to be an
+	// invalid file descriptor.
+	return -1
+}
+
+// Clone creates a duplicate of the Program, backed by a duplicated file
+// descriptor.
+//
+// Closing the duplicate does not affect the original, and vice versa.
+//
+// Cloning a nil Program returns nil.
+func (p *Program) Clone() (*Program, error) {
+	if p == nil {
+		return nil, nil
+	}
+
+	fd, err := p.fd.dup()
+	if err != nil {
+		return nil, errors.Wrap(err, "can't clone program")
+	}
+
+	clone := newProgram(fd, p.name, &p.abi)
+	return clone, nil
+}
+
+// Pin persists the Program past the lifetime of the process that created it.
+//
+// This requires bpffs to be mounted above fileName. See http://cilium.readthedocs.io/en/doc-1.0/kubernetes/install/#mounting-the-bpf-fs-optional
+func (p *Program) Pin(fileName string) error {
+	return errors.Wrap(bpfPinObject(fileName, p.fd), "can't pin program")
+}
+
+// Close unloads the program from the kernel by closing its file descriptor.
+//
+// Closing a nil Program is a no-op.
+func (p *Program) Close() error {
+	if p != nil {
+		return p.fd.close()
+	}
+
+	return nil
+}
+
+// Test runs the Program once in the kernel with the given input and returns
+// the value returned by the eBPF program together with the output data.
+//
+// Note: the kernel expects at least 14 bytes input for an ethernet header for
+// XDP and SKB programs.
+//
+// This function requires at least Linux 4.12.
+func (p *Program) Test(in []byte) (uint32, []byte, error) {
+	ret, out, _, err := p.testRun(in, 1)
+	return ret, out, errors.Wrap(err, "can't test program")
+}
+
+// Benchmark runs the Program with the given input for a number of times
+// and returns the time taken per iteration.
+//
+// NOTE(review): the duration is passed through unchanged from the kernel's
+// test-run result; confirm that the kernel reports it per iteration.
+//
+// The returned value is the return value of the last execution of
+// the program.
+//
+// This function requires at least Linux 4.12.
+func (p *Program) Benchmark(in []byte, repeat int) (uint32, time.Duration, error) {
+	ret, _, total, err := p.testRun(in, repeat)
+	return ret, total, errors.Wrap(err, "can't benchmark program")
+}
+
+// haveProgTestRun probes for BPF_PROG_TEST_RUN by loading a minimal socket
+// filter and issuing a test run against it. Only EINVAL from the test run is
+// treated as "not supported".
+var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() bool {
+	prog, err := NewProgram(&ProgramSpec{
+		Type: SocketFilter,
+		Instructions: asm.Instructions{
+			asm.LoadImm(asm.R0, 0, asm.DWord),
+			asm.Return(),
+		},
+		License: "MIT",
+	})
+	if err != nil {
+		// This may be because we lack sufficient permissions, etc.
+		return false
+	}
+	defer prog.Close()
+
+	fd, err := prog.fd.value()
+	if err != nil {
+		return false
+	}
+
+	// Programs require at least 14 bytes input
+	in := make([]byte, 14)
+	attr := bpfProgTestRunAttr{
+		fd:         fd,
+		dataSizeIn: uint32(len(in)),
+		dataIn:     newPtr(unsafe.Pointer(&in[0])),
+	}
+
+	_, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+
+	// Check for EINVAL specifically, rather than err != nil since we
+	// otherwise misdetect due to insufficient permissions.
+	return errors.Cause(err) != unix.EINVAL
+})
+
+// testRun executes the program via BPF_PROG_TEST_RUN with in as input,
+// asking the kernel to repeat the run repeat times.
+//
+// It returns the program's return value, the output data and the duration
+// reported by the kernel. An empty input, or an input or repeat count that
+// does not fit into 32 bits, is rejected. The function panics if the kernel
+// reports more output than the output buffer can hold.
+func (p *Program) testRun(in []byte, repeat int) (uint32, []byte, time.Duration, error) {
+	if uint(repeat) > math.MaxUint32 {
+		return 0, nil, 0, fmt.Errorf("repeat is too high")
+	}
+
+	if len(in) == 0 {
+		return 0, nil, 0, fmt.Errorf("missing input")
+	}
+
+	if uint(len(in)) > math.MaxUint32 {
+		return 0, nil, 0, fmt.Errorf("input is too long")
+	}
+
+	if err := haveProgTestRun(); err != nil {
+		return 0, nil, 0, err
+	}
+
+	// Older kernels ignore the dataSizeOut argument when copying to user space.
+	// Combined with things like bpf_xdp_adjust_head() we don't really know what the final
+	// size will be. Hence we allocate an output buffer which we hope will always be large
+	// enough, and panic if the kernel wrote past the end of the allocation.
+	// See https://patchwork.ozlabs.org/cover/1006822/
+	out := make([]byte, len(in)+outputPad)
+
+	fd, err := p.fd.value()
+	if err != nil {
+		return 0, nil, 0, err
+	}
+
+	attr := bpfProgTestRunAttr{
+		fd:          fd,
+		dataSizeIn:  uint32(len(in)),
+		dataSizeOut: uint32(len(out)),
+		dataIn:      newPtr(unsafe.Pointer(&in[0])),
+		dataOut:     newPtr(unsafe.Pointer(&out[0])),
+		repeat:      uint32(repeat),
+	}
+
+	_, err = bpfCall(_ProgTestRun, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	if err != nil {
+		return 0, nil, 0, errors.Wrap(err, "can't run test")
+	}
+
+	if int(attr.dataSizeOut) > cap(out) {
+		// Houston, we have a problem. The program created more data than we allocated,
+		// and the kernel wrote past the end of our buffer.
+		panic("kernel wrote past end of output buffer")
+	}
+	// Trim the buffer to the number of bytes the kernel reports as written.
+	out = out[:int(attr.dataSizeOut)]
+
+	total := time.Duration(attr.duration) * time.Nanosecond
+	return attr.retval, out, total, nil
+}
+
+// unmarshalProgram turns a 4 byte program id in native endianness into a
+// Program by obtaining a new file descriptor for that id.
+func unmarshalProgram(buf []byte) (*Program, error) {
+	if len(buf) != 4 {
+		return nil, errors.New("program id requires 4 byte value")
+	}
+
+	// Looking up an entry in a nested map or prog array returns an id,
+	// not an fd.
+	id := internal.NativeEndian.Uint32(buf)
+	fd, err := bpfGetProgramFDByID(id)
+	if err != nil {
+		return nil, err
+	}
+
+	name, abi, err := newProgramABIFromFd(fd)
+	if err != nil {
+		// The fd was opened above, so it has to be released again.
+		_ = fd.close()
+		return nil, err
+	}
+
+	return newProgram(fd, name, abi), nil
+}
+
+// MarshalBinary implements BinaryMarshaler.
+//
+// The result is the program's file descriptor encoded as 4 bytes in native
+// endianness. It fails if the Program has been closed.
+func (p *Program) MarshalBinary() ([]byte, error) {
+	value, err := p.fd.value()
+	if err != nil {
+		return nil, err
+	}
+
+	buf := make([]byte, 4)
+	internal.NativeEndian.PutUint32(buf, value)
+	return buf, nil
+}
+
+// Attach a Program to a container object fd.
+//
+// A negative fd or a closed Program results in an error.
+func (p *Program) Attach(fd int, typ AttachType, flags AttachFlags) error {
+	if fd < 0 {
+		return errors.New("invalid fd")
+	}
+
+	progFd, err := p.fd.value()
+	if err != nil {
+		return err
+	}
+
+	return bpfProgAlter(_ProgAttach, &bpfProgAlterAttr{
+		targetFd:    uint32(fd),
+		attachBpfFd: progFd,
+		attachType:  uint32(typ),
+		attachFlags: uint32(flags),
+	})
+}
+
+// Detach a Program from a container object fd.
+//
+// A negative fd or a closed Program results in an error.
+func (p *Program) Detach(fd int, typ AttachType, flags AttachFlags) error {
+	if fd < 0 {
+		return errors.New("invalid fd")
+	}
+
+	progFd, err := p.fd.value()
+	if err != nil {
+		return err
+	}
+
+	return bpfProgAlter(_ProgDetach, &bpfProgAlterAttr{
+		targetFd:    uint32(fd),
+		attachBpfFd: progFd,
+		attachType:  uint32(typ),
+		attachFlags: uint32(flags),
+	})
+}
+
+// LoadPinnedProgram loads a Program from a BPF file.
+//
+// Requires at least Linux 4.11.
+func LoadPinnedProgram(fileName string) (*Program, error) {
+	fd, err := bpfGetObject(fileName)
+	if err != nil {
+		return nil, err
+	}
+
+	name, abi, err := newProgramABIFromFd(fd)
+	if err != nil {
+		// Don't leak the freshly opened fd if the ABI can't be determined.
+		_ = fd.close()
+		return nil, errors.Wrapf(err, "can't get ABI for %s", fileName)
+	}
+
+	return newProgram(fd, name, abi), nil
+}
+
+// SanitizeName replaces all characters in name which are not allowed in the
+// name of a BPF object.
+//
+// Use this to automatically generate valid names for maps and
+// programs at run time.
+//
+// Passing a negative value for replacement will delete characters
+// instead of replacing them.
+func SanitizeName(name string, replacement rune) string {
+	sanitize := func(r rune) rune {
+		if !invalidBPFObjNameChar(r) {
+			return r
+		}
+		return replacement
+	}
+	return strings.Map(sanitize, name)
+}
+
+// loadError is returned when loading a program fails. It pairs the
+// underlying error with the verifier log collected for the failed load.
+type loadError struct {
+	cause       error
+	verifierLog string
+}
+
+// Error formats the cause, followed by the verifier log if there is one.
+func (le *loadError) Error() string {
+	if le.verifierLog == "" {
+		return fmt.Sprintf("failed to load program: %s", le.cause)
+	}
+	return fmt.Sprintf("failed to load program: %s: %s", le.cause, le.verifierLog)
+}
+
+// Cause returns the underlying error, which lets errors.Cause see through
+// a loadError.
+func (le *loadError) Cause() error {
+	return le.cause
+}
+
+// IsNotSupported returns true if an error occurred because
+// the kernel does not have support for a specific feature.
+func IsNotSupported(err error) bool {
+	_, notSupported := errors.Cause(err).(*internal.UnsupportedFeatureError)
+	return notSupported
+}

+ 14 - 0
vendor/github.com/cilium/ebpf/ptr_32_be.go

@@ -0,0 +1,14 @@
+// +build armbe mips mips64p32
+
+package ebpf
+
+import (
+	"unsafe"
+)
+
+// syscallPtr wraps an unsafe.Pointer to be 64bit to
+// conform to the syscall specification.
+//
+// This variant is built for 32 bit big endian targets: the padding comes
+// first, so the pointer occupies the last four bytes of the 64 bit field.
+type syscallPtr struct {
+	pad uint32
+	ptr unsafe.Pointer
+}

+ 14 - 0
vendor/github.com/cilium/ebpf/ptr_32_le.go

@@ -0,0 +1,14 @@
+// +build 386 amd64p32 arm mipsle mips64p32le
+
+package ebpf
+
+import (
+	"unsafe"
+)
+
+// syscallPtr wraps an unsafe.Pointer to be 64bit to
+// conform to the syscall specification.
+//
+// This variant is built for 32 bit little endian targets: the pointer comes
+// first and is followed by four bytes of padding.
+type syscallPtr struct {
+	ptr unsafe.Pointer
+	pad uint32
+}

+ 14 - 0
vendor/github.com/cilium/ebpf/ptr_64.go

@@ -0,0 +1,14 @@
+// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le
+// +build !armbe,!mips,!mips64p32
+
+package ebpf
+
+import (
+	"unsafe"
+)
+
+// syscallPtr wraps an unsafe.Pointer to be 64bit to
+// conform to the syscall specification.
+//
+// This variant is built for every target not covered by the 32 bit
+// variants and therefore needs no padding.
+type syscallPtr struct {
+	ptr unsafe.Pointer
+}

+ 20 - 0
vendor/github.com/cilium/ebpf/readme.md

@@ -0,0 +1,20 @@
+eBPF
+-------
+[![](https://godoc.org/github.com/cilium/ebpf?status.svg)](https://godoc.org/github.com/cilium/ebpf)
+
+eBPF is a pure Go library that provides utilities for loading, compiling, and debugging eBPF programs. It has minimal external dependencies and is intended to be used in long-running processes.
+
+[ebpf/asm](https://godoc.org/github.com/cilium/ebpf/asm) contains a basic assembler.
+
+The library is maintained by [Cloudflare](https://www.cloudflare.com) and [Cilium](https://www.cilium.io). Feel free to [join](https://cilium.herokuapp.com/) the [libbpf-go](https://cilium.slack.com/messages/libbpf-go) channel on Slack.
+
+## Current status
+
+The package is production ready, but **the API is explicitly unstable
+right now**. Expect to update your code if you want to follow along.
+
+## Useful resources
+
+* [Cilium eBPF documentation](https://cilium.readthedocs.io/en/latest/bpf/#bpf-guide) (recommended)
+* [Linux documentation on BPF](http://elixir.free-electrons.com/linux/latest/source/Documentation/networking/filter.txt)
+* [eBPF features by Linux version](https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md)

+ 447 - 0
vendor/github.com/cilium/ebpf/syscalls.go

@@ -0,0 +1,447 @@
+package ebpf
+
+import (
+	"bytes"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+	"unsafe"
+
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+
+	"github.com/pkg/errors"
+)
+
+// errClosedFd is returned when a bpfFD is used after it has been closed.
+var errClosedFd = errors.New("use of closed file descriptor")
+
+// bpfFD wraps a raw file descriptor. A negative raw value marks the
+// descriptor as closed.
+type bpfFD struct {
+	raw int64
+}
+
+// newBPFFD wraps value in a bpfFD and registers a finalizer which closes the
+// descriptor once the bpfFD becomes unreachable.
+func newBPFFD(value uint32) *bpfFD {
+	fd := &bpfFD{int64(value)}
+	runtime.SetFinalizer(fd, (*bpfFD).close)
+	return fd
+}
+
+// String returns the raw descriptor as a decimal number.
+func (fd *bpfFD) String() string {
+	return strconv.FormatInt(fd.raw, 10)
+}
+
+// value returns the raw descriptor, or errClosedFd if fd has been closed.
+func (fd *bpfFD) value() (uint32, error) {
+	if raw := fd.raw; raw >= 0 {
+		return uint32(raw), nil
+	}
+
+	return 0, errClosedFd
+}
+
+// close closes the underlying descriptor and marks fd as closed. Closing an
+// already closed fd is a no-op.
+func (fd *bpfFD) close() error {
+	if fd.raw < 0 {
+		return nil
+	}
+
+	// Mark fd as closed before the actual close, so that a second call
+	// returns early.
+	value := int(fd.raw)
+	fd.raw = -1
+
+	fd.forget()
+	return unix.Close(value)
+}
+
+// forget removes the finalizer from fd, so that garbage collecting fd no
+// longer closes the underlying descriptor.
+func (fd *bpfFD) forget() {
+	runtime.SetFinalizer(fd, nil)
+}
+
+// dup duplicates fd using F_DUPFD_CLOEXEC and returns the copy as a new
+// bpfFD. It fails with errClosedFd if fd has been closed.
+func (fd *bpfFD) dup() (*bpfFD, error) {
+	raw := fd.raw
+	if raw < 0 {
+		return nil, errClosedFd
+	}
+
+	copied, err := unix.FcntlInt(uintptr(raw), unix.F_DUPFD_CLOEXEC, 0)
+	if err != nil {
+		return nil, errors.Wrap(err, "can't dup fd")
+	}
+
+	return newBPFFD(uint32(copied)), nil
+}
+
+// bpfObjName is a null-terminated string made up of
+// 'A-Za-z0-9_' characters. It is stored in a fixed size array of
+// unix.BPF_OBJ_NAME_LEN bytes.
+type bpfObjName [unix.BPF_OBJ_NAME_LEN]byte
+
+// newBPFObjName converts name into a bpfObjName.
+//
+// It returns an error if name contains a character other than
+// 'A-Za-z0-9_'. The result is truncated if name is too long; the last byte
+// is always left as the terminating NUL.
+func newBPFObjName(name string) (bpfObjName, error) {
+	// Range over runes rather than indexing bytes, so that the offending
+	// character is reported intact even if it is encoded as multiple bytes.
+	for _, char := range name {
+		if invalidBPFObjNameChar(char) {
+			return bpfObjName{}, errors.Errorf("invalid character '%c' in name '%s'", char, name)
+		}
+	}
+
+	var result bpfObjName
+	copy(result[:unix.BPF_OBJ_NAME_LEN-1], name)
+	return result, nil
+}
+
+// invalidBPFObjNameChar returns true if char is not one of 'A-Za-z0-9_'.
+func invalidBPFObjNameChar(char rune) bool {
+	valid := char == '_' ||
+		(char >= 'A' && char <= 'Z') ||
+		(char >= 'a' && char <= 'z') ||
+		(char >= '0' && char <= '9')
+	return !valid
+}
+
+// bpfMapCreateAttr holds the attributes passed to the kernel by
+// bpfMapCreate. The trailing comments name the kernel version and commit
+// which introduced a field.
+type bpfMapCreateAttr struct {
+	mapType    MapType
+	keySize    uint32
+	valueSize  uint32
+	maxEntries uint32
+	flags      uint32
+	innerMapFd uint32     // since 4.12 56f668dfe00d
+	numaNode   uint32     // since 4.14 96eabe7a40aa
+	mapName    bpfObjName // since 4.15 ad5b177bd73f
+}
+
+// bpfMapOpAttr holds the attributes shared by the map element operations:
+// lookup, update, delete and get-next-key. For get-next-key the value field
+// receives the next key.
+type bpfMapOpAttr struct {
+	mapFd   uint32
+	padding uint32
+	key     syscallPtr
+	value   syscallPtr
+	flags   uint64
+}
+
+// bpfMapInfo is the structure filled in by bpfGetMapInfoByFD.
+type bpfMapInfo struct {
+	mapType    uint32
+	id         uint32
+	keySize    uint32
+	valueSize  uint32
+	maxEntries uint32
+	flags      uint32
+	mapName    bpfObjName // since 4.15 ad5b177bd73f
+}
+
+// bpfPinObjAttr holds the attributes used to pin an object to a file
+// (bpfPinObject) and to retrieve it again (bpfGetObject).
+type bpfPinObjAttr struct {
+	fileName syscallPtr
+	fd       uint32
+	padding  uint32
+}
+
+// bpfProgLoadAttr holds the attributes passed to the kernel by bpfProgLoad.
+// The trailing comments name the kernel version and commit which introduced
+// a field.
+type bpfProgLoadAttr struct {
+	progType           ProgramType
+	insCount           uint32
+	instructions       syscallPtr
+	license            syscallPtr
+	logLevel           uint32
+	logSize            uint32
+	logBuf             syscallPtr
+	kernelVersion      uint32     // since 4.1  2541517c32be
+	progFlags          uint32     // since 4.11 e07b98d9bffe
+	progName           bpfObjName // since 4.15 067cae47771c
+	progIfIndex        uint32     // since 4.15 1f6f4cb7ba21
+	expectedAttachType AttachType // since 4.17 5e43f899b03a
+}
+
+// bpfProgInfo is the structure filled in by bpfGetProgInfoByFD.
+type bpfProgInfo struct {
+	progType     uint32
+	id           uint32
+	tag          [unix.BPF_TAG_SIZE]byte
+	jitedLen     uint32
+	xlatedLen    uint32
+	jited        syscallPtr
+	xlated       syscallPtr
+	loadTime     uint64 // since 4.15 cb4d2b3f03d8
+	createdByUID uint32
+	nrMapIDs     uint32
+	mapIds       syscallPtr
+	name         bpfObjName
+}
+
+// bpfProgTestRunAttr holds the attributes of a program test run. retval,
+// dataSizeOut and duration are read back after the call, so they act as
+// outputs.
+type bpfProgTestRunAttr struct {
+	fd          uint32
+	retval      uint32
+	dataSizeIn  uint32
+	dataSizeOut uint32
+	dataIn      syscallPtr
+	dataOut     syscallPtr
+	repeat      uint32
+	duration    uint32
+}
+
+// bpfProgAlterAttr holds the attributes passed to the kernel by
+// bpfProgAlter, which is used for attaching and detaching programs.
+type bpfProgAlterAttr struct {
+	targetFd    uint32
+	attachBpfFd uint32
+	attachType  uint32
+	attachFlags uint32
+}
+
+// bpfObjGetInfoByFDAttr holds the attributes passed to the kernel by
+// bpfGetObjectInfoByFD. infoLen is the size of the structure info points to.
+type bpfObjGetInfoByFDAttr struct {
+	fd      uint32
+	infoLen uint32
+	info    syscallPtr // May be either bpfMapInfo or bpfProgInfo
+}
+
+// bpfGetFDByIDAttr holds the attributes used to turn a map or program id
+// into a file descriptor.
+type bpfGetFDByIDAttr struct {
+	id   uint32
+	next uint32
+}
+
+// newPtr wraps ptr in a syscallPtr.
+func newPtr(ptr unsafe.Pointer) syscallPtr {
+	return syscallPtr{ptr: ptr}
+}
+
+// bpfProgLoad loads a program described by attr and returns its file
+// descriptor. The call is retried for as long as the kernel returns EAGAIN.
+func bpfProgLoad(attr *bpfProgLoadAttr) (*bpfFD, error) {
+	for {
+		fd, err := bpfCall(_ProgLoad, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
+		// As of ~4.20 the verifier can be interrupted by a signal,
+		// and returns EAGAIN in that case.
+		if err == unix.EAGAIN {
+			continue
+		}
+
+		if err != nil {
+			return nil, err
+		}
+
+		return newBPFFD(uint32(fd)), nil
+	}
+}
+
+// bpfProgAlter issues cmd with attr and discards the result value. It is
+// used with _ProgAttach and _ProgDetach.
+func bpfProgAlter(cmd int, attr *bpfProgAlterAttr) error {
+	_, err := bpfCall(cmd, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
+	return err
+}
+
+// bpfMapCreate creates a map described by attr and returns its file
+// descriptor.
+func bpfMapCreate(attr *bpfMapCreateAttr) (*bpfFD, error) {
+	fd, err := bpfCall(_MapCreate, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
+	if err != nil {
+		return nil, err
+	}
+
+	return newBPFFD(uint32(fd)), nil
+}
+
+// haveNestedMaps probes for nested map support by creating an array map and
+// then an array of maps which uses the former as its inner map.
+var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() bool {
+	inner, err := bpfMapCreate(&bpfMapCreateAttr{
+		mapType:    Array,
+		keySize:    4,
+		valueSize:  4,
+		maxEntries: 1,
+	})
+	if err != nil {
+		return false
+	}
+	defer inner.close()
+
+	// inner was created just above and is still open, so the error from
+	// value() is ignored.
+	innerFd, _ := inner.value()
+	nested, err := bpfMapCreate(&bpfMapCreateAttr{
+		mapType:    ArrayOfMaps,
+		keySize:    4,
+		valueSize:  4,
+		maxEntries: 1,
+		innerMapFd: innerFd,
+	})
+	if err != nil {
+		return false
+	}
+
+	_ = nested.close()
+	return true
+})
+
+// bpfMapLookupElem looks up key in the map m. valueOut points to the buffer
+// handed to the kernel for the value.
+func bpfMapLookupElem(m *bpfFD, key, valueOut syscallPtr) error {
+	fd, err := m.value()
+	if err != nil {
+		return err
+	}
+
+	attr := bpfMapOpAttr{
+		mapFd: fd,
+		key:   key,
+		value: valueOut,
+	}
+	_, err = bpfCall(_MapLookupElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return err
+}
+
+// bpfMapUpdateElem issues a map update for key in the map m, passing flags
+// on to the kernel.
+//
+// NOTE(review): despite its name, valueOut is presumably the value to store
+// rather than an output buffer — confirm against callers.
+func bpfMapUpdateElem(m *bpfFD, key, valueOut syscallPtr, flags uint64) error {
+	fd, err := m.value()
+	if err != nil {
+		return err
+	}
+
+	attr := bpfMapOpAttr{
+		mapFd: fd,
+		key:   key,
+		value: valueOut,
+		flags: flags,
+	}
+	_, err = bpfCall(_MapUpdateElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return err
+}
+
+// bpfMapDeleteElem deletes key from the map m.
+func bpfMapDeleteElem(m *bpfFD, key syscallPtr) error {
+	fd, err := m.value()
+	if err != nil {
+		return err
+	}
+
+	attr := bpfMapOpAttr{
+		mapFd: fd,
+		key:   key,
+	}
+	_, err = bpfCall(_MapDeleteElem, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return err
+}
+
+// bpfMapGetNextKey asks the kernel for the key following key in the map m.
+// nextKeyOut points to the buffer handed to the kernel for the result; it is
+// passed in the value field of the attributes.
+func bpfMapGetNextKey(m *bpfFD, key, nextKeyOut syscallPtr) error {
+	fd, err := m.value()
+	if err != nil {
+		return err
+	}
+
+	attr := bpfMapOpAttr{
+		mapFd: fd,
+		key:   key,
+		value: nextKeyOut,
+	}
+	_, err = bpfCall(_MapGetNextKey, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return err
+}
+
+// bpfFSType is the file system type which statfs is expected to report for
+// a bpf file system.
+const bpfFSType = 0xcafe4a11
+
+// bpfPinObject pins fd to fileName.
+//
+// The directory containing fileName must be on a bpf file system, otherwise
+// an error is returned without contacting the kernel. It also fails if fd
+// has been closed.
+func bpfPinObject(fileName string, fd *bpfFD) error {
+	dirName := filepath.Dir(fileName)
+	var statfs unix.Statfs_t
+	if err := unix.Statfs(dirName, &statfs); err != nil {
+		return err
+	}
+	if uint64(statfs.Type) != bpfFSType {
+		return errors.Errorf("%s is not on a bpf filesystem", fileName)
+	}
+
+	value, err := fd.value()
+	if err != nil {
+		return err
+	}
+
+	// The kernel reads the path as a C string, so it has to be NUL
+	// terminated. Appending the terminator also keeps an empty fileName
+	// from panicking on the index below.
+	path := append([]byte(fileName), 0)
+	attr := bpfPinObjAttr{
+		fileName: newPtr(unsafe.Pointer(&path[0])),
+		fd:       value,
+	}
+	_, err = bpfCall(_ObjPin, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return errors.Wrapf(err, "pin object %s", fileName)
+}
+
+// bpfGetObject opens the object pinned at fileName and returns its file
+// descriptor.
+func bpfGetObject(fileName string) (*bpfFD, error) {
+	// The kernel reads the path as a C string, so it has to be NUL
+	// terminated. Appending the terminator also keeps an empty fileName
+	// from panicking on the index below.
+	path := append([]byte(fileName), 0)
+	attr := bpfPinObjAttr{
+		fileName: newPtr(unsafe.Pointer(&path[0])),
+	}
+	ptr, err := bpfCall(_ObjGet, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	if err != nil {
+		return nil, errors.Wrapf(err, "get object %s", fileName)
+	}
+	return newBPFFD(uint32(ptr)), nil
+}
+
+// bpfGetObjectInfoByFD asks the kernel to fill the structure of the given
+// size at info with information about the object fd refers to.
+func bpfGetObjectInfoByFD(fd *bpfFD, info unsafe.Pointer, size uintptr) error {
+	value, err := fd.value()
+	if err != nil {
+		return err
+	}
+
+	// available from 4.13
+	attr := bpfObjGetInfoByFDAttr{
+		fd:      value,
+		infoLen: uint32(size),
+		info:    newPtr(info),
+	}
+	_, err = bpfCall(_ObjGetInfoByFD, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	return errors.Wrapf(err, "fd %d", value)
+}
+
+// bpfGetProgInfoByFD retrieves the bpfProgInfo of the program fd refers to.
+//
+// The returned pointer is non-nil even on failure, so callers have to check
+// the error before using the result.
+func bpfGetProgInfoByFD(fd *bpfFD) (*bpfProgInfo, error) {
+	var info bpfProgInfo
+	err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
+	return &info, errors.Wrap(err, "can't get program info")
+}
+
+// bpfGetMapInfoByFD retrieves the bpfMapInfo of the map fd refers to.
+//
+// The returned pointer is non-nil even on failure, so callers have to check
+// the error before using the result.
+func bpfGetMapInfoByFD(fd *bpfFD) (*bpfMapInfo, error) {
+	var info bpfMapInfo
+	err := bpfGetObjectInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
+	return &info, errors.Wrap(err, "can't get map info")
+}
+
+// haveObjName probes for object name support by creating a small array map
+// which carries a name.
+var haveObjName = internal.FeatureTest("object names", "4.15", func() bool {
+	name, err := newBPFObjName("feature_test")
+	if err != nil {
+		// This really is a fatal error, but it should be caught
+		// by the unit tests not working.
+		return false
+	}
+
+	attr := bpfMapCreateAttr{
+		mapType:    Array,
+		keySize:    4,
+		valueSize:  4,
+		maxEntries: 1,
+		mapName:    name,
+	}
+
+	fd, err := bpfMapCreate(&attr)
+	if err != nil {
+		return false
+	}
+
+	// The map only served as a probe and is not needed any further.
+	_ = fd.close()
+	return true
+})
+
+// bpfGetMapFDByID returns a new file descriptor for the map with the given
+// id.
+func bpfGetMapFDByID(id uint32) (*bpfFD, error) {
+	// available from 4.13
+	attr := bpfGetFDByIDAttr{
+		id: id,
+	}
+	ptr, err := bpfCall(_MapGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	if err != nil {
+		return nil, errors.Wrapf(err, "can't get fd for map id %d", id)
+	}
+	return newBPFFD(uint32(ptr)), nil
+}
+
+// bpfGetProgramFDByID returns a new file descriptor for the program with
+// the given id.
+func bpfGetProgramFDByID(id uint32) (*bpfFD, error) {
+	// available from 4.13
+	attr := bpfGetFDByIDAttr{
+		id: id,
+	}
+	ptr, err := bpfCall(_ProgGetFDByID, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	if err != nil {
+		return nil, errors.Wrapf(err, "can't get fd for program id %d", id)
+	}
+	return newBPFFD(uint32(ptr)), nil
+}
+
+// bpfCall invokes the bpf syscall with the command cmd, passing the
+// attribute structure at attr, which is size bytes long.
+//
+// It returns the first result value of the syscall. The error is nil if the
+// syscall reported errno 0, and the errno otherwise.
+func bpfCall(cmd int, attr unsafe.Pointer, size uintptr) (uintptr, error) {
+	r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
+	// attr is handed to the syscall as a uintptr, so keep it reachable
+	// until the call has returned.
+	runtime.KeepAlive(attr)
+
+	// Convert explicitly so that errno 0 yields a nil error interface.
+	var err error
+	if errNo != 0 {
+		err = errNo
+	}
+
+	return r1, err
+}
+
+// convertCString returns the bytes of in up to, but not including, the first
+// NUL byte as a string. If in contains no NUL byte the result is empty.
+func convertCString(in []byte) string {
+	if end := bytes.IndexByte(in, 0); end >= 0 {
+		return string(in[:end])
+	}
+	return ""
+}

+ 189 - 0
vendor/github.com/cilium/ebpf/types.go

@@ -0,0 +1,189 @@
+package ebpf
+
+//go:generate stringer -output types_string.go -type=MapType,ProgramType
+
+// MapType indicates the type of map structure
+// that will be initialized in the kernel.
+type MapType uint32
+
+// All the various map types that can be created
+const (
+	// UnspecifiedMap is the zero value of MapType.
+	UnspecifiedMap MapType = iota
+	// Hash is a hash map
+	Hash
+	// Array is an array map
+	Array
+	// ProgramArray - A program array map is a special kind of array map whose map
+	// values contain only file descriptors referring to other eBPF
+	// programs.  Thus, both the key_size and value_size must be
+	// exactly four bytes.  This map is used in conjunction with the
+	// TailCall helper.
+	ProgramArray
+	// PerfEventArray - A perf event array is used in conjunction with PerfEventRead
+	// and PerfEventOutput calls, to read the raw bpf_perf_data from the registers.
+	PerfEventArray
+	// PerCPUHash - This data structure is useful for people who have high performance
+	// network needs and can reconcile adds at the end of some cycle, so that
+	// hashes can be lock free without the use of XAdd, which can be costly.
+	PerCPUHash
+	// PerCPUArray - This data structure is useful for people who have high performance
+	// network needs and can reconcile adds at the end of some cycle, so that
+	// updates can be lock free without the use of XAdd, which can be costly.
+	// Each CPU gets its own copy of the array, the contents of all of which can be
+	// reconciled later.
+	PerCPUArray
+	// StackTrace - This holds whole user and kernel stack traces, it can be retrieved with
+	// GetStackID
+	StackTrace
+	// CGroupArray - This is a very niche structure used to help SKBInCGroup determine
+	// if an skb is from a socket belonging to a specific cgroup
+	CGroupArray
+	// LRUHash - This allows you to create a small hash structure that will purge the
+	// least recently used items rather than throw an error when you run out of memory
+	LRUHash
+	// LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs,
+	// it has more to do with including the CPU id with the LRU calculation so that if a
+	// particular CPU is using a value over-and-over again, then it will be saved, but if
+	// a value is being retrieved a lot but sparsely across CPUs it is not as important, basically
+	// giving weight to CPU locality over overall usage.
+	LRUCPUHash
+	// LPMTrie - This is an implementation of a Longest-Prefix-Match Trie structure. It is useful
+	// for storing things like IP addresses which can be bit masked, allowing for keys of differing
+	// values to refer to the same reference based on their masks. See Wikipedia for more details.
+	LPMTrie
+	// ArrayOfMaps - Each item in the array is another map. The inner map mustn't be a map of maps
+	// itself.
+	ArrayOfMaps
+	// HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps
+	// itself.
+	HashOfMaps
+)
+
+// hasPerCPUValue returns true if the Map stores a value per CPU, which is
+// the case for PerCPUHash and PerCPUArray.
+func (mt MapType) hasPerCPUValue() bool {
+	return mt == PerCPUHash || mt == PerCPUArray
+}
+
+// Commands of the bpf syscall, passed as cmd to bpfCall. The values are
+// assigned by iota, so the order of the list must not be changed.
+const (
+	_MapCreate = iota
+	_MapLookupElem
+	_MapUpdateElem
+	_MapDeleteElem
+	_MapGetNextKey
+	_ProgLoad
+	_ObjPin
+	_ObjGet
+	_ProgAttach
+	_ProgDetach
+	_ProgTestRun
+	_ProgGetNextID
+	_MapGetNextID
+	_ProgGetFDByID
+	_MapGetFDByID
+	_ObjGetInfoByFD
+)
+
+// NOTE(review): presumably the flags accepted when updating a map element
+// (any / only if absent / only if present) — confirm against the callers of
+// bpfMapUpdateElem.
+const (
+	_Any = iota
+	_NoExist
+	_Exist
+)
+
+// ProgramType of the eBPF program
+type ProgramType uint32
+
+// eBPF program types
+const (
+	// Unrecognized program type
+	UnspecifiedProgram ProgramType = iota
+	// SocketFilter socket or seccomp filter
+	SocketFilter
+	// Kprobe program
+	Kprobe
+	// SchedCLS traffic control shaper
+	SchedCLS
+	// SchedACT routing control shaper
+	SchedACT
+	// TracePoint program
+	TracePoint
+	// XDP program
+	XDP
+	// PerfEvent program
+	PerfEvent
+	// CGroupSKB program
+	CGroupSKB
+	// CGroupSock program
+	CGroupSock
+	// LWTIn program
+	LWTIn
+	// LWTOut program
+	LWTOut
+	// LWTXmit program
+	LWTXmit
+	// SockOps program
+	SockOps
+	// SkSKB program
+	SkSKB
+	// CGroupDevice program
+	CGroupDevice
+	// SkMsg program
+	SkMsg
+	// RawTracepoint program
+	RawTracepoint
+	// CGroupSockAddr program
+	CGroupSockAddr
+	// LWTSeg6Local program
+	LWTSeg6Local
+	// LircMode2 program
+	LircMode2
+	// SkReuseport program
+	SkReuseport
+	// FlowDissector program
+	FlowDissector
+	// CGroupSysctl program
+	CGroupSysctl
+	// RawTracepointWritable program
+	RawTracepointWritable
+	// CGroupSockopt program
+	CGroupSockopt
+)
+
+// AttachType of the eBPF program, needed to differentiate allowed context accesses in
+// some newer program types like CGroupSockAddr. Should be set to AttachNone if not required.
+// Will cause invalid argument (EINVAL) at program load time if set incorrectly.
+type AttachType uint32
+
+// AttachNone is an alias for AttachCGroupInetIngress for readability reasons
+const AttachNone AttachType = 0
+
+const (
+	AttachCGroupInetIngress AttachType = iota
+	AttachCGroupInetEgress
+	AttachCGroupInetSockCreate
+	AttachCGroupSockOps
+	AttachSkSKBStreamParser
+	AttachSkSKBStreamVerdict
+	AttachCGroupDevice
+	AttachSkMsgVerdict
+	AttachCGroupInet4Bind
+	AttachCGroupInet6Bind
+	AttachCGroupInet4Connect
+	AttachCGroupInet6Connect
+	AttachCGroupInet4PostBind
+	AttachCGroupInet6PostBind
+	AttachCGroupUDP4Sendmsg
+	AttachCGroupUDP6Sendmsg
+	AttachLircMode2
+	AttachFlowDissector
+	AttachCGroupSysctl
+	AttachCGroupUDP4Recvmsg
+	AttachCGroupUDP6Recvmsg
+	AttachCGroupGetsockopt
+	AttachCGroupSetsockopt
+)
+
+// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command
+type AttachFlags uint32

+ 78 - 0
vendor/github.com/cilium/ebpf/types_string.go

@@ -0,0 +1,78 @@
+// Code generated by "stringer -output types_string.go -type=MapType,ProgramType"; DO NOT EDIT.
+
+package ebpf
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[UnspecifiedMap-0]
+	_ = x[Hash-1]
+	_ = x[Array-2]
+	_ = x[ProgramArray-3]
+	_ = x[PerfEventArray-4]
+	_ = x[PerCPUHash-5]
+	_ = x[PerCPUArray-6]
+	_ = x[StackTrace-7]
+	_ = x[CGroupArray-8]
+	_ = x[LRUHash-9]
+	_ = x[LRUCPUHash-10]
+	_ = x[LPMTrie-11]
+	_ = x[ArrayOfMaps-12]
+	_ = x[HashOfMaps-13]
+}
+
+const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMaps"
+
+var _MapType_index = [...]uint8{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136}
+
+// String returns the constant name of i, or "MapType(N)" when i lies outside
+// the generated name table.
+func (i MapType) String() string {
+	if i >= MapType(len(_MapType_index)-1) {
+		return "MapType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _MapType_name[_MapType_index[i]:_MapType_index[i+1]]
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[UnspecifiedProgram-0]
+	_ = x[SocketFilter-1]
+	_ = x[Kprobe-2]
+	_ = x[SchedCLS-3]
+	_ = x[SchedACT-4]
+	_ = x[TracePoint-5]
+	_ = x[XDP-6]
+	_ = x[PerfEvent-7]
+	_ = x[CGroupSKB-8]
+	_ = x[CGroupSock-9]
+	_ = x[LWTIn-10]
+	_ = x[LWTOut-11]
+	_ = x[LWTXmit-12]
+	_ = x[SockOps-13]
+	_ = x[SkSKB-14]
+	_ = x[CGroupDevice-15]
+	_ = x[SkMsg-16]
+	_ = x[RawTracepoint-17]
+	_ = x[CGroupSockAddr-18]
+	_ = x[LWTSeg6Local-19]
+	_ = x[LircMode2-20]
+	_ = x[SkReuseport-21]
+	_ = x[FlowDissector-22]
+	_ = x[CGroupSysctl-23]
+	_ = x[RawTracepointWritable-24]
+	_ = x[CGroupSockopt-25]
+}
+
+const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockopt"
+
+var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258}
+
+// String returns the constant name of i, or "ProgramType(N)" when i lies
+// outside the generated name table.
+func (i ProgramType) String() string {
+	if i >= ProgramType(len(_ProgramType_index)-1) {
+		return "ProgramType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _ProgramType_name[_ProgramType_index[i]:_ProgramType_index[i+1]]
+}

+ 83 - 0
vendor/github.com/containerd/cgroups/v2/cpu.go

@@ -0,0 +1,83 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"math"
+	"strconv"
+	"strings"
+)
+
+// CPUMax is the textual value of the cgroup v2 "cpu.max" file, in the form
+// "<quota> <period>".
+type CPUMax string
+
+// NewCPUMax builds a CPUMax from an optional quota and an optional period.
+//
+// A nil quota is rendered as "max". A nil period is rendered as 100000, the
+// default period documented for cpu.max, instead of dereferencing a nil
+// pointer.
+func NewCPUMax(quota *int64, period *uint64) CPUMax {
+	// Default cpu.max period, used when the caller does not supply one.
+	const defaultPeriod uint64 = 100000
+
+	max := "max"
+	if quota != nil {
+		max = strconv.FormatInt(*quota, 10)
+	}
+	p := defaultPeriod
+	if period != nil {
+		p = *period
+	}
+	return CPUMax(strings.Join([]string{max, strconv.FormatUint(p, 10)}, " "))
+}
+
+// CPU holds the cpu and cpuset settings of a cgroup. Each field that is set
+// (non-nil / non-empty) is written by Values to the file named below.
+type CPU struct {
+	Weight *uint64 // cpu.weight
+	Max    CPUMax  // cpu.max
+	Cpus   string  // cpuset.cpus
+	Mems   string  // cpuset.mems
+}
+
+// extractQuotaAndPeriod splits a "<quota> <period>" value into its numeric
+// parts.
+//
+// A quota of "max" is reported as math.MaxInt64. A missing field, or one that
+// does not parse as a number, is reported as 0; an empty value yields (0, 0)
+// rather than panicking on an out-of-range index.
+func (c CPUMax) extractQuotaAndPeriod() (int64, uint64) {
+	var (
+		quota  int64
+		period uint64
+	)
+	// Fields tolerates repeated or surrounding whitespace and returns an
+	// empty slice for an empty value.
+	values := strings.Fields(string(c))
+	if len(values) == 0 {
+		return quota, period
+	}
+	if values[0] == "max" {
+		quota = math.MaxInt64
+	} else {
+		// Parse errors are intentionally ignored; see the doc comment.
+		quota, _ = strconv.ParseInt(values[0], 10, 64)
+	}
+	if len(values) > 1 {
+		period, _ = strconv.ParseUint(values[1], 10, 64)
+	}
+	return quota, period
+}
+
+// Values lists the cgroup files to write for the CPU settings that are set,
+// in the order cpu.weight, cpu.max, cpuset.cpus, cpuset.mems. Unset fields
+// (nil Weight, empty strings) produce no entry.
+func (r *CPU) Values() (o []Value) {
+	if w := r.Weight; w != nil {
+		o = append(o, Value{filename: "cpu.weight", value: *w})
+	}
+	if r.Max != "" {
+		o = append(o, Value{filename: "cpu.max", value: r.Max})
+	}
+	// The two cpuset files follow the same "only when non-empty" rule.
+	cpusets := [...]struct{ file, setting string }{
+		{"cpuset.cpus", r.Cpus},
+		{"cpuset.mems", r.Mems},
+	}
+	for _, cs := range cpusets {
+		if cs.setting == "" {
+			continue
+		}
+		o = append(o, Value{filename: cs.file, value: cs.setting})
+	}
+	return o
+}

+ 199 - 0
vendor/github.com/containerd/cgroups/v2/devicefilter.go

@@ -0,0 +1,199 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+// Devicefilter contains the eBPF device filter program.
+//
+// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
+//
+// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
+// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
+//
+// This particular Go implementation based on runc version
+// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
+package v2
+
+import (
+	"fmt"
+	"math"
+
+	"github.com/cilium/ebpf/asm"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+)
+
+const (
+	// license string format is same as kernel MODULE_LICENSE macro
+	license = "Apache"
+)
+
+// DeviceFilter compiles the given device cgroup rules into an eBPF device
+// filter program and returns its instructions together with the license
+// string.
+//
+// The rules are handed to the program builder from the last element to the
+// first. On a rule error the instructions are nil and the license is empty.
+func DeviceFilter(devices []specs.LinuxDeviceCgroup) (asm.Instructions, string, error) {
+	prog := new(program)
+	prog.init()
+	idx := len(devices)
+	for idx > 0 {
+		idx--
+		if err := prog.appendDevice(devices[idx]); err != nil {
+			return nil, "", err
+		}
+	}
+	insts, err := prog.finalize()
+	return insts, license, err
+}
+
+// program accumulates the instructions of a device filter while rules are
+// appended one by one.
+type program struct {
+	// insts is the instruction stream built so far.
+	insts asm.Instructions
+	// hasWildCard is set once a rule without any type/access/major/minor
+	// condition has been appended; later rules are then ignored.
+	hasWildCard bool
+	// blockID numbers the next "block-N" label; finalize sets it to -1 when
+	// it emits the trailing block.
+	blockID int
+}
+
+// init emits the prologue that unpacks the context pointed to by R1 into
+// the registers R2-R5 used by the per-rule blocks.
+func (p *program) init() {
+	// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
+	/*
+		u32 access_type
+		u32 major
+		u32 minor
+	*/
+	prologue := asm.Instructions{
+		// R2 <- type (lower 16 bit of u32 access_type at R1[0])
+		asm.LoadMem(asm.R2, asm.R1, 0, asm.Half),
+
+		// R3 <- access (upper 16 bit of u32 access_type at R1[0])
+		asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
+		// RSh: bitwise shift right
+		asm.RSh.Imm32(asm.R3, 16),
+
+		// R4 <- major (u32 major at R1[4])
+		asm.LoadMem(asm.R4, asm.R1, 4, asm.Word),
+
+		// R5 <- minor (u32 minor at R1[8])
+		asm.LoadMem(asm.R5, asm.R1, 8, asm.Word),
+	}
+	p.insts = append(p.insts, prologue...)
+}
+
+// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
+//
+// It appends one "block-N" of instructions that compares the request in
+// R2-R5 with the rule and, on a match, returns the rule's allow/deny verdict;
+// otherwise it jumps to the next block. A rule without any condition is a
+// wildcard: it terminates the program and later calls become no-ops.
+//
+// A nil or negative Major/Minor means "any". An error is returned when the
+// program is already finalized, or when the device type, major, minor or
+// access string is invalid.
+func (p *program) appendDevice(dev specs.LinuxDeviceCgroup) error {
+	if p.blockID < 0 {
+		return errors.New("the program is finalized")
+	}
+	if p.hasWildCard {
+		// All entries after wildcard entry are ignored
+		return nil
+	}
+
+	bpfType := int32(-1)
+	hasType := true
+	switch dev.Type {
+	case string('c'):
+		bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
+	case string('b'):
+		bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
+	case string('a'):
+		hasType = false
+	default:
+		// if not specified in OCI json, typ is set to DeviceTypeAll
+		return errors.Errorf("invalid DeviceType %q", dev.Type)
+	}
+	// Major and Minor are optional pointers; treat a missing value like -1
+	// ("any") instead of dereferencing nil.
+	major, minor := int64(-1), int64(-1)
+	if dev.Major != nil {
+		major = *dev.Major
+	}
+	if dev.Minor != nil {
+		minor = *dev.Minor
+	}
+	if major > math.MaxUint32 {
+		return errors.Errorf("invalid major %d", major)
+	}
+	if minor > math.MaxUint32 {
+		return errors.Errorf("invalid minor %d", minor)
+	}
+	hasMajor := major >= 0 // if not specified in OCI json, major is set to -1
+	hasMinor := minor >= 0
+	bpfAccess := int32(0)
+	for _, r := range dev.Access {
+		switch r {
+		case 'r':
+			bpfAccess |= unix.BPF_DEVCG_ACC_READ
+		case 'w':
+			bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
+		case 'm':
+			bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
+		default:
+			// %q prints the offending character rather than its code point.
+			return errors.Errorf("unknown device access %q", r)
+		}
+	}
+	// If the access is rwm, skip the check.
+	hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
+
+	blockSym := fmt.Sprintf("block-%d", p.blockID)
+	nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
+	prevBlockLastIdx := len(p.insts) - 1
+	if hasType {
+		p.insts = append(p.insts,
+			// if (R2 != bpfType) goto next
+			asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
+		)
+	}
+	if hasAccess {
+		// NOTE(review): this matches when ANY requested access bit overlaps
+		// the rule; confirm whether a subset check is required instead.
+		p.insts = append(p.insts,
+			// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
+			asm.Mov.Reg32(asm.R1, asm.R3),
+			asm.And.Imm32(asm.R1, bpfAccess),
+			asm.JEq.Imm(asm.R1, 0, nextBlockSym),
+		)
+	}
+	if hasMajor {
+		p.insts = append(p.insts,
+			// if (R4 != major) goto next
+			asm.JNE.Imm(asm.R4, int32(major), nextBlockSym),
+		)
+	}
+	if hasMinor {
+		p.insts = append(p.insts,
+			// if (R5 != minor) goto next
+			asm.JNE.Imm(asm.R5, int32(minor), nextBlockSym),
+		)
+	}
+	if !hasType && !hasAccess && !hasMajor && !hasMinor {
+		p.hasWildCard = true
+	}
+	p.insts = append(p.insts, acceptBlock(dev.Allow)...)
+	// set blockSym to the first instruction we added in this iteration
+	p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
+	p.blockID++
+	return nil
+}
+
+// finalize completes the program and returns its instructions.
+//
+// Unless a wildcard rule already ended the program with its own return, a
+// last block labelled "block-<blockID>" that returns 0 is appended and
+// blockID is set to -1. The returned error is always nil.
+func (p *program) finalize() (asm.Instructions, error) {
+	if !p.hasWildCard {
+		fallthroughBlock := asm.Instructions{
+			// R0 <- 0
+			asm.Mov.Imm32(asm.R0, 0).Sym(fmt.Sprintf("block-%d", p.blockID)),
+			asm.Return(),
+		}
+		p.insts = append(p.insts, fallthroughBlock...)
+		p.blockID = -1
+	}
+	// With a wildcard, acceptBlock with asm.Return() is already inserted.
+	return p.insts, nil
+}
+
+func acceptBlock(accept bool) asm.Instructions {
+	v := int32(0)
+	if accept {
+		v = 1
+	}
+	return []asm.Instruction{
+		// R0 <- v
+		asm.Mov.Imm32(asm.R0, v),
+		asm.Return(),
+	}
+}

+ 83 - 0
vendor/github.com/containerd/cgroups/v2/ebpf.go

@@ -0,0 +1,83 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/asm"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+)
+
+// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
+//
+// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
+//
+// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
+//
+// dirFD is the file descriptor the program is attached to. On success the
+// returned function detaches the program again. On failure a no-op function
+// is returned together with the error, and a program that was loaded but
+// could not be attached is closed instead of being leaked.
+func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
+	nilCloser := func() error {
+		return nil
+	}
+	spec := &ebpf.ProgramSpec{
+		Type:         ebpf.CGroupDevice,
+		Instructions: insts,
+		License:      license,
+	}
+	prog, err := ebpf.NewProgram(spec)
+	if err != nil {
+		return nilCloser, err
+	}
+	if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
+		// Nothing references the loaded program any more; release it.
+		// The attach error is the one worth reporting, so a Close error
+		// is deliberately not propagated.
+		_ = prog.Close()
+		return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
+	}
+	closer := func() error {
+		if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
+			return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
+		}
+		return nil
+	}
+	return closer, nil
+}
+
+func isRWM(cgroupPermissions string) bool {
+	r := false
+	w := false
+	m := false
+	for _, rn := range cgroupPermissions {
+		switch rn {
+		case 'r':
+			r = true
+		case 'w':
+			w = true
+		case 'm':
+			m = true
+		}
+	}
+	return r && w && m
+}
+
+// canSkipEBPFError reports whether every rule in devices is a deny rule
+// covering all of r, w and m. It returns true for an empty list.
+//
+// the logic is from runc
+// https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/fs/devices_v2.go#L44
+func canSkipEBPFError(devices []specs.LinuxDeviceCgroup) bool {
+	for i := range devices {
+		denyEverything := !devices[i].Allow && isRWM(devices[i].Access)
+		if !denyEverything {
+			return false
+		}
+	}
+	return true
+}

+ 50 - 0
vendor/github.com/containerd/cgroups/v2/errors.go

@@ -0,0 +1,50 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"errors"
+	"os"
+)
+
+// Sentinel errors defined by this package. They are plain values, so callers
+// can compare a returned error against them directly.
+var (
+	ErrInvalidPid               = errors.New("cgroups: pid must be greater than 0")
+	ErrMountPointNotExist       = errors.New("cgroups: cgroup mountpoint does not exist")
+	ErrInvalidFormat            = errors.New("cgroups: parsing file with invalid format failed")
+	ErrFreezerNotSupported      = errors.New("cgroups: freezer cgroup (v2) not supported on this system")
+	ErrMemoryNotSupported       = errors.New("cgroups: memory cgroup (v2) not supported on this system")
+	ErrPidsNotSupported         = errors.New("cgroups: pids cgroup (v2) not supported on this system")
+	ErrCPUNotSupported          = errors.New("cgroups: cpu cgroup (v2) not supported on this system")
+	ErrCgroupDeleted            = errors.New("cgroups: cgroup deleted")
+	ErrNoCgroupMountDestination = errors.New("cgroups: cannot find cgroup mount destination")
+	ErrInvalidGroupPath         = errors.New("cgroups: invalid group path")
+)
+
+// ErrorHandler is a function that handles and acts on errors
+type ErrorHandler func(err error) error
+
+// IgnoreNotExist filters out "does not exist" errors: it returns nil when
+// os.IsNotExist(err) holds and err unchanged otherwise.
+func IgnoreNotExist(err error) error {
+	switch {
+	case os.IsNotExist(err):
+		return nil
+	default:
+		return err
+	}
+}
+
+// errPassthrough returns err unchanged.
+// NOTE(review): presumably used as the default ErrorHandler; no caller is
+// visible in this part of the file, so confirm.
+func errPassthrough(err error) error {
+	return err
+}

+ 37 - 0
vendor/github.com/containerd/cgroups/v2/hugetlb.go

@@ -0,0 +1,37 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import "strings"
+
+// HugeTlb is a list of per-page-size huge page limits.
+type HugeTlb []HugeTlbEntry
+
+// HugeTlbEntry is one limit; Values writes Limit to the file
+// "hugetlb.<HugePageSize>.max".
+type HugeTlbEntry struct {
+	HugePageSize string
+	Limit        uint64
+}
+
+// Values returns one entry per element of the list, writing each Limit to
+// "hugetlb.<HugePageSize>.max". An empty list yields a nil slice.
+func (r *HugeTlb) Values() (o []Value) {
+	entries := *r
+	for idx := range entries {
+		name := strings.Join([]string{"hugetlb", entries[idx].HugePageSize, "max"}, ".")
+		o = append(o, Value{filename: name, value: entries[idx].Limit})
+	}
+	return o
+}

+ 64 - 0
vendor/github.com/containerd/cgroups/v2/io.go

@@ -0,0 +1,64 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import "fmt"
+
+// IOType is the key of a limit inside an io.max entry (see Entry.String).
+type IOType string
+
+// Keys accepted as the Type of an Entry.
+const (
+	ReadBPS   IOType = "rbps"
+	WriteBPS  IOType = "wbps"
+	ReadIOPS  IOType = "riops"
+	WriteIOPS IOType = "wiops"
+)
+
+// BFQ holds the weight written to io.bfq.weight; IO.Values skips it when the
+// weight is 0.
+type BFQ struct {
+	Weight uint16
+}
+
+// Entry is a single limit for the device Major:Minor, written to io.max in
+// the form produced by String.
+type Entry struct {
+	Type  IOType
+	Major int64
+	Minor int64
+	Rate  uint64
+}
+
+// String formats the entry as "<major>:<minor> <type>=<rate>", the value
+// IO.Values writes to io.max.
+func (e Entry) String() string {
+	return fmt.Sprintf("%d:%d %s=%d", e.Major, e.Minor, e.Type, e.Rate)
+}
+
+type IO struct {
+	BFQ BFQ
+	Max []Entry
+}
+
+// Values lists the io files to write: io.bfq.weight when the BFQ weight is
+// non-zero, followed by one io.max entry per element of Max.
+func (i *IO) Values() (o []Value) {
+	if weight := i.BFQ.Weight; weight != 0 {
+		o = append(o, Value{filename: "io.bfq.weight", value: weight})
+	}
+	for idx := range i.Max {
+		o = append(o, Value{filename: "io.max", value: i.Max[idx].String()})
+	}
+	return o
+}

+ 739 - 0
vendor/github.com/containerd/cgroups/v2/manager.go

@@ -0,0 +1,739 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"bufio"
+	"fmt"
+	"io/ioutil"
+	"math"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	"golang.org/x/sys/unix"
+
+	"github.com/containerd/cgroups/v2/stats"
+	"github.com/godbus/dbus/v5"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+
+	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
+)
+
+const (
+	// subtreeControl is the file controllers are enabled/disabled through.
+	subtreeControl = "cgroup.subtree_control"
+	// controllersFile lists the controllers available in a cgroup.
+	controllersFile = "cgroup.controllers"
+	// defaultCgroup2Path is the mountpoint NewSystemd builds paths under.
+	defaultCgroup2Path = "/sys/fs/cgroup"
+	// defaultSlice is the slice NewSystemd uses when none is given.
+	defaultSlice = "system.slice"
+)
+
+var (
+	canDelegate bool
+	once        sync.Once
+)
+
+type cgValuer interface {
+	Values() []Value
+}
+
+// Event carries the counters read from the memory.events file; each field
+// corresponds to the key named next to it.
+type Event struct {
+	Low     uint64 // "low"
+	High    uint64 // "high"
+	Max     uint64 // "max"
+	OOM     uint64 // "oom"
+	OOMKill uint64 // "oom_kill"
+}
+
+// Resources for a cgroups v2 unified hierarchy
+type Resources struct {
+	CPU     *CPU
+	Memory  *Memory
+	Pids    *Pids
+	IO      *IO
+	RDMA    *RDMA
+	HugeTlb *HugeTlb
+	// When len(Devices) is zero, devices are not controlled
+	Devices []specs.LinuxDeviceCgroup
+}
+
+// Values collects the file/value pairs of every configured (non-nil)
+// controller, in the order CPU, Memory, Pids, IO, RDMA, HugeTlb, ready to be
+// written to the unified hierarchy. Devices are not part of the result.
+func (r *Resources) Values() (o []Value) {
+	if cpu := r.CPU; cpu != nil {
+		o = append(o, cpu.Values()...)
+	}
+	if mem := r.Memory; mem != nil {
+		o = append(o, mem.Values()...)
+	}
+	if pids := r.Pids; pids != nil {
+		o = append(o, pids.Values()...)
+	}
+	if io := r.IO; io != nil {
+		o = append(o, io.Values()...)
+	}
+	if rdma := r.RDMA; rdma != nil {
+		o = append(o, rdma.Values()...)
+	}
+	if huge := r.HugeTlb; huge != nil {
+		o = append(o, huge.Values()...)
+	}
+	return o
+}
+
+// EnabledControllers names the controllers required by the non-nil resource
+// sections. A non-nil CPU section requires both "cpu" and "cpuset".
+func (r *Resources) EnabledControllers() (c []string) {
+	if r.CPU != nil {
+		c = append(c, "cpu", "cpuset")
+	}
+	if r.Memory != nil {
+		c = append(c, "memory")
+	}
+	if r.Pids != nil {
+		c = append(c, "pids")
+	}
+	if r.IO != nil {
+		c = append(c, "io")
+	}
+	if r.RDMA != nil {
+		c = append(c, "rdma")
+	}
+	if r.HugeTlb != nil {
+		c = append(c, "hugetlb")
+	}
+	return c
+}
+
+// Value of a cgroup setting
+type Value struct {
+	filename string
+	value    interface{}
+}
+
+// write stores the value in the file <path>/<filename> using perm.
+//
+// Integers are written in decimal; []byte, string and CPUMax are written
+// verbatim. Any other value type yields ErrInvalidFormat without touching
+// the file.
+func (c *Value) write(path string, perm os.FileMode) error {
+	var text string
+	switch v := c.value.(type) {
+	case uint64:
+		text = strconv.FormatUint(v, 10)
+	case uint16:
+		text = strconv.FormatUint(uint64(v), 10)
+	case int64:
+		text = strconv.FormatInt(v, 10)
+	case []byte:
+		text = string(v)
+	case string:
+		text = v
+	case CPUMax:
+		text = string(v)
+	default:
+		return ErrInvalidFormat
+	}
+	target := filepath.Join(path, c.filename)
+	return ioutil.WriteFile(target, []byte(text), perm)
+}
+
+// writeValues writes every value below path with defaultFilePerm, stopping
+// at and returning the first error.
+func writeValues(path string, values []Value) error {
+	for idx := range values {
+		v := values[idx]
+		if err := v.write(path, defaultFilePerm); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// NewManager creates the cgroup <mountpoint>/<group>, enables the controllers
+// required by resources on its ancestors and applies the resources to it.
+//
+// resources must not be nil; a nil value is rejected with an error instead of
+// panicking while the required controllers are computed. If enabling the
+// controllers or applying the resources fails, removal of the created
+// directory is attempted and the error is returned.
+func NewManager(mountpoint string, group string, resources *Resources) (*Manager, error) {
+	if resources == nil {
+		return nil, errors.New("resources reference is nil")
+	}
+	if err := VerifyGroupPath(group); err != nil {
+		return nil, err
+	}
+	path := filepath.Join(mountpoint, group)
+	if err := os.MkdirAll(path, defaultDirPerm); err != nil {
+		return nil, err
+	}
+	m := Manager{
+		unifiedMountpoint: mountpoint,
+		path:              path,
+	}
+	if err := m.ToggleControllers(resources.EnabledControllers(), Enable); err != nil {
+		// clean up cgroup dir on failure
+		os.Remove(path)
+		return nil, err
+	}
+	if err := setResources(path, resources); err != nil {
+		os.Remove(path)
+		return nil, err
+	}
+	return &m, nil
+}
+
+// LoadManager returns a Manager for the cgroup <mountpoint>/<group> after
+// validating group with VerifyGroupPath. It does not touch the filesystem
+// itself.
+func LoadManager(mountpoint string, group string) (*Manager, error) {
+	err := VerifyGroupPath(group)
+	if err != nil {
+		return nil, err
+	}
+	m := &Manager{
+		unifiedMountpoint: mountpoint,
+		path:              filepath.Join(mountpoint, group),
+	}
+	return m, nil
+}
+
+// Manager operates on a single cgroup directory of the unified hierarchy.
+type Manager struct {
+	// unifiedMountpoint is the root of the hierarchy (see RootControllers).
+	unifiedMountpoint string
+	// path is the cgroup directory itself, i.e. mountpoint joined with the
+	// group path.
+	path string
+}
+
+// setResources applies resources to the cgroup at path: first the plain file
+// values, then the device rules. A nil resources is a no-op.
+func setResources(path string, resources *Resources) error {
+	if resources == nil {
+		return nil
+	}
+	if err := writeValues(path, resources.Values()); err != nil {
+		return err
+	}
+	return setDevices(path, resources.Devices)
+}
+
+// RootControllers returns the whitespace-separated controller names listed
+// in cgroup.controllers at the root of the unified hierarchy.
+func (c *Manager) RootControllers() ([]string, error) {
+	listing := filepath.Join(c.unifiedMountpoint, controllersFile)
+	raw, err := ioutil.ReadFile(listing)
+	if err != nil {
+		return nil, err
+	}
+	return strings.Fields(string(raw)), nil
+}
+
+// Controllers returns the whitespace-separated controller names listed in
+// this cgroup's own cgroup.controllers file.
+func (c *Manager) Controllers() ([]string, error) {
+	listing := filepath.Join(c.path, controllersFile)
+	raw, err := ioutil.ReadFile(listing)
+	if err != nil {
+		return nil, err
+	}
+	return strings.Fields(string(raw)), nil
+}
+
+type ControllerToggle int
+
+const (
+	Enable ControllerToggle = iota + 1
+	Disable
+)
+
+// toggleFunc returns a new slice in which every controller name is prefixed
+// with prefix. The result is never nil, even for an empty input.
+func toggleFunc(controllers []string, prefix string) []string {
+	prefixed := make([]string, 0, len(controllers))
+	for _, name := range controllers {
+		prefixed = append(prefixed, prefix+name)
+	}
+	return prefixed
+}
+
+// ToggleControllers enables or disables controllers in the
+// cgroup.subtree_control file of every ancestor of c.path, from the unified
+// mountpoint downwards. Write failures do not stop the walk; only the last
+// one is returned.
+func (c *Manager) ToggleControllers(controllers []string, t ControllerToggle) error {
+	// when c.path is like /foo/bar/baz, the following files need to be written:
+	// * /sys/fs/cgroup/cgroup.subtree_control
+	// * /sys/fs/cgroup/foo/cgroup.subtree_control
+	// * /sys/fs/cgroup/foo/bar/cgroup.subtree_control
+	// Note that /sys/fs/cgroup/foo/bar/baz/cgroup.subtree_control does not need to be written.
+	var lastErr error
+	segments := strings.Split(c.path, "/")
+	for n := range segments {
+		ancestor := strings.Join(segments[:n], "/")
+		if ancestor == c.path || !strings.HasPrefix(ancestor, c.unifiedMountpoint) {
+			continue
+		}
+		target := filepath.Join(ancestor, subtreeControl)
+		err := c.writeSubtreeControl(target, controllers, t)
+		if err == nil {
+			continue
+		}
+		// When running as rootless, the user may face EPERM on parent groups, but it is negligible when the
+		// controller is already written.
+		// So we only return the last error.
+		lastErr = errors.Wrapf(err, "failed to write subtree controllers %+v to %q", controllers, target)
+	}
+	return lastErr
+}
+
+// writeSubtreeControl opens filePath for writing and writes the controllers
+// as one space-separated line, each prefixed with "+" for Enable or "-" for
+// Disable. Any other toggle value writes the names without a prefix.
+func (c *Manager) writeSubtreeControl(filePath string, controllers []string, t ControllerToggle) error {
+	file, err := os.OpenFile(filePath, os.O_WRONLY, 0)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	entries := controllers
+	switch t {
+	case Enable:
+		entries = toggleFunc(controllers, "+")
+	case Disable:
+		entries = toggleFunc(controllers, "-")
+	}
+	if _, err := file.WriteString(strings.Join(entries, " ")); err != nil {
+		return err
+	}
+	return nil
+}
+
+// NewChild creates the cgroup <c.path>/<name>, applies resources to it and
+// returns a Manager for it. name must not start with "/". If applying the
+// resources fails, removal of the new directory is attempted.
+func (c *Manager) NewChild(name string, resources *Resources) (*Manager, error) {
+	if strings.HasPrefix(name, "/") {
+		return nil, errors.New("name must be relative")
+	}
+	childPath := filepath.Join(c.path, name)
+	if err := os.MkdirAll(childPath, defaultDirPerm); err != nil {
+		return nil, err
+	}
+	if err := setResources(childPath, resources); err != nil {
+		// clean up cgroup dir on failure
+		os.Remove(childPath)
+		return nil, err
+	}
+	child := &Manager{
+		unifiedMountpoint: c.unifiedMountpoint,
+		path:              childPath,
+	}
+	return child, nil
+}
+
+// AddProc moves the process pid into this cgroup by writing it to the
+// cgroupProcs file.
+func (c *Manager) AddProc(pid uint64) error {
+	entry := []Value{{filename: cgroupProcs, value: pid}}
+	return writeValues(c.path, entry)
+}
+
+// Delete removes this cgroup by calling remove on its path.
+func (c *Manager) Delete() error {
+	return remove(c.path)
+}
+
+// Procs returns the process IDs found in the cgroupProcs file of this cgroup
+// and, when recursive is true, of every cgroup below it. The IDs collected
+// before a walk error are returned together with that error.
+func (c *Manager) Procs(recursive bool) ([]uint64, error) {
+	var pids []uint64
+	visit := func(p string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() && !recursive {
+			// Descend into the cgroup itself, but into none of its children.
+			if p != c.path {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		if _, name := filepath.Split(p); name != cgroupProcs {
+			return nil
+		}
+		found, err := parseCgroupProcsFile(p)
+		if err != nil {
+			return err
+		}
+		pids = append(pids, found...)
+		return nil
+	}
+	err := filepath.Walk(c.path, visit)
+	return pids, err
+}
+
+var singleValueFiles = []string{
+	"pids.current",
+	"pids.max",
+}
+
+// Stat collects the current metrics of this cgroup.
+//
+// For each of the "cpu" and "memory" controllers listed for the cgroup, the
+// key/value file "<controller>.stat" is read, followed by the single-value
+// files in singleValueFiles. Files that do not exist are skipped; any other
+// read or parse error aborts the call. Keys that were not read are reported
+// as 0 in the returned metrics.
+func (c *Manager) Stat() (*stats.Metrics, error) {
+	controllers, err := c.Controllers()
+	if err != nil {
+		return nil, err
+	}
+	// out gathers every parsed key, regardless of which file it came from.
+	out := make(map[string]interface{})
+	for _, controller := range controllers {
+		switch controller {
+		case "cpu", "memory":
+			filename := fmt.Sprintf("%s.stat", controller)
+			if err := readKVStatsFile(c.path, filename, out); err != nil {
+				if os.IsNotExist(err) {
+					continue
+				}
+				return nil, err
+			}
+		}
+	}
+	for _, name := range singleValueFiles {
+		if err := readSingleFile(c.path, name, out); err != nil {
+			if os.IsNotExist(err) {
+				continue
+			}
+			return nil, err
+		}
+	}
+	var metrics stats.Metrics
+
+	// getPidValue also understands the literal "max" (see its definition).
+	metrics.Pids = &stats.PidsStat{
+		Current: getPidValue("pids.current", out),
+		Limit:   getPidValue("pids.max", out),
+	}
+	metrics.CPU = &stats.CPUStat{
+		UsageUsec:     getUint64Value("usage_usec", out),
+		UserUsec:      getUint64Value("user_usec", out),
+		SystemUsec:    getUint64Value("system_usec", out),
+		NrPeriods:     getUint64Value("nr_periods", out),
+		NrThrottled:   getUint64Value("nr_throttled", out),
+		ThrottledUsec: getUint64Value("throttled_usec", out),
+	}
+	metrics.Memory = &stats.MemoryStat{
+		Anon:                  getUint64Value("anon", out),
+		File:                  getUint64Value("file", out),
+		KernelStack:           getUint64Value("kernel_stack", out),
+		Slab:                  getUint64Value("slab", out),
+		Sock:                  getUint64Value("sock", out),
+		Shmem:                 getUint64Value("shmem", out),
+		FileMapped:            getUint64Value("file_mapped", out),
+		FileDirty:             getUint64Value("file_dirty", out),
+		FileWriteback:         getUint64Value("file_writeback", out),
+		AnonThp:               getUint64Value("anon_thp", out),
+		InactiveAnon:          getUint64Value("inactive_anon", out),
+		ActiveAnon:            getUint64Value("active_anon", out),
+		InactiveFile:          getUint64Value("inactive_file", out),
+		ActiveFile:            getUint64Value("active_file", out),
+		Unevictable:           getUint64Value("unevictable", out),
+		SlabReclaimable:       getUint64Value("slab_reclaimable", out),
+		SlabUnreclaimable:     getUint64Value("slab_unreclaimable", out),
+		Pgfault:               getUint64Value("pgfault", out),
+		Pgmajfault:            getUint64Value("pgmajfault", out),
+		WorkingsetRefault:     getUint64Value("workingset_refault", out),
+		WorkingsetActivate:    getUint64Value("workingset_activate", out),
+		WorkingsetNodereclaim: getUint64Value("workingset_nodereclaim", out),
+		Pgrefill:              getUint64Value("pgrefill", out),
+		Pgscan:                getUint64Value("pgscan", out),
+		Pgsteal:               getUint64Value("pgsteal", out),
+		Pgactivate:            getUint64Value("pgactivate", out),
+		Pgdeactivate:          getUint64Value("pgdeactivate", out),
+		Pglazyfree:            getUint64Value("pglazyfree", out),
+		Pglazyfreed:           getUint64Value("pglazyfreed", out),
+		ThpFaultAlloc:         getUint64Value("thp_fault_alloc", out),
+		ThpCollapseAlloc:      getUint64Value("thp_collapse_alloc", out),
+		Usage:                 getStatFileContentUint64(filepath.Join(c.path, "memory.current")),
+		UsageLimit:            getStatFileContentUint64(filepath.Join(c.path, "memory.max")),
+		SwapUsage:             getStatFileContentUint64(filepath.Join(c.path, "memory.swap.current")),
+		SwapLimit:             getStatFileContentUint64(filepath.Join(c.path, "memory.swap.max")),
+	}
+
+	metrics.Io = &stats.IOStat{Usage: readIoStats(c.path)}
+	metrics.Rdma = &stats.RdmaStat{
+		Current: rdmaStats(filepath.Join(c.path, "rdma.current")),
+		Limit:   rdmaStats(filepath.Join(c.path, "rdma.max")),
+	}
+	metrics.Hugetlb = readHugeTlbStats(c.path)
+
+	return &metrics, nil
+}
+
+func getUint64Value(key string, out map[string]interface{}) uint64 {
+	v, ok := out[key]
+	if !ok {
+		return 0
+	}
+	switch t := v.(type) {
+	case uint64:
+		return t
+	}
+	return 0
+}
+
+// getPidValue returns out[key] when it holds a uint64, math.MaxUint64 when it
+// holds the string "max", and 0 in every other case.
+func getPidValue(key string, out map[string]interface{}) uint64 {
+	switch v := out[key].(type) {
+	case uint64:
+		return v
+	case string:
+		if v == "max" {
+			return math.MaxUint64
+		}
+	}
+	return 0
+}
+
+// readSingleFile reads <path>/<file>, trims surrounding whitespace and stores
+// the content in out[file]: as the number returned by parseUint when that
+// succeeds, otherwise as the trimmed string.
+func readSingleFile(path string, file string, out map[string]interface{}) error {
+	raw, err := ioutil.ReadFile(filepath.Join(path, file))
+	if err != nil {
+		return err
+	}
+	text := strings.TrimSpace(string(raw))
+	if n, perr := parseUint(text, 10, 64); perr == nil {
+		out[file] = n
+	} else {
+		// if we cannot parse as a uint, parse as a string
+		out[file] = text
+	}
+	return nil
+}
+
+// readKVStatsFile reads <path>/<file> line by line, parses each line with
+// parseKV and stores the result in out under the parsed name.
+//
+// It returns the error from opening the file, a wrapped error for the first
+// line that fails to parse, or the scanner's read error. Entries stored
+// before an error remain in out.
+func readKVStatsFile(path string, file string, out map[string]interface{}) error {
+	f, err := os.Open(filepath.Join(path, file))
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		name, value, err := parseKV(s.Text())
+		if err != nil {
+			return errors.Wrapf(err, "error while parsing %s (line=%q)", filepath.Join(path, file), s.Text())
+		}
+		out[name] = value
+	}
+	// Scan returns false both at EOF and on failure; Err is only meaningful
+	// once the loop has ended, so it must be checked here and not inside it.
+	return s.Err()
+}
+
+// Freeze puts this cgroup into the Frozen state and waits until that state
+// is reported (see freeze).
+func (c *Manager) Freeze() error {
+	return c.freeze(c.path, Frozen)
+}
+
+// Thaw puts this cgroup into the Thawed state and waits until that state is
+// reported (see freeze).
+func (c *Manager) Thaw() error {
+	return c.freeze(c.path, Thawed)
+}
+
+// freeze writes the values of state below path and then reads the state
+// back with fetchState, repeating both steps every millisecond until the
+// reported state equals the requested one. Any write or read error is
+// returned immediately.
+// NOTE(review): the loop has no timeout or cancellation; if the requested
+// state is never reported, this call does not return.
+func (c *Manager) freeze(path string, state State) error {
+	values := state.Values()
+	for {
+		if err := writeValues(path, values); err != nil {
+			return err
+		}
+		current, err := fetchState(path)
+		if err != nil {
+			return err
+		}
+		if current == state {
+			return nil
+		}
+		time.Sleep(1 * time.Millisecond)
+	}
+}
+
+// MemoryEventFD returns inotify file descriptor and 'memory.events' inotify watch descriptor
+//
+// The watch is registered for IN_MODIFY on <c.path>/memory.events. On error
+// both descriptors are returned as 0, the inotify descriptor (if one was
+// created) is closed, and the underlying system call error is wrapped in the
+// returned error.
+func (c *Manager) MemoryEventFD() (int, uint32, error) {
+	fpath := filepath.Join(c.path, "memory.events")
+	fd, err := syscall.InotifyInit()
+	if err != nil {
+		return 0, 0, errors.Wrap(err, "Failed to create inotify fd")
+	}
+	wd, err := syscall.InotifyAddWatch(fd, fpath, unix.IN_MODIFY)
+	if err != nil {
+		syscall.Close(fd)
+		return 0, 0, errors.Wrapf(err, "Failed to add inotify watch for %q", fpath)
+	}
+
+	return fd, uint32(wd), nil
+}
+
+// EventChan starts a goroutine running waitForEvents and returns the channel
+// on which it delivers Event values together with the channel on which it
+// reports the error that made it stop.
+//
+// Both channels are unbuffered, so the goroutine blocks until each value is
+// received; callers should keep receiving from both.
+func (c *Manager) EventChan() (<-chan Event, <-chan error) {
+	ec := make(chan Event)
+	errCh := make(chan error)
+	go c.waitForEvents(ec, errCh)
+
+	// The error channel must be handed out: returning nil here would leave
+	// the goroutine stuck forever on its first error send.
+	return ec, errCh
+}
+
+func (c *Manager) waitForEvents(ec chan<- Event, errCh chan<- error) {
+	fd, wd, err := c.MemoryEventFD()
+
+	defer syscall.InotifyRmWatch(fd, wd)
+	defer syscall.Close(fd)
+
+	if err != nil {
+		errCh <- err
+		return
+	}
+
+	for {
+		buffer := make([]byte, syscall.SizeofInotifyEvent*10)
+		bytesRead, err := syscall.Read(fd, buffer)
+		if err != nil {
+			errCh <- err
+			return
+		}
+		var out map[string]interface{}
+		if bytesRead >= syscall.SizeofInotifyEvent {
+			if err := readKVStatsFile(c.path, "memory.events", out); err != nil {
+				e := Event{
+					High:    out["high"].(uint64),
+					Low:     out["low"].(uint64),
+					Max:     out["max"].(uint64),
+					OOM:     out["oom"].(uint64),
+					OOMKill: out["oom_kill"].(uint64),
+				}
+				ec <- e
+			} else {
+				errCh <- err
+				return
+			}
+		}
+	}
+}
+
+// setDevices builds a device filter program from devices via DeviceFilter and
+// loads/attaches it to the cgroup directory at path.
+//
+// An empty devices list is a no-op. A failure to load or attach the filter is
+// returned unless canSkipEBPFError(devices) reports that it may be ignored.
+func setDevices(path string, devices []specs.LinuxDeviceCgroup) error {
+	if len(devices) == 0 {
+		return nil
+	}
+	insts, license, err := DeviceFilter(devices)
+	if err != nil {
+		return err
+	}
+	dirFD, err := unix.Open(path, unix.O_DIRECTORY|unix.O_RDONLY, 0600)
+	if err != nil {
+		// Keep the underlying errno so callers can tell ENOENT from EACCES.
+		return errors.Wrapf(err, "cannot get dir FD for %s", path)
+	}
+	defer unix.Close(dirFD)
+	if _, err := LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
+		if !canSkipEBPFError(devices) {
+			return err
+		}
+	}
+	return nil
+}
+
+// NewSystemd asks systemd over D-Bus to start a transient unit named group
+// and returns a Manager whose path is defaultCgroup2Path/slice/group.
+//
+// An empty slice falls back to defaultSlice. A pid of -1 means no process is
+// attached to the unit. resources may be nil, and any nil limit inside it is
+// simply not translated into a systemd property. An "already exists" error
+// from systemd is tolerated; every other D-Bus error is returned together
+// with an empty Manager.
+func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, error) {
+	if slice == "" {
+		slice = defaultSlice
+	}
+	if resources == nil {
+		// Treat a missing resource spec as "no limits" instead of panicking below.
+		resources = &Resources{}
+	}
+	path := filepath.Join(defaultCgroup2Path, slice, group)
+	conn, err := systemdDbus.New()
+	if err != nil {
+		return &Manager{}, err
+	}
+	defer conn.Close()
+
+	properties := []systemdDbus.Property{
+		systemdDbus.PropDescription(fmt.Sprintf("cgroup %s", group)),
+		newSystemdProperty("DefaultDependencies", false),
+		newSystemdProperty("MemoryAccounting", true),
+		newSystemdProperty("CPUAccounting", true),
+		newSystemdProperty("IOAccounting", true),
+	}
+
+	// NOTE(review): the unit is always parented to defaultSlice here, even
+	// when the caller passed a different slice — confirm this is intended.
+	// if we create a slice, the parent is defined via a Wants=
+	if strings.HasSuffix(group, ".slice") {
+		properties = append(properties, systemdDbus.PropWants(defaultSlice))
+	} else {
+		// otherwise, we use Slice=
+		properties = append(properties, systemdDbus.PropSlice(defaultSlice))
+	}
+
+	// only add pid if its valid, -1 is used w/ general slice creation.
+	if pid != -1 {
+		properties = append(properties, newSystemdProperty("PIDs", []uint32{uint32(pid)}))
+	}
+
+	// Max and Weight are optional pointers; guard them before dereferencing.
+	if resources.Memory != nil && resources.Memory.Max != nil && *resources.Memory.Max != 0 {
+		properties = append(properties,
+			newSystemdProperty("MemoryMax", uint64(*resources.Memory.Max)))
+	}
+
+	if resources.CPU != nil && resources.CPU.Weight != nil && *resources.CPU.Weight != 0 {
+		properties = append(properties,
+			newSystemdProperty("CPUWeight", *resources.CPU.Weight))
+	}
+
+	if resources.CPU != nil && resources.CPU.Max != "" {
+		quota, period := resources.CPU.Max.extractQuotaAndPeriod()
+		// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
+		// corresponds to USEC_INFINITY in systemd
+		// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
+		// always setting a property value ensures we can apply a quota and remove it later
+		cpuQuotaPerSecUSec := uint64(math.MaxUint64)
+		if quota > 0 {
+			// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
+			// (integer percentage of CPU) internally.  This means that if a fractional percent of
+			// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
+			// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
+			cpuQuotaPerSecUSec = uint64(quota*1000000) / period
+			if cpuQuotaPerSecUSec%10000 != 0 {
+				cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
+			}
+		}
+		properties = append(properties,
+			newSystemdProperty("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
+	}
+
+	// If we can delegate, we add the property back in
+	if canDelegate {
+		properties = append(properties, newSystemdProperty("Delegate", true))
+	}
+
+	if resources.Pids != nil && resources.Pids.Max > 0 {
+		properties = append(properties,
+			newSystemdProperty("TasksAccounting", true),
+			newSystemdProperty("TasksMax", uint64(resources.Pids.Max)))
+	}
+
+	// Buffered so the D-Bus library can deliver the result even if the
+	// timeout below has already fired.
+	statusChan := make(chan string, 1)
+	if _, err := conn.StartTransientUnit(group, "replace", properties, statusChan); err == nil {
+		select {
+		case <-statusChan:
+		case <-time.After(time.Second):
+			logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", group)
+		}
+	} else if !isUnitExists(err) {
+		return &Manager{}, err
+	}
+
+	return &Manager{
+		path: path,
+	}, nil
+}
+
+// LoadSystemd returns a Manager for the cgroup located at
+// defaultCgroup2Path/slice/group without contacting systemd or touching the
+// filesystem. An empty slice falls back to defaultSlice. The returned error
+// is always nil.
+func LoadSystemd(slice, group string) (*Manager, error) {
+	parent := slice
+	if parent == "" {
+		parent = defaultSlice
+	}
+	m := &Manager{path: filepath.Join(defaultCgroup2Path, parent, group)}
+	return m, nil
+}
+
+// DeleteSystemd asks systemd over D-Bus to stop the unit named after the last
+// element of c.path and blocks until the stop job reports a result on the
+// completion channel.
+func (c *Manager) DeleteSystemd() error {
+	conn, err := systemdDbus.New()
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	unit := systemdUnitFromPath(c.path)
+	done := make(chan string)
+	if _, err := conn.StopUnit(unit, "replace", done); err != nil {
+		return err
+	}
+	// Wait for the job result; its value is not inspected.
+	<-done
+	return nil
+}
+
+// newSystemdProperty builds a systemd D-Bus property called name whose value
+// is units wrapped in a D-Bus variant.
+func newSystemdProperty(name string, units interface{}) systemdDbus.Property {
+	variant := dbus.MakeVariant(units)
+	return systemdDbus.Property{Name: name, Value: variant}
+}

+ 52 - 0
vendor/github.com/containerd/cgroups/v2/memory.go

@@ -0,0 +1,52 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+// Memory holds the optional memory settings of a cgroup. A nil field means
+// the corresponding file is not written.
+type Memory struct {
+	Swap *int64 // written to memory.swap.max
+	Max  *int64 // written to memory.max
+	Low  *int64 // written to memory.low
+	High *int64 // written to memory.high
+}
+
+// Values returns one Value per non-nil field, in the order memory.swap.max,
+// memory.max, memory.low, memory.high. The result is nil when no field is set.
+func (r *Memory) Values() (o []Value) {
+	knobs := []struct {
+		file  string
+		limit *int64
+	}{
+		{"memory.swap.max", r.Swap},
+		{"memory.max", r.Max},
+		{"memory.low", r.Low},
+		{"memory.high", r.High},
+	}
+	for _, knob := range knobs {
+		if knob.limit == nil {
+			continue
+		}
+		o = append(o, Value{
+			filename: knob.file,
+			value:    *knob.limit,
+		})
+	}
+	return o
+}

+ 60 - 0
vendor/github.com/containerd/cgroups/v2/paths.go

@@ -0,0 +1,60 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+)
+
+// NestedGroupPath returns the cgroup path parsed from /proc/self/cgroup
+// (i.e. the calling process's own group) joined with suffix, so that child
+// groups are nested below the caller's group.
+func NestedGroupPath(suffix string) (string, error) {
+	own, err := parseCgroupFile("/proc/self/cgroup")
+	if err != nil {
+		return "", err
+	}
+	nested := filepath.Join(own, suffix)
+	return nested, nil
+}
+
+// PidGroupPath returns the cgroup path parsed from /proc/<pid>/cgroup for an
+// existing process.
+// This is commonly used for the Load function to restore an existing container
+func PidGroupPath(pid int) (string, error) {
+	return parseCgroupFile(fmt.Sprintf("/proc/%d/cgroup", pid))
+}
+
+// VerifyGroupPath verifies the format of the group path string g.
+// The format is the same as the third field in /proc/PID/cgroup,
+// e.g. "/user.slice/user-1001.slice/session-1.scope".
+//
+// g must start with "/", must already be in filepath.Clean form, and must not
+// start with "/sys/fs/cgroup"; otherwise ErrInvalidGroupPath is returned.
+//
+// VerifyGroupPath doesn't verify whether g actually exists on the system.
+func VerifyGroupPath(g string) error {
+	switch {
+	case !strings.HasPrefix(g, "/"),
+		filepath.Clean(g) != g,
+		strings.HasPrefix(g, "/sys/fs/cgroup"):
+		return ErrInvalidGroupPath
+	}
+	return nil
+}

+ 37 - 0
vendor/github.com/containerd/cgroups/v2/pids.go

@@ -0,0 +1,37 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import "strconv"
+
+// Pids holds the process-number limit of a cgroup.
+type Pids struct {
+	// Max is the limit written to pids.max: 0 writes nothing, a positive
+	// value is written in decimal, and a negative value is written as "max".
+	Max int64
+}
+
+// Values returns the pids.max setting derived from r.Max, or nil when r.Max
+// is zero.
+func (r *Pids) Values() (o []Value) {
+	var limit string
+	switch {
+	case r.Max == 0:
+		return nil
+	case r.Max > 0:
+		limit = strconv.FormatInt(r.Max, 10)
+	default:
+		limit = "max"
+	}
+	return append(o, Value{
+		filename: "pids.max",
+		value:    limit,
+	})
+}

+ 46 - 0
vendor/github.com/containerd/cgroups/v2/rdma.go

@@ -0,0 +1,46 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"fmt"
+)
+
+// RDMA holds the per-device RDMA limits of a cgroup.
+type RDMA struct {
+	Limit []RDMAEntry
+}
+
+// RDMAEntry is the limit for a single RDMA device.
+type RDMAEntry struct {
+	Device     string
+	HcaHandles uint32
+	HcaObjects uint32
+}
+
+// String renders the entry as "<device> hca_handle=<n> hca_object=<n>".
+func (r RDMAEntry) String() string {
+	const layout = "%s hca_handle=%d hca_object=%d"
+	return fmt.Sprintf(layout, r.Device, r.HcaHandles, r.HcaObjects)
+}
+
+// Values returns one rdma.max Value per entry in r.Limit, each carrying the
+// entry's String form. The result is nil when there are no entries.
+func (r *RDMA) Values() (o []Value) {
+	for i := range r.Limit {
+		line := r.Limit[i].String()
+		o = append(o, Value{filename: "rdma.max", value: line})
+	}
+	return o
+}

+ 65 - 0
vendor/github.com/containerd/cgroups/v2/state.go

@@ -0,0 +1,65 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"io/ioutil"
+	"path/filepath"
+	"strings"
+)
+
+// State is a type that represents the state of the current cgroup
+type State string
+
+const (
+	// Unknown is the zero State.
+	Unknown State = ""
+	// Thawed corresponds to "0" in the cgroup.freeze file.
+	Thawed State = "thawed"
+	// Frozen corresponds to "1" in the cgroup.freeze file.
+	Frozen  State = "frozen"
+	Deleted State = "deleted"
+
+	// cgroupFreeze is the name of the file that controls freezing.
+	cgroupFreeze = "cgroup.freeze"
+)
+
+// Values returns the single cgroup.freeze Value for s: "1" for Frozen and "0"
+// for Thawed. For any other state the Value carries no content.
+func (s State) Values() []Value {
+	freeze := Value{filename: cgroupFreeze}
+	if s == Frozen {
+		freeze.value = "1"
+	} else if s == Thawed {
+		freeze.value = "0"
+	}
+	return []Value{freeze}
+}
+
+// fetchState reads the cgroup.freeze file in the directory path and maps its
+// trimmed content to a State: "1" is Frozen, "0" is Thawed, anything else is
+// Unknown. A read failure yields Unknown together with the error.
+func fetchState(path string) (State, error) {
+	raw, err := ioutil.ReadFile(filepath.Join(path, cgroupFreeze))
+	if err != nil {
+		return Unknown, err
+	}
+	content := strings.TrimSpace(string(raw))
+	if content == "1" {
+		return Frozen, nil
+	}
+	if content == "0" {
+		return Thawed, nil
+	}
+	return Unknown, nil
+}

+ 442 - 0
vendor/github.com/containerd/cgroups/v2/utils.go

@@ -0,0 +1,442 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package v2
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"math"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/godbus/dbus/v5"
+
+	"github.com/containerd/cgroups/v2/stats"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+const (
+	cgroupProcs    = "cgroup.procs"
+	defaultDirPerm = 0755
+)
+
+// defaultFilePerm is a var so that the test framework can change the file mode
+// of all files created while the tests are running. The difference between the
+// tests and real-world use is that files like "cgroup.procs" already exist when
+// writing to a real cgroup filesystem but do not exist beforehand in the tests.
+// It is set to a non-zero value in the test code.
+var defaultFilePerm = os.FileMode(0)
+
+// remove deletes path with os.RemoveAll, making up to five attempts. Every
+// failed attempt is followed by a pause that starts at 10ms and doubles each
+// time. The last error is returned, wrapped, when every attempt fails.
+func remove(path string) error {
+	const attempts = 5
+	var (
+		lastErr error
+		backoff = 10 * time.Millisecond
+	)
+	for attempt := 0; attempt < attempts; attempt++ {
+		if attempt > 0 {
+			time.Sleep(backoff)
+			backoff *= 2
+		}
+		lastErr = os.RemoveAll(path)
+		if lastErr == nil {
+			return nil
+		}
+	}
+	return errors.Wrapf(lastErr, "cgroups: unable to remove path %q", path)
+}
+
+// parseCgroupProcsFile parses /sys/fs/cgroup/$GROUPPATH/cgroup.procs
+//
+// It returns every non-empty line of the file at path parsed as an unsigned
+// decimal number, in file order. An error is returned when the file cannot be
+// opened, when a line is not a valid number, or when reading the file fails
+// part-way through (including a line too long for the scanner).
+func parseCgroupProcsFile(path string) ([]uint64, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	var (
+		out []uint64
+		s   = bufio.NewScanner(f)
+	)
+	for s.Scan() {
+		if t := s.Text(); t != "" {
+			pid, err := strconv.ParseUint(t, 10, 0)
+			if err != nil {
+				return nil, err
+			}
+			out = append(out, pid)
+		}
+	}
+	// Without this check a read failure would be reported as a complete,
+	// possibly truncated, pid list.
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// parseKV splits raw on whitespace into exactly two fields, a key and a
+// value. The value is returned as a uint64 when parseUint accepts it and as
+// the original string otherwise. Any other field count yields
+// ErrInvalidFormat.
+func parseKV(raw string) (string, interface{}, error) {
+	fields := strings.Fields(raw)
+	if len(fields) != 2 {
+		return "", 0, ErrInvalidFormat
+	}
+	key, text := fields[0], fields[1]
+	if n, err := parseUint(text, 10, 64); err == nil {
+		return key, n, nil
+	}
+	// Not numeric: hand the text back unchanged.
+	return key, text, nil
+}
+
+// readUint reads the file at path and parses its whitespace-trimmed content
+// with parseUint (base 10, 64 bits).
+func readUint(path string) (uint64, error) {
+	raw, err := ioutil.ReadFile(path)
+	if err != nil {
+		return 0, err
+	}
+	trimmed := strings.TrimSpace(string(raw))
+	return parseUint(trimmed, 10, 64)
+}
+
+// parseUint parses s as an unsigned integer in the given base and bit size.
+// Input that is a negative integer, whether or not it fits in the signed
+// range, is reported as 0 with a nil error. Every other unparsable input
+// returns the error from strconv.ParseUint.
+func parseUint(s string, base, bitSize int) (uint64, error) {
+	parsed, err := strconv.ParseUint(s, base, bitSize)
+	if err == nil {
+		return parsed, nil
+	}
+	signed, signedErr := strconv.ParseInt(s, base, bitSize)
+	switch {
+	case signedErr == nil && signed < 0:
+		// A negative value that fits the signed range.
+		return 0, nil
+	case signedErr != nil &&
+		signedErr.(*strconv.NumError).Err == strconv.ErrRange &&
+		signed < 0:
+		// A negative value below the signed minimum.
+		return 0, nil
+	}
+	return 0, err
+}
+
+// parseCgroupFile opens the /proc/PID/cgroup-style file at path and returns
+// the group path that parseCgroupFromReader extracts from it.
+func parseCgroupFile(path string) (string, error) {
+	fh, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer fh.Close()
+	group, err := parseCgroupFromReader(fh)
+	return group, err
+}
+
+// parseCgroupFromReader scans r line by line for an entry of the form
+// "0::<path>" and returns <path>.
+//
+// Every line must contain at least three ':'-separated fields; a line that
+// does not is reported as an invalid entry. If reading r fails the read error
+// is returned as-is. If r is exhausted without a matching entry a
+// "cgroup path not found" error is returned.
+func parseCgroupFromReader(r io.Reader) (string, error) {
+	s := bufio.NewScanner(r)
+	for s.Scan() {
+		var (
+			text  = s.Text()
+			parts = strings.SplitN(text, ":", 3)
+		)
+		if len(parts) < 3 {
+			return "", fmt.Errorf("invalid cgroup entry: %q", text)
+		}
+		// text is like "0::/user.slice/user-1001.slice/session-1.scope"
+		if parts[0] == "0" && parts[1] == "" {
+			return parts[2], nil
+		}
+	}
+	// Scan stops on both EOF and failure; report a failure as such rather
+	// than as a missing entry.
+	if err := s.Err(); err != nil {
+		return "", err
+	}
+	return "", fmt.Errorf("cgroup path not found")
+}
+
+// ToResources converts the oci LinuxResources struct into a
+// v2 Resources type for use with this package.
+//
+// converting cgroups configuration from v1 to v2
+// ref: https://github.com/containers/crun/blob/master/crun.1.md#cgroup-v2
+//
+// Only the sections present in spec are populated; everything else in the
+// result stays nil. spec itself must not be nil.
+func ToResources(spec *specs.LinuxResources) *Resources {
+	var resources Resources
+	if cpu := spec.CPU; cpu != nil {
+		resources.CPU = &CPU{
+			Cpus: cpu.Cpus,
+			Mems: cpu.Mems,
+		}
+		if shares := cpu.Shares; shares != nil {
+			// NOTE(review): shares below 2 make the unsigned subtraction
+			// wrap around — confirm callers never pass such values.
+			convertedWeight := (1 + ((*shares-2)*9999)/262142)
+			resources.CPU.Weight = &convertedWeight
+		}
+		if period := cpu.Period; period != nil {
+			resources.CPU.Max = NewCPUMax(cpu.Quota, period)
+		}
+	}
+	if mem := spec.Memory; mem != nil {
+		resources.Memory = &Memory{}
+		if swap := mem.Swap; swap != nil {
+			resources.Memory.Swap = swap
+		}
+		if l := mem.Limit; l != nil {
+			resources.Memory.Max = l
+		}
+		if l := mem.Reservation; l != nil {
+			resources.Memory.Low = l
+		}
+	}
+	if hugetlbs := spec.HugepageLimits; hugetlbs != nil {
+		hugeTlbUsage := HugeTlb{}
+		for _, hugetlb := range hugetlbs {
+			hugeTlbUsage = append(hugeTlbUsage, HugeTlbEntry{
+				HugePageSize: hugetlb.Pagesize,
+				Limit:        hugetlb.Limit,
+			})
+		}
+		resources.HugeTlb = &hugeTlbUsage
+	}
+	if pids := spec.Pids; pids != nil {
+		resources.Pids = &Pids{
+			Max: pids.Limit,
+		}
+	}
+	if i := spec.BlockIO; i != nil {
+		resources.IO = &IO{}
+		if i.Weight != nil {
+			// Do the scaling in 64 bits: in 16-bit arithmetic the
+			// intermediate product (weight-10)*9999 wraps for almost every
+			// weight. Weights below 10 are not meaningful input here.
+			resources.IO.BFQ.Weight = uint16(1 + (uint64(*i.Weight)-10)*9999/990)
+		}
+		for t, devices := range map[IOType][]specs.LinuxThrottleDevice{
+			ReadBPS:   i.ThrottleReadBpsDevice,
+			WriteBPS:  i.ThrottleWriteBpsDevice,
+			ReadIOPS:  i.ThrottleReadIOPSDevice,
+			WriteIOPS: i.ThrottleWriteIOPSDevice,
+		} {
+			for _, d := range devices {
+				resources.IO.Max = append(resources.IO.Max, Entry{
+					Type:  t,
+					Major: d.Major,
+					Minor: d.Minor,
+					Rate:  d.Rate,
+				})
+			}
+		}
+	}
+	if i := spec.Rdma; i != nil {
+		resources.RDMA = &RDMA{}
+		for device, value := range spec.Rdma {
+			// Both limits are dereferenced below, so both must be set; an
+			// entry with only one of them is skipped instead of panicking.
+			if device != "" && value.HcaHandles != nil && value.HcaObjects != nil {
+				resources.RDMA.Limit = append(resources.RDMA.Limit, RDMAEntry{
+					Device:     device,
+					HcaHandles: *value.HcaHandles,
+					HcaObjects: *value.HcaObjects,
+				})
+			}
+		}
+	}
+
+	return &resources
+}
+
+// getStatFileContentUint64 returns the content of the single-value cgroup
+// stat file at filePath as a uint64. An unreadable file yields 0, the literal
+// "max" yields math.MaxUint64, and content that parseUint rejects is logged
+// and yields whatever parseUint returned alongside the error.
+func getStatFileContentUint64(filePath string) uint64 {
+	raw, err := ioutil.ReadFile(filePath)
+	if err != nil {
+		return 0
+	}
+	text := strings.TrimSpace(string(raw))
+	if text == "max" {
+		return math.MaxUint64
+	}
+
+	res, perr := parseUint(text, 10, 64)
+	if perr != nil {
+		logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", string(raw), filePath)
+	}
+	return res
+}
+
+// readIoStats parses the io.stat file in the directory path into one IOEntry
+// per device line.
+//
+// Each line is split on single spaces: the first field is "major:minor" and
+// the rest are key=value pairs, of which rbytes, wbytes, rios and wios are
+// recorded. Lines without at least two fields or without a "major:minor"
+// first field are skipped, as are pairs that are malformed or non-numeric.
+// An unparsable major or minor number stops parsing and returns what has been
+// collected so far. An unreadable file yields a nil slice.
+// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
+func readIoStats(path string) []*stats.IOEntry {
+	var usage []*stats.IOEntry
+	raw, err := ioutil.ReadFile(filepath.Join(path, "io.stat"))
+	if err != nil {
+		return usage
+	}
+
+	for _, line := range strings.Split(string(raw), "\n") {
+		fields := strings.Split(line, " ")
+		if len(fields) < 2 {
+			continue
+		}
+		devNums := strings.Split(fields[0], ":")
+		if len(devNums) != 2 {
+			continue
+		}
+		major, err := strconv.ParseUint(devNums[0], 10, 0)
+		if err != nil {
+			return usage
+		}
+		minor, err := strconv.ParseUint(devNums[1], 10, 0)
+		if err != nil {
+			return usage
+		}
+
+		entry := &stats.IOEntry{
+			Major: major,
+			Minor: minor,
+		}
+		for _, kv := range fields[1:] {
+			pair := strings.Split(kv, "=")
+			if len(pair) != 2 {
+				continue
+			}
+			n, err := strconv.ParseUint(pair[1], 10, 0)
+			if err != nil {
+				continue
+			}
+			switch pair[0] {
+			case "rbytes":
+				entry.Rbytes = n
+			case "wbytes":
+				entry.Wbytes = n
+			case "rios":
+				entry.Rios = n
+			case "wios":
+				entry.Wios = n
+			}
+		}
+		usage = append(usage, entry)
+	}
+	return usage
+}
+
+// rdmaStats reads the file at filepath and converts its lines into RDMA
+// entries with toRdmaEntry. An unreadable file yields an empty, non-nil slice.
+func rdmaStats(filepath string) []*stats.RdmaEntry {
+	raw, err := ioutil.ReadFile(filepath)
+	if err != nil {
+		return []*stats.RdmaEntry{}
+	}
+	lines := strings.Split(string(raw), "\n")
+	return toRdmaEntry(lines)
+}
+
+// parseRdmaKV parses a single "key=value" token and stores the value in
+// entry: key hca_handle sets HcaHandles and key hca_object sets HcaObjects.
+// The value "max" maps to math.MaxUint32. Tokens that do not split into
+// exactly two parts, values parseUint rejects, and other keys leave entry
+// untouched.
+func parseRdmaKV(raw string, entry *stats.RdmaEntry) {
+	pair := strings.Split(raw, "=")
+	if len(pair) != 2 {
+		return
+	}
+
+	var limit uint64
+	if pair[1] == "max" {
+		limit = math.MaxUint32
+	} else {
+		parsed, err := parseUint(pair[1], 10, 32)
+		if err != nil {
+			return
+		}
+		limit = parsed
+	}
+
+	switch pair[0] {
+	case "hca_handle":
+		entry.HcaHandles = uint32(limit)
+	case "hca_object":
+		entry.HcaObjects = uint32(limit)
+	}
+}
+
+// toRdmaEntry converts lines of the form "<device> <k=v> <k=v>" into RDMA
+// entries. Lines that do not consist of exactly three whitespace-separated
+// fields are skipped. The result is nil when no line qualifies.
+func toRdmaEntry(strEntries []string) []*stats.RdmaEntry {
+	var entries []*stats.RdmaEntry
+	for _, line := range strEntries {
+		fields := strings.Fields(line)
+		if len(fields) != 3 {
+			continue
+		}
+		entry := &stats.RdmaEntry{Device: fields[0]}
+		parseRdmaKV(fields[1], entry)
+		parseRdmaKV(fields[2], entry)
+		entries = append(entries, entry)
+	}
+	return entries
+}
+
+// isUnitExists reports whether err is a dbus.Error whose name contains
+// "org.freedesktop.systemd1.UnitExists", i.e. the systemd unit already exists.
+func isUnitExists(err error) bool {
+	if err == nil {
+		return false
+	}
+	dbusError, ok := err.(dbus.Error)
+	if !ok {
+		return false
+	}
+	return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
+}
+
+// systemdUnitFromPath returns the last element of path as produced by
+// filepath.Split, which is empty when path ends in a separator.
+func systemdUnitFromPath(path string) string {
+	_, leaf := filepath.Split(path)
+	return leaf
+}
+
+// readHugeTlbStats scans the directory path for files whose name contains
+// "hugetlb" and ends in "max" or "current", and collects their values into
+// one HugeTlbStat per page size.
+//
+// The page size is the second dot-separated component of the file name and
+// the third component selects the field (max or current). Names with fewer
+// than three components are skipped, as are files that cannot be read or
+// whose content is neither "max" nor an unsigned decimal number. The literal
+// "max" maps to math.MaxUint64. An unreadable directory yields an empty,
+// non-nil slice. The order of the returned entries is not defined.
+func readHugeTlbStats(path string) []*stats.HugeTlbStat {
+	var usage = []*stats.HugeTlbStat{}
+	var keyUsage = make(map[string]*stats.HugeTlbStat)
+	f, err := os.Open(path)
+	if err != nil {
+		return usage
+	}
+	files, err := f.Readdir(-1)
+	f.Close()
+	if err != nil {
+		return usage
+	}
+
+	for _, file := range files {
+		name := file.Name()
+		if !strings.Contains(name, "hugetlb") ||
+			!(strings.HasSuffix(name, "max") || strings.HasSuffix(name, "current")) {
+			continue
+		}
+		fileName := strings.Split(name, ".")
+		if len(fileName) < 3 {
+			// Not of the form hugetlb.<pagesize>.<field>; indexing below
+			// would otherwise panic.
+			continue
+		}
+		pageSize := fileName[1]
+
+		out, err := ioutil.ReadFile(filepath.Join(path, name))
+		if err != nil {
+			continue
+		}
+		var value uint64
+		stringVal := strings.TrimSpace(string(out))
+		if stringVal == "max" {
+			value = math.MaxUint64
+		} else {
+			value, err = strconv.ParseUint(stringVal, 10, 64)
+		}
+		if err != nil {
+			continue
+		}
+
+		// Entries are only recorded once a value was read successfully.
+		hugeTlb, ok := keyUsage[pageSize]
+		if !ok {
+			hugeTlb = &stats.HugeTlbStat{}
+		}
+		hugeTlb.Pagesize = pageSize
+		switch fileName[2] {
+		case "max":
+			hugeTlb.Max = value
+		case "current":
+			hugeTlb.Current = value
+		}
+		keyUsage[pageSize] = hugeTlb
+	}
+	for _, entry := range keyUsage {
+		usage = append(usage, entry)
+	}
+	return usage
+}