瀏覽代碼

Merge pull request #42249 from thaJeztah/bump_containerd

vendor: github.com/containerd/containerd v1.5.2
Brian Goff 4 年之前
父節點
當前提交
e27beeba0f
共有 100 個文件被更改，包括 4182 次插入和 1626 次刪除
  1. 2 2
      plugin/backend_linux.go
  2. 6 6
      vendor.conf
  3. 88 0
      vendor/github.com/Microsoft/hcsshim/pkg/ociwclayer/export.go
  4. 148 0
      vendor/github.com/Microsoft/hcsshim/pkg/ociwclayer/import.go
  5. 171 0
      vendor/github.com/cilium/ebpf/link/cgroup.go
  6. 2 0
      vendor/github.com/cilium/ebpf/link/doc.go
  7. 67 0
      vendor/github.com/cilium/ebpf/link/iter.go
  8. 296 0
      vendor/github.com/cilium/ebpf/link/kprobe.go
  9. 229 0
      vendor/github.com/cilium/ebpf/link/link.go
  10. 60 0
      vendor/github.com/cilium/ebpf/link/netns.go
  11. 253 0
      vendor/github.com/cilium/ebpf/link/perf_event.go
  12. 76 0
      vendor/github.com/cilium/ebpf/link/program.go
  13. 61 0
      vendor/github.com/cilium/ebpf/link/raw_tracepoint.go
  14. 173 0
      vendor/github.com/cilium/ebpf/link/syscalls.go
  15. 56 0
      vendor/github.com/cilium/ebpf/link/tracepoint.go
  16. 6 6
      vendor/github.com/containerd/cgroups/go.mod
  17. 4 0
      vendor/github.com/containerd/cgroups/net_cls.go
  18. 593 513
      vendor/github.com/containerd/cgroups/stats/v1/metrics.pb.go
  19. 5 2
      vendor/github.com/containerd/cgroups/subsystem.go
  20. 31 26
      vendor/github.com/containerd/cgroups/utils.go
  21. 15 3
      vendor/github.com/containerd/cgroups/v2/ebpf.go
  22. 415 353
      vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.go
  23. 2 1
      vendor/github.com/containerd/containerd/README.md
  24. 23 2
      vendor/github.com/containerd/containerd/archive/tar.go
  25. 1 1
      vendor/github.com/containerd/containerd/archive/tar_freebsd.go
  26. 10 0
      vendor/github.com/containerd/containerd/archive/tar_opts.go
  27. 43 0
      vendor/github.com/containerd/containerd/archive/tar_opts_windows.go
  28. 2 2
      vendor/github.com/containerd/containerd/archive/tar_unix.go
  29. 0 137
      vendor/github.com/containerd/containerd/archive/tar_windows.go
  30. 1 1
      vendor/github.com/containerd/containerd/cio/io.go
  31. 10 12
      vendor/github.com/containerd/containerd/cio/io_unix.go
  32. 12 24
      vendor/github.com/containerd/containerd/cio/io_windows.go
  33. 19 1
      vendor/github.com/containerd/containerd/client.go
  34. 8 0
      vendor/github.com/containerd/containerd/client_opts.go
  35. 5 5
      vendor/github.com/containerd/containerd/container.go
  36. 1 1
      vendor/github.com/containerd/containerd/content/adaptor.go
  37. 1 1
      vendor/github.com/containerd/containerd/content/content.go
  38. 28 0
      vendor/github.com/containerd/containerd/content/local/readerat.go
  39. 4 16
      vendor/github.com/containerd/containerd/content/local/store.go
  40. 8 1
      vendor/github.com/containerd/containerd/content/proxy/content_writer.go
  41. 2 0
      vendor/github.com/containerd/containerd/defaults/defaults_unix.go
  42. 3 0
      vendor/github.com/containerd/containerd/defaults/defaults_windows.go
  43. 4 0
      vendor/github.com/containerd/containerd/diff/stream.go
  44. 42 37
      vendor/github.com/containerd/containerd/go.mod
  45. 4 3
      vendor/github.com/containerd/containerd/images/diffid.go
  46. 3 0
      vendor/github.com/containerd/containerd/images/mediatypes.go
  47. 88 0
      vendor/github.com/containerd/containerd/metadata/boltutil/helpers.go
  48. 11 58
      vendor/github.com/containerd/containerd/metadata/containers.go
  49. 30 19
      vendor/github.com/containerd/containerd/metadata/namespaces.go
  50. 30 18
      vendor/github.com/containerd/containerd/mount/losetup_linux.go
  51. 4 2
      vendor/github.com/containerd/containerd/mount/mount_linux.go
  52. 31 44
      vendor/github.com/containerd/containerd/oci/spec_opts.go
  53. 42 79
      vendor/github.com/containerd/containerd/oci/spec_opts_linux.go
  54. 38 0
      vendor/github.com/containerd/containerd/oci/spec_opts_nonlinux.go
  55. 10 79
      vendor/github.com/containerd/containerd/oci/spec_opts_unix.go
  56. 1 1
      vendor/github.com/containerd/containerd/oci/spec_opts_windows.go
  57. 137 0
      vendor/github.com/containerd/containerd/oci/utils_unix.go
  58. 6 27
      vendor/github.com/containerd/containerd/pkg/apparmor/apparmor.go
  59. 48 0
      vendor/github.com/containerd/containerd/pkg/apparmor/apparmor_linux.go
  60. 1 2
      vendor/github.com/containerd/containerd/pkg/apparmor/apparmor_unsupported.go
  61. 192 0
      vendor/github.com/containerd/containerd/pkg/cap/cap_linux.go
  62. 6 2
      vendor/github.com/containerd/containerd/pkg/process/init.go
  63. 2 16
      vendor/github.com/containerd/containerd/pkg/process/io.go
  64. 1 1
      vendor/github.com/containerd/containerd/pkg/process/io_util.go
  65. 1 1
      vendor/github.com/containerd/containerd/pkg/process/utils.go
  66. 1 1
      vendor/github.com/containerd/containerd/pkg/userns/userns_linux.go
  67. 1 1
      vendor/github.com/containerd/containerd/pkg/userns/userns_unsupported.go
  68. 53 5
      vendor/github.com/containerd/containerd/platforms/defaults_windows.go
  69. 45 7
      vendor/github.com/containerd/containerd/remotes/docker/pusher.go
  70. 37 4
      vendor/github.com/containerd/containerd/remotes/docker/registry.go
  71. 2 0
      vendor/github.com/containerd/containerd/remotes/docker/resolver.go
  72. 21 1
      vendor/github.com/containerd/containerd/remotes/docker/status.go
  73. 14 4
      vendor/github.com/containerd/containerd/remotes/errors/errors.go
  74. 32 9
      vendor/github.com/containerd/containerd/remotes/handlers.go
  75. 2 0
      vendor/github.com/containerd/containerd/remotes/resolver.go
  76. 2 2
      vendor/github.com/containerd/containerd/rootfs/init.go
  77. 5 5
      vendor/github.com/containerd/containerd/runtime/runtime.go
  78. 17 17
      vendor/github.com/containerd/containerd/runtime/task.go
  79. 1 1
      vendor/github.com/containerd/containerd/runtime/v1/linux/runtime.go
  80. 1 4
      vendor/github.com/containerd/containerd/runtime/v1/shim/client/client.go
  81. 3 0
      vendor/github.com/containerd/containerd/runtime/v1/shim/service.go
  82. 3 3
      vendor/github.com/containerd/containerd/runtime/v1/shim/service_linux.go
  83. 3 4
      vendor/github.com/containerd/containerd/runtime/v1/shim/service_unix.go
  84. 2 0
      vendor/github.com/containerd/containerd/services/introspection/introspection.go
  85. 4 0
      vendor/github.com/containerd/containerd/services/introspection/local.go
  86. 20 7
      vendor/github.com/containerd/containerd/services/server/config/config.go
  87. 2 1
      vendor/github.com/containerd/containerd/snapshots/snapshotter.go
  88. 66 4
      vendor/github.com/containerd/containerd/sys/filesys_windows.go
  89. 29 7
      vendor/github.com/containerd/containerd/sys/oom_linux.go
  90. 13 1
      vendor/github.com/containerd/containerd/sys/oom_unsupported.go
  91. 4 14
      vendor/github.com/containerd/containerd/sys/userns_deprecated.go
  92. 12 3
      vendor/github.com/containerd/containerd/task.go
  93. 1 1
      vendor/github.com/containerd/containerd/version/version.go
  94. 4 0
      vendor/github.com/containerd/continuity/README.md
  95. 1 1
      vendor/github.com/containerd/continuity/devices/devices_unix.go
  96. 5 5
      vendor/github.com/containerd/continuity/devices/mknod_freebsd.go
  97. 5 7
      vendor/github.com/containerd/continuity/devices/mknod_unix.go
  98. 17 2
      vendor/github.com/containerd/continuity/fs/copy.go
  99. 40 0
      vendor/github.com/containerd/continuity/fs/copy_darwinopenbsdsolaris.go
  100. 42 0
      vendor/github.com/containerd/continuity/fs/copy_freebsd.go

+ 2 - 2
plugin/backend_linux.go

@@ -421,7 +421,7 @@ func (pm *Manager) Push(ctx context.Context, name string, metaHeader http.Header
 
 	// Make sure we can authenticate the request since the auth scope for plugin repos is different than a normal repo.
 	ctx = docker.WithScope(ctx, scope(ref, true))
-	if err := remotes.PushContent(ctx, pusher, desc, pm.blobStore, nil, func(h images.Handler) images.Handler {
+	if err := remotes.PushContent(ctx, pusher, desc, pm.blobStore, nil, nil, func(h images.Handler) images.Handler {
 		return images.Handlers(progressHandler, h)
 	}); err != nil {
 		// Try fallback to http.
@@ -433,7 +433,7 @@ func (pm *Manager) Push(ctx context.Context, name string, metaHeader http.Header
 			pusher, _ := resolver.Pusher(ctx, ref.String())
 			if pusher != nil {
 				logrus.WithField("ref", ref).Debug("Re-attmpting push with http-fallback")
-				err2 := remotes.PushContent(ctx, pusher, desc, pm.blobStore, nil, func(h images.Handler) images.Handler {
+				err2 := remotes.PushContent(ctx, pusher, desc, pm.blobStore, nil, nil, func(h images.Handler) images.Handler {
 					return images.Handlers(progressHandler, h)
 				})
 				if err2 == nil {

+ 6 - 6
vendor.conf

@@ -132,13 +132,13 @@ github.com/googleapis/gax-go                        bd5b16380fd03dc758d11cef74ba
 google.golang.org/genproto                          3f1135a288c9a07e340ae8ba4cc6c7065a3160e8
 
 # containerd
-github.com/containerd/containerd                    19ee068f93c91f7b9b2a858457f1af2cabc7bc06 # master (v1.5.0-dev)
-github.com/containerd/fifo                          0724c46b320cf96bb172a0550c19a4b1fca4dacb
-github.com/containerd/continuity                    efbc4488d8fe1bdc16bde3b2d2990d9b3a899165
-github.com/containerd/cgroups                       0b889c03f102012f1d93a97ddd3ef71cd6f4f510
+github.com/containerd/containerd                    36cc874494a56a253cd181a1a685b44b58a2e34a # v1.5.2
+github.com/containerd/fifo                          650e8a8a179d040123db61f016cb133143e7a581 # v1.0.0
+github.com/containerd/continuity                    bce1c3f9669b6f3e7f6656ee715b0b4d75fa64a6 # v0.1.0
+github.com/containerd/cgroups                       b9de8a2212026c07cec67baf3323f1fc0121e048 # v1.0.1
 github.com/containerd/console                       2f1e3d2b6afd18e8b2077816c711205a0b4d8769 # v1.0.2
-github.com/containerd/go-runc                       16b287bc67d069a60fa48db15f330b790b74365b
-github.com/containerd/typeurl                       cd3ce7159eae562a4f60ceff37dada11a939d247 # v1.0.1
+github.com/containerd/go-runc                       16b287bc67d069a60fa48db15f330b790b74365b # v1.0.0
+github.com/containerd/typeurl                       5e43fb8b75ed2f2305fc04e6918c8d10636771bc # v1.0.2
 github.com/containerd/ttrpc                         bfba540dc45464586c106b1f31c8547933c1eb41 # v1.0.2
 github.com/gogo/googleapis                          01e0f9cca9b92166042241267ee2a5cdf5cff46c # v1.3.2
 github.com/cilium/ebpf                              ef54c303d1fff1e80a9bf20f00a378fde5419d61 # v0.5.0

+ 88 - 0
vendor/github.com/Microsoft/hcsshim/pkg/ociwclayer/export.go

@@ -0,0 +1,88 @@
+// Package ociwclayer provides functions for importing and exporting Windows
+// container layers from and to their OCI tar representation.
+package ociwclayer
+
+import (
+	"archive/tar"
+	"context"
+	"io"
+	"path/filepath"
+
+	"github.com/Microsoft/go-winio/backuptar"
+	"github.com/Microsoft/hcsshim"
+)
+
+// driverInfo is the zero-value hcsshim.DriverInfo handed to every hcsshim
+// layer call made while exporting a layer.
+var driverInfo = hcsshim.DriverInfo{}
+
+// ExportLayerToTar streams the on-disk layer at path to w as an OCI layer tar.
+// parentLayerPaths lists the parent layers, if any, ordered from lowest to
+// highest layer.
+//
+// The layer is activated (mounted) for this process for the duration of the
+// call, so the caller should ensure that it is not currently mounted.
+func ExportLayerToTar(ctx context.Context, w io.Writer, path string, parentLayerPaths []string) error {
+	if err := hcsshim.ActivateLayer(driverInfo, path); err != nil {
+		return err
+	}
+	// Deactivation is best-effort; its error is intentionally discarded.
+	defer func() {
+		_ = hcsshim.DeactivateLayer(driverInfo, path)
+	}()
+
+	// A prepare/unprepare round trip ensures the layer has been initialized.
+	if err := hcsshim.PrepareLayer(driverInfo, path, parentLayerPaths); err != nil {
+		return err
+	}
+	if err := hcsshim.UnprepareLayer(driverInfo, path); err != nil {
+		return err
+	}
+
+	reader, err := hcsshim.NewLayerReader(driverInfo, path, parentLayerPaths)
+	if err != nil {
+		return err
+	}
+
+	// Always close the reader, but prefer reporting the write error.
+	writeErr := writeTarFromLayer(ctx, reader, w)
+	closeErr := reader.Close()
+	if writeErr != nil {
+		return writeErr
+	}
+	return closeErr
+}
+
+// writeTarFromLayer copies every entry produced by r into a tar stream on w.
+// Entries without file info are emitted as whiteout headers; all others are
+// converted by backuptar. The loop stops with ctx.Err() once ctx is done, and
+// the tar writer is closed when r reports io.EOF.
+func writeTarFromLayer(ctx context.Context, r hcsshim.LayerReader, w io.Writer) error {
+	tw := tar.NewWriter(w)
+	for {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		entryName, entrySize, info, nextErr := r.Next()
+		if nextErr == io.EOF {
+			return tw.Close()
+		}
+		if nextErr != nil {
+			return nextErr
+		}
+
+		if info != nil {
+			if err := backuptar.WriteTarFileFromBackupStream(tw, r, entryName, entrySize, info); err != nil {
+				return err
+			}
+			continue
+		}
+
+		// Write a whiteout file: same directory, base name prefixed with
+		// whiteoutPrefix, using forward slashes in the tar header.
+		whiteout := filepath.Join(filepath.Dir(entryName), whiteoutPrefix+filepath.Base(entryName))
+		if err := tw.WriteHeader(&tar.Header{Name: filepath.ToSlash(whiteout)}); err != nil {
+			return err
+		}
+	}
+}

+ 148 - 0
vendor/github.com/Microsoft/hcsshim/pkg/ociwclayer/import.go

@@ -0,0 +1,148 @@
+package ociwclayer
+
+import (
+	"archive/tar"
+	"bufio"
+	"context"
+	"io"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+
+	winio "github.com/Microsoft/go-winio"
+	"github.com/Microsoft/go-winio/backuptar"
+	"github.com/Microsoft/hcsshim"
+)
+
+// whiteoutPrefix is the prefix on a tar entry's base name that marks the
+// entry as a whiteout, i.e. a removal of the file with the unprefixed name.
+const whiteoutPrefix = ".wh."
+
+var (
+	// mutatedFiles is a list of files that are mutated by the import process
+	// and must be backed up and restored.
+	//
+	// Keys are tar entry names (matched against the tar header name); values
+	// are the names of the backup files created under the layer root.
+	mutatedFiles = map[string]string{
+		"UtilityVM/Files/EFI/Microsoft/Boot/BCD":      "bcd.bak",
+		"UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG":  "bcd.log.bak",
+		"UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG1": "bcd.log1.bak",
+		"UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG2": "bcd.log2.bak",
+	}
+)
+
+// ImportLayerFromTar reads a layer from an OCI layer tar stream and extracts
+// it to the specified path, creating the directory if needed. The caller must
+// specify the parent layers, if any, ordered from lowest to highest layer.
+//
+// The caller must ensure that the thread or process has acquired backup and
+// restore privileges.
+//
+// On success it returns the total size of the layer's files, in bytes. On any
+// failure, including a failure to close the layer writer, it returns 0 and
+// the error.
+func ImportLayerFromTar(ctx context.Context, r io.Reader, path string, parentLayerPaths []string) (int64, error) {
+	if err := os.MkdirAll(path, 0); err != nil {
+		return 0, err
+	}
+
+	layerWriter, err := hcsshim.NewLayerWriter(hcsshim.DriverInfo{}, path, parentLayerPaths)
+	if err != nil {
+		return 0, err
+	}
+
+	// The writer is always closed; a write error takes precedence over a
+	// close error.
+	total, writeErr := writeLayerFromTar(ctx, r, layerWriter, path)
+	closeErr := layerWriter.Close()
+	switch {
+	case writeErr != nil:
+		return 0, writeErr
+	case closeErr != nil:
+		return 0, closeErr
+	}
+	return total, nil
+}
+
+// writeLayerFromTar replays the tar stream r into the layer writer w and
+// returns the summed size of the regular entries it wrote.
+//
+// Whiteout entries become removals, hard links become AddLink calls, and
+// everything else is added and then streamed through
+// writeBackupStreamFromTarAndSaveMutatedFiles, which also yields the next tar
+// header. The loop ends cleanly when the tar reader reports io.EOF; any other
+// error, or cancellation of ctx, returns 0 and the error.
+func writeLayerFromTar(ctx context.Context, r io.Reader, w hcsshim.LayerWriter, root string) (int64, error) {
+	tr := tar.NewReader(r)
+	// scratch is re-targeted for every file entry instead of reallocated.
+	scratch := bufio.NewWriter(nil)
+	var total int64
+
+	hdr, err := tr.Next()
+	for err == nil {
+		select {
+		case <-ctx.Done():
+			return 0, ctx.Err()
+		default:
+		}
+
+		base := path.Base(hdr.Name)
+		switch {
+		case strings.HasPrefix(base, whiteoutPrefix):
+			target := path.Join(path.Dir(hdr.Name), base[len(whiteoutPrefix):])
+			if rmErr := w.Remove(filepath.FromSlash(target)); rmErr != nil {
+				return 0, rmErr
+			}
+			hdr, err = tr.Next()
+
+		case hdr.Typeflag == tar.TypeLink:
+			linkErr := w.AddLink(filepath.FromSlash(hdr.Name), filepath.FromSlash(hdr.Linkname))
+			if linkErr != nil {
+				return 0, linkErr
+			}
+			hdr, err = tr.Next()
+
+		default:
+			name, size, fileInfo, infoErr := backuptar.FileInfoFromHeader(hdr)
+			if infoErr != nil {
+				return 0, infoErr
+			}
+			if addErr := w.Add(filepath.FromSlash(name), fileInfo); addErr != nil {
+				return 0, addErr
+			}
+			hdr, err = writeBackupStreamFromTarAndSaveMutatedFiles(scratch, w, tr, hdr, root)
+			total += size
+		}
+	}
+
+	if err != io.EOF {
+		return 0, err
+	}
+	return total, nil
+}
+
+// writeBackupStreamFromTarAndSaveMutatedFiles reads data from a tar stream and
+// writes it to a backup stream, and also saves any files that will be mutated
+// by the import layer process to a backup location.
+//
+// buf is a reusable buffered writer that is Reset onto the destination for
+// this entry. When hdr.Name is a key of mutatedFiles, the data is written both
+// to w and to a backup file created under root; otherwise it goes to w only.
+// The results are those of backuptar.WriteBackupStreamFromTarFile, except that
+// err is replaced by a flush or close error when it would otherwise be nil.
+func writeBackupStreamFromTarAndSaveMutatedFiles(buf *bufio.Writer, w io.Writer, t *tar.Reader, hdr *tar.Header, root string) (nextHdr *tar.Header, err error) {
+	var bcdBackup *os.File
+	var bcdBackupWriter *winio.BackupFileWriter
+	if backupPath, ok := mutatedFiles[hdr.Name]; ok {
+		bcdBackup, err = os.Create(filepath.Join(root, backupPath))
+		if err != nil {
+			return nil, err
+		}
+		// Runs last of the deferred calls: close the backup file and surface
+		// the close error only if nothing else failed.
+		defer func() {
+			cerr := bcdBackup.Close()
+			if err == nil {
+				err = cerr
+			}
+		}()
+
+		bcdBackupWriter = winio.NewBackupFileWriter(bcdBackup, false)
+		// Runs after buf has been flushed and before the file is closed.
+		defer func() {
+			cerr := bcdBackupWriter.Close()
+			if err == nil {
+				err = cerr
+			}
+		}()
+
+		// Tee the stream to both the layer writer and the backup file.
+		buf.Reset(io.MultiWriter(w, bcdBackupWriter))
+	} else {
+		buf.Reset(w)
+	}
+
+	// Registered last, so it runs first: flush buffered data before any
+	// backup writer or file is closed.
+	defer func() {
+		ferr := buf.Flush()
+		if err == nil {
+			err = ferr
+		}
+	}()
+
+	return backuptar.WriteBackupStreamFromTarFile(buf, t, hdr)
+}

+ 171 - 0
vendor/github.com/cilium/ebpf/link/cgroup.go

@@ -0,0 +1,171 @@
+package link
+
+import (
+	"errors"
+	"fmt"
+	"os"
+
+	"github.com/cilium/ebpf"
+)
+
+// cgroupAttachFlags is a bit set of flags passed along when attaching a
+// program to a cgroup.
+type cgroupAttachFlags uint32
+
+// cgroup attach flags
+//
+// The values are consecutive bits (1, 2, 4).
+// NOTE(review): presumably these mirror the kernel's BPF_F_ALLOW_OVERRIDE,
+// BPF_F_ALLOW_MULTI and BPF_F_REPLACE — confirm against the kernel headers.
+const (
+	flagAllowOverride cgroupAttachFlags = 1 << iota
+	flagAllowMulti
+	flagReplace
+)
+
+// CgroupOptions configures AttachCgroup.
+type CgroupOptions struct {
+	// Path to a cgroupv2 folder.
+	Path string
+	// One of the AttachCgroup* constants
+	Attach ebpf.AttachType
+	// Program must be of type CGroup*, and the attach type must match Attach.
+	Program *ebpf.Program
+}
+
+// AttachCgroup links a BPF program to a cgroup.
+//
+// It opens opts.Path, clones opts.Program and then tries, in order:
+// a bpf_link based attachment, a program attachment with flagAllowMulti, and
+// a program attachment with flagAllowOverride. Each fallback is only taken
+// when the previous attempt failed with an error wrapping ErrNotSupported.
+//
+// On failure the opened cgroup and the cloned program are closed and the
+// error is returned. The error from opening the cgroup is wrapped, so callers
+// can inspect it with errors.Is / errors.As.
+func AttachCgroup(opts CgroupOptions) (Link, error) {
+	cgroup, err := os.Open(opts.Path)
+	if err != nil {
+		return nil, fmt.Errorf("can't open cgroup: %w", err)
+	}
+
+	clone, err := opts.Program.Clone()
+	if err != nil {
+		cgroup.Close()
+		return nil, err
+	}
+
+	var cg Link
+	cg, err = newLinkCgroup(cgroup, opts.Attach, clone)
+	if errors.Is(err, ErrNotSupported) {
+		cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowMulti)
+	}
+	if errors.Is(err, ErrNotSupported) {
+		cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowOverride)
+	}
+	if err != nil {
+		cgroup.Close()
+		clone.Close()
+		// Return a literal nil: cg may hold a typed nil pointer here.
+		return nil, err
+	}
+
+	return cg, nil
+}
+
+// LoadPinnedCgroup opens the cgroup link pinned at fileName in a bpffs and
+// returns it as a Link. The pinned link is loaded as CgroupType.
+func LoadPinnedCgroup(fileName string, opts *ebpf.LoadPinOptions) (Link, error) {
+	raw, err := LoadPinnedRawLink(fileName, CgroupType, opts)
+	if err != nil {
+		return nil, err
+	}
+	return &linkCgroup{RawLink: *raw}, nil
+}
+
+// progAttachCgroup is a Link implemented by attaching a program directly to
+// a cgroup (RawAttachProgram / RawDetachProgram) rather than via a bpf_link.
+type progAttachCgroup struct {
+	// cgroup is the open cgroup directory; its fd is the attach target.
+	cgroup *os.File
+	// current is the program handle passed to RawDetachProgram on Close and
+	// used as the replace target on Update.
+	current *ebpf.Program
+	// attachType is the attach type used for attach and detach.
+	attachType ebpf.AttachType
+	// flags are the attach flags the program was attached with.
+	flags cgroupAttachFlags
+}
+
+// Compile-time check that *progAttachCgroup satisfies Link.
+var _ Link = (*progAttachCgroup)(nil)
+
+// isLink marks progAttachCgroup as an implementation of Link.
+func (cg *progAttachCgroup) isLink() {}
+
+// newProgAttachCgroup attaches prog to the cgroup with the given attach type
+// and flags and returns a handle that takes ownership of cgroup and prog.
+//
+// When flagAllowMulti is requested, haveProgAttachReplace must succeed first;
+// otherwise its error is returned wrapped.
+func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program, flags cgroupAttachFlags) (*progAttachCgroup, error) {
+	if flags&flagAllowMulti != 0 {
+		if err := haveProgAttachReplace(); err != nil {
+			return nil, fmt.Errorf("can't support multiple programs: %w", err)
+		}
+	}
+
+	attachOpts := RawAttachProgramOptions{
+		Target:  int(cgroup.Fd()),
+		Program: prog,
+		Flags:   uint32(flags),
+		Attach:  attach,
+	}
+	if err := RawAttachProgram(attachOpts); err != nil {
+		return nil, fmt.Errorf("cgroup: %w", err)
+	}
+
+	return &progAttachCgroup{
+		cgroup:     cgroup,
+		current:    prog,
+		attachType: attach,
+		flags:      flags,
+	}, nil
+}
+
+// Close detaches the current program from the cgroup and releases both the
+// program handle and the cgroup file, whether or not the detach succeeds.
+//
+// A detach failure is returned wrapped, so callers can inspect the
+// underlying error with errors.Is / errors.As.
+func (cg *progAttachCgroup) Close() error {
+	defer cg.cgroup.Close()
+	defer cg.current.Close()
+
+	err := RawDetachProgram(RawDetachProgramOptions{
+		Target:  int(cg.cgroup.Fd()),
+		Program: cg.current,
+		Attach:  cg.attachType,
+	})
+	if err != nil {
+		return fmt.Errorf("close cgroup: %w", err)
+	}
+	return nil
+}
+
+// Update attaches prog to the cgroup in place of the current program.
+//
+// A clone of prog is taken first; on success the clone becomes the program
+// tracked by cg and the previously tracked handle is closed. On failure the
+// clone is closed, cg is left unchanged, and the attach error is returned
+// wrapped so callers can inspect it with errors.Is / errors.As.
+func (cg *progAttachCgroup) Update(prog *ebpf.Program) error {
+	clone, err := prog.Clone()
+	if err != nil {
+		return err
+	}
+
+	args := RawAttachProgramOptions{
+		Target:  int(cg.cgroup.Fd()),
+		Program: prog,
+		Attach:  cg.attachType,
+		Flags:   uint32(cg.flags),
+	}
+
+	if cg.flags&flagAllowMulti > 0 {
+		// Atomically replacing multiple programs requires at least
+		// 5.5 (commit 7dd68b3279f17921 "bpf: Support replacing cgroup-bpf
+		// program in MULTI mode")
+		args.Flags |= uint32(flagReplace)
+		args.Replace = cg.current
+	}
+
+	if err := RawAttachProgram(args); err != nil {
+		clone.Close()
+		return fmt.Errorf("can't update cgroup: %w", err)
+	}
+
+	cg.current.Close()
+	cg.current = clone
+	return nil
+}
+
+// Pin is not available for this kind of attachment; it always returns an
+// error wrapping ErrNotSupported.
+func (cg *progAttachCgroup) Pin(string) error {
+	return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported)
+}
+
+// Unpin is not available for this kind of attachment; it always returns an
+// error wrapping ErrNotSupported.
+func (cg *progAttachCgroup) Unpin() error {
+	return fmt.Errorf("can't unpin cgroup: %w", ErrNotSupported)
+}
+
+// linkCgroup is a cgroup attachment backed by a RawLink.
+type linkCgroup struct {
+	RawLink
+}
+
+// Compile-time check that *linkCgroup satisfies Link.
+var _ Link = (*linkCgroup)(nil)
+
+// newLinkCgroup attaches prog to the cgroup through AttachRawLink, using the
+// cgroup's file descriptor as the target, and wraps the result.
+func newLinkCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program) (*linkCgroup, error) {
+	linkOpts := RawLinkOptions{
+		Target:  int(cgroup.Fd()),
+		Program: prog,
+		Attach:  attach,
+	}
+
+	raw, err := AttachRawLink(linkOpts)
+	if err != nil {
+		return nil, err
+	}
+	return &linkCgroup{RawLink: *raw}, nil
+}

+ 2 - 0
vendor/github.com/cilium/ebpf/link/doc.go

@@ -0,0 +1,2 @@
+// Package link allows attaching eBPF programs to various kernel hooks.
+package link

+ 67 - 0
vendor/github.com/cilium/ebpf/link/iter.go

@@ -0,0 +1,67 @@
+package link
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/cilium/ebpf"
+)
+
+// IterOptions configures AttachIter.
+type IterOptions struct {
+	// Program must be of type Tracing with attach type
+	// AttachTraceIter. The kind of iterator to attach to is
+	// determined at load time via the AttachTo field.
+	//
+	// AttachTo requires the kernel to include BTF of itself,
+	// and it to be compiled with a recent pahole (>= 1.16).
+	Program *ebpf.Program
+}
+
+// AttachIter attaches a BPF seq_file iterator by creating a raw link for
+// opts.Program with attach type ebpf.AttachTraceIter.
+func AttachIter(opts IterOptions) (*Iter, error) {
+	linkOpts := RawLinkOptions{
+		Program: opts.Program,
+		Attach:  ebpf.AttachTraceIter,
+	}
+
+	raw, err := AttachRawLink(linkOpts)
+	if err != nil {
+		return nil, fmt.Errorf("can't link iterator: %w", err)
+	}
+	return &Iter{RawLink: *raw}, nil
+}
+
+// LoadPinnedIter opens the iterator link pinned at fileName in a bpffs. The
+// pinned link is loaded as IterType.
+func LoadPinnedIter(fileName string, opts *ebpf.LoadPinOptions) (*Iter, error) {
+	raw, err := LoadPinnedRawLink(fileName, IterType, opts)
+	if err != nil {
+		return nil, err
+	}
+	return &Iter{RawLink: *raw}, nil
+}
+
+// Iter represents an attached bpf_iter.
+//
+// It embeds RawLink; Open uses the embedded link's file descriptor to create
+// new iterator instances.
+type Iter struct {
+	RawLink
+}
+
+// Open creates a new instance of the iterator from the link's file
+// descriptor.
+//
+// Reading from the returned reader triggers the BPF program.
+func (it *Iter) Open() (io.ReadCloser, error) {
+	linkFd, err := it.fd.Value()
+	if err != nil {
+		return nil, err
+	}
+
+	iterFd, err := bpfIterCreate(&bpfIterCreateAttr{linkFd: linkFd})
+	if err != nil {
+		return nil, fmt.Errorf("can't create iterator: %w", err)
+	}
+
+	return iterFd.File("bpf_iter"), nil
+}

+ 296 - 0
vendor/github.com/cilium/ebpf/link/kprobe.go

@@ -0,0 +1,296 @@
+package link
+
+import (
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+var (
+	// kprobeEventsPath is the kprobe_events file under tracefsPath; kprobe
+	// trace events are created and removed by appending lines to it.
+	kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events")
+)
+
+// Kprobe attaches the given eBPF program to a perf event that fires when the
+// given kernel symbol starts executing. See /proc/kallsyms for available
+// symbols. For example, printk():
+//
+//	kp, err := Kprobe("printk", prog)
+//
+// The resulting Link must be Closed during program shutdown to avoid leaking
+// system resources.
+func Kprobe(symbol string, prog *ebpf.Program) (Link, error) {
+	pe, err := kprobe(symbol, prog, false)
+	if err != nil {
+		return nil, err
+	}
+
+	// Release the perf event again if the program cannot be attached to it.
+	if attachErr := pe.attach(prog); attachErr != nil {
+		pe.Close()
+		return nil, attachErr
+	}
+
+	return pe, nil
+}
+
+// Kretprobe attaches the given eBPF program to a perf event that fires right
+// before the given kernel symbol exits, with the function stack left intact.
+// See /proc/kallsyms for available symbols. For example, printk():
+//
+//	kp, err := Kretprobe("printk", prog)
+//
+// The resulting Link must be Closed during program shutdown to avoid leaking
+// system resources.
+func Kretprobe(symbol string, prog *ebpf.Program) (Link, error) {
+	pe, err := kprobe(symbol, prog, true)
+	if err != nil {
+		return nil, err
+	}
+
+	// Release the perf event again if the program cannot be attached to it.
+	if attachErr := pe.attach(prog); attachErr != nil {
+		pe.Close()
+		return nil, attachErr
+	}
+
+	return pe, nil
+}
+
+// kprobe validates its arguments and opens a perf event on the given symbol.
+// It does not attach prog; prog is only checked for being non-nil and of type
+// ebpf.Kprobe. If ret is true, a kretprobe is created.
+//
+// The perf_kprobe PMU is tried first; tracefs is used as a fallback only when
+// the PMU attempt fails with an error wrapping ErrNotSupported.
+func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
+	switch {
+	case symbol == "":
+		return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput)
+	case prog == nil:
+		return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
+	case !rgxTraceEvent.MatchString(symbol):
+		return nil, fmt.Errorf("symbol '%s' must be alphanumeric or underscore: %w", symbol, errInvalidInput)
+	case prog.Type() != ebpf.Kprobe:
+		return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput)
+	}
+
+	// Use kprobe PMU if the kernel has it available.
+	event, pmuErr := pmuKprobe(symbol, ret)
+	if pmuErr == nil {
+		return event, nil
+	}
+	if !errors.Is(pmuErr, ErrNotSupported) {
+		return nil, fmt.Errorf("creating perf_kprobe PMU: %w", pmuErr)
+	}
+
+	// Use tracefs if kprobe PMU is missing.
+	event, traceErr := tracefsKprobe(symbol, ret)
+	if traceErr != nil {
+		return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, traceErr)
+	}
+
+	return event, nil
+}
+
+// pmuKprobe opens a perf event based on a Performance Monitoring Unit.
+// Requires at least 4.17 (e12f03d7031a "perf/core: Implement the
+// 'perf_kprobe' PMU").
+// Returns ErrNotSupported if the kernel doesn't support perf_kprobe PMU,
+// or os.ErrNotExist if the given symbol does not exist in the kernel.
+// If ret is true, the event is opened as a kretprobe.
+func pmuKprobe(symbol string, ret bool) (*perfEvent, error) {
+
+	// Getting the PMU type will fail if the kernel doesn't support
+	// the perf_kprobe PMU.
+	et, err := getPMUEventType("kprobe")
+	if err != nil {
+		return nil, err
+	}
+
+	// Create a pointer to a NUL-terminated string for the kernel.
+	sp, err := unsafeStringPtr(symbol)
+	if err != nil {
+		return nil, err
+	}
+
+	// TODO: Parse the position of the bit from /sys/bus/event_source/devices/%s/format/retprobe.
+	// Until then, bit 0 of config is assumed to select a return probe.
+	config := 0
+	if ret {
+		config = 1
+	}
+
+	attr := unix.PerfEventAttr{
+		Type:   uint32(et),          // PMU event type read from sysfs
+		Ext1:   uint64(uintptr(sp)), // Kernel symbol to trace
+		Config: uint64(config),      // perf_kprobe PMU treats config as flags
+	}
+
+	fd, err := unix.PerfEventOpen(&attr, perfAllThreads, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
+
+	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
+	// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
+	// is returned to the caller.
+	// Both errors are therefore reported as os.ErrNotExist.
+	if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
+		return nil, fmt.Errorf("symbol '%s' not found: %w", symbol, os.ErrNotExist)
+	}
+	if err != nil {
+		return nil, fmt.Errorf("opening perf event: %w", err)
+	}
+
+	// Ensure the string pointer is not collected before PerfEventOpen returns.
+	// attr only holds the address as an integer, so sp itself must stay
+	// referenced until this point.
+	runtime.KeepAlive(sp)
+
+	// Kernel has perf_kprobe PMU available, initialize perf event.
+	return &perfEvent{
+		fd:       internal.NewFD(uint32(fd)),
+		pmuID:    et,
+		name:     symbol,
+		ret:      ret,
+		progType: ebpf.Kprobe,
+	}, nil
+}
+
+// tracefsKprobe creates a trace event by writing an entry to <tracefs>/kprobe_events.
+// A new trace event group name is generated on every call to support creating
+// multiple trace events for the same kernel symbol. A perf event is then opened
+// on the newly-created trace event and returned to the caller.
+//
+// If ret is true a return probe is created. If any step after the trace event
+// has been created fails, the event is removed from tracefs again so that a
+// failed call does not leave a stale entry behind.
+func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) {
+
+	// Generate a random string for each trace event we attempt to create.
+	// This value is used as the 'group' token in tracefs to allow creating
+	// multiple kprobe trace events with the same name.
+	group, err := randomGroup("ebpf")
+	if err != nil {
+		return nil, fmt.Errorf("randomizing group name: %w", err)
+	}
+
+	// Before attempting to create a trace event through tracefs,
+	// check if an event with the same group and name already exists.
+	// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
+	// entry, so we need to rely on reads for detecting uniqueness.
+	_, err = getTraceEventID(group, symbol)
+	if err == nil {
+		return nil, fmt.Errorf("trace event already exists: %s/%s", group, symbol)
+	}
+	// The read is expected to fail with ErrNotSupported due to a non-existing event.
+	if !errors.Is(err, ErrNotSupported) {
+		return nil, fmt.Errorf("checking trace event %s/%s: %w", group, symbol, err)
+	}
+
+	// Create the kprobe trace event using tracefs.
+	if err := createTraceFSKprobeEvent(group, symbol, ret); err != nil {
+		return nil, fmt.Errorf("creating kprobe event on tracefs: %w", err)
+	}
+
+	// Get the newly-created trace event's id.
+	tid, err := getTraceEventID(group, symbol)
+	if err != nil {
+		// Best-effort cleanup of the event created above; the original
+		// error is the one worth reporting.
+		_ = closeTraceFSKprobeEvent(group, symbol)
+		return nil, fmt.Errorf("getting trace event id: %w", err)
+	}
+
+	// Kprobes are ephemeral tracepoints and share the same perf event type.
+	fd, err := openTracepointPerfEvent(tid)
+	if err != nil {
+		// Best-effort cleanup of the event created above; the original
+		// error is the one worth reporting.
+		_ = closeTraceFSKprobeEvent(group, symbol)
+		return nil, err
+	}
+
+	return &perfEvent{
+		fd:        fd,
+		group:     group,
+		name:      symbol,
+		ret:       ret,
+		tracefsID: tid,
+		progType:  ebpf.Kprobe, // kernel only allows attaching kprobe programs to kprobe events
+	}, nil
+}
+
+// createTraceFSKprobeEvent creates a new ephemeral trace event by appending a
+// line to <tracefs>/kprobe_events. If ret is true a return probe is created.
+//
+// A write that fails with ENOENT or EINVAL is reported as an error wrapping
+// os.ErrNotExist (kernel symbol not found); other failures are wrapped as-is.
+func createTraceFSKprobeEvent(group, symbol string, ret bool) error {
+	// Open the kprobe_events file in tracefs.
+	events, err := os.OpenFile(kprobeEventsPath, os.O_APPEND|os.O_WRONLY, 0666)
+	if err != nil {
+		return fmt.Errorf("error opening kprobe_events: %w", err)
+	}
+	defer events.Close()
+
+	// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
+	// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
+	// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
+	// -:[GRP/]EVENT                                        : Clear a probe
+	//
+	// Some examples:
+	// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
+	// p:ebpf_5678/p_my_kprobe __x64_sys_execve
+	//
+	// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
+	// kernel default to NR_CPUS. This is desired in most eBPF cases since
+	// subsampling or rate limiting logic can be more accurately implemented in
+	// the eBPF program itself. See Documentation/kprobes.txt for more details.
+	entry := fmt.Sprintf("%s:%s/%s %s", kprobePrefix(ret), group, symbol, symbol)
+	_, writeErr := events.WriteString(entry)
+
+	switch {
+	// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
+	// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
+	// is returned to the caller.
+	case errors.Is(writeErr, os.ErrNotExist), errors.Is(writeErr, unix.EINVAL):
+		return fmt.Errorf("kernel symbol %s not found: %w", symbol, os.ErrNotExist)
+	case writeErr != nil:
+		return fmt.Errorf("writing '%s' to kprobe_events: %w", entry, writeErr)
+	}
+
+	return nil
+}
+
+// closeTraceFSKprobeEvent removes the kprobe with the given group and symbol
+// from <tracefs>/kprobe_events by appending a "-:group/symbol" line.
+func closeTraceFSKprobeEvent(group, symbol string) error {
+	events, err := os.OpenFile(kprobeEventsPath, os.O_APPEND|os.O_WRONLY, 0666)
+	if err != nil {
+		return fmt.Errorf("error opening kprobe_events: %w", err)
+	}
+	defer events.Close()
+
+	// See the kprobe_events syntax in createTraceFSKprobeEvent. The kprobe
+	// type (p/r) does not need to be specified for removals.
+	removal := fmt.Sprintf("-:%s/%s", group, symbol)
+	_, err = events.WriteString(removal)
+	if err != nil {
+		return fmt.Errorf("writing '%s' to kprobe_events: %w", removal, err)
+	}
+
+	return nil
+}
+
+// randomGroup builds a tracefs group name of the form "<prefix>_<hex>", where
+// <hex> encodes 8 bytes read from crypto/rand.
+//
+// It returns an error wrapping errInvalidInput when prefix contains
+// characters not allowed by rgxTraceEvent or when the resulting name would
+// exceed 63 characters (kernel limitation), and a wrapped error when
+// rand.Read() fails.
+func randomGroup(prefix string) (string, error) {
+	if !rgxTraceEvent.MatchString(prefix) {
+		return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, errInvalidInput)
+	}
+
+	var entropy [8]byte
+	if _, err := rand.Read(entropy[:]); err != nil {
+		return "", fmt.Errorf("reading random bytes: %w", err)
+	}
+
+	name := fmt.Sprintf("%s_%x", prefix, entropy[:])
+	if len(name) > 63 {
+		return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", name, errInvalidInput)
+	}
+
+	return name, nil
+}
+
+// kprobePrefix returns the kprobe_events type token: "r" for a return probe
+// (ret is true) and "p" for a regular probe.
+func kprobePrefix(ret bool) string {
+	prefix := "p"
+	if ret {
+		prefix = "r"
+	}
+	return prefix
+}

+ 229 - 0
vendor/github.com/cilium/ebpf/link/link.go

@@ -0,0 +1,229 @@
+package link
+
+import (
+	"fmt"
+	"unsafe"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/internal"
+)
+
+// ErrNotSupported re-exports internal.ErrNotSupported. Errors in this package
+// that report a missing feature wrap it, so callers can match it with
+// errors.Is.
+var ErrNotSupported = internal.ErrNotSupported
+
+// Link represents a Program attached to a BPF hook.
+//
+// Implementations that cannot support an operation return an error wrapping
+// ErrNotSupported from the corresponding method.
+type Link interface {
+	// Replace the current program with a new program.
+	//
+	// Passing a nil program is an error. May return an error wrapping ErrNotSupported.
+	Update(*ebpf.Program) error
+
+	// Persist a link by pinning it into a bpffs.
+	//
+	// May return an error wrapping ErrNotSupported.
+	Pin(string) error
+
+	// Undo a previous call to Pin.
+	//
+	// May return an error wrapping ErrNotSupported.
+	Unpin() error
+
+	// Close frees resources.
+	//
+	// The link will be broken unless it has been pinned. A link
+	// may continue past the lifetime of the process if Close is
+	// not called.
+	Close() error
+
+	// Prevent external users from implementing this interface: the method is
+	// unexported, so only types declared in this package can satisfy Link.
+	isLink()
+}
+
+// ID uniquely identifies a BPF link.
+//
+// RawLink.Info fills it from the id field of the link info it reads.
+type ID uint32
+
+// RawLinkOptions control the creation of a raw link.
+//
+// The values are passed to the kernel by AttachRawLink.
+type RawLinkOptions struct {
+	// File descriptor to attach to. This differs for each attach type.
+	// AttachRawLink rejects negative values.
+	Target int
+	// Program to attach.
+	Program *ebpf.Program
+	// Attach must match the attach type of Program.
+	Attach ebpf.AttachType
+}
+
+// RawLinkInfo contains metadata on a link, as returned by RawLink.Info.
+type RawLinkInfo struct {
+	// Type is the kind of the link.
+	Type    Type
+	// ID is the identifier of the link.
+	ID      ID
+	// Program is the ID of the program reported for the link.
+	Program ebpf.ProgramID
+}
+
+// RawLink is the low-level API to bpf_link.
+//
+// You should consider using the higher level interfaces in this
+// package instead.
+type RawLink struct {
+	// fd is the file descriptor of the link.
+	fd         *internal.FD
+	// pinnedPath is the path the link was last pinned to or loaded from.
+	// It is empty for a link that is not pinned; Pin sets it and Unpin
+	// clears it.
+	pinnedPath string
+}
+
+// AttachRawLink creates a raw link.
+//
+// It first checks that the kernel supports bpf_link, then validates
+// opts.Target and opts.Program before issuing BPF_LINK_CREATE.
+//
+// A negative target or a program without a valid file descriptor yields an
+// error wrapping internal.ErrClosedFd. A failing syscall is wrapped with %w,
+// so the underlying error stays reachable through errors.Is and errors.As.
+func AttachRawLink(opts RawLinkOptions) (*RawLink, error) {
+	if err := haveBPFLink(); err != nil {
+		return nil, err
+	}
+
+	if opts.Target < 0 {
+		return nil, fmt.Errorf("invalid target: %w", internal.ErrClosedFd)
+	}
+
+	// FD is invoked on the pointer below; reject nil explicitly instead of
+	// relying on the method tolerating a nil receiver.
+	if opts.Program == nil {
+		return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
+	}
+
+	progFd := opts.Program.FD()
+	if progFd < 0 {
+		return nil, fmt.Errorf("invalid program: %w", internal.ErrClosedFd)
+	}
+
+	attr := bpfLinkCreateAttr{
+		targetFd:   uint32(opts.Target),
+		progFd:     uint32(progFd),
+		attachType: opts.Attach,
+	}
+	fd, err := bpfLinkCreate(&attr)
+	if err != nil {
+		return nil, fmt.Errorf("can't create link: %w", err)
+	}
+
+	// A freshly created link is not pinned anywhere yet.
+	return &RawLink{fd: fd, pinnedPath: ""}, nil
+}
+
+// LoadPinnedRawLink loads a persisted link from a bpffs.
+//
+// Returns an error if the pinned link type doesn't match linkType. Pass
+// UnspecifiedType to disable this behaviour.
+//
+// When the type check cannot be performed or fails, the freshly opened link
+// is closed before the error is returned. Errors from the underlying calls
+// are wrapped with %w so callers can inspect them with errors.Is.
+func LoadPinnedRawLink(fileName string, linkType Type, opts *ebpf.LoadPinOptions) (*RawLink, error) {
+	fd, err := internal.BPFObjGet(fileName, opts.Marshal())
+	if err != nil {
+		return nil, fmt.Errorf("load pinned link: %w", err)
+	}
+
+	// Remember the path the link was loaded from so that Unpin works.
+	raw := &RawLink{fd: fd, pinnedPath: fileName}
+	if linkType == UnspecifiedType {
+		return raw, nil
+	}
+
+	info, err := raw.Info()
+	if err != nil {
+		// Best-effort cleanup; the Info error is the one worth reporting.
+		raw.Close()
+		return nil, fmt.Errorf("get pinned link info: %w", err)
+	}
+
+	if info.Type != linkType {
+		raw.Close()
+		return nil, fmt.Errorf("link type %v doesn't match %v", info.Type, linkType)
+	}
+
+	return raw, nil
+}
+
+// isLink marks RawLink as an implementation of Link.
+func (l *RawLink) isLink() {}
+
+// FD returns the raw file descriptor of the link, or -1 when the descriptor
+// value cannot be obtained.
+func (l *RawLink) FD() int {
+	if raw, err := l.fd.Value(); err == nil {
+		return int(raw)
+	}
+	return -1
+}
+
+// Close breaks the link by closing its file descriptor.
+//
+// Use Pin if you want to make the link persistent.
+func (l *RawLink) Close() error {
+	return l.fd.Close()
+}
+
+// Pin persists a link past the lifetime of the process.
+//
+// Calling Close on a pinned Link will not break the link
+// until the pin is removed. The recorded pin path is only updated when
+// pinning succeeds.
+func (l *RawLink) Pin(fileName string) error {
+	err := internal.Pin(l.pinnedPath, fileName, l.fd)
+	if err == nil {
+		l.pinnedPath = fileName
+	}
+	return err
+}
+
+// Unpin implements the Link interface. It removes the pin recorded for the
+// link and forgets the path once the removal has succeeded.
+func (l *RawLink) Unpin() error {
+	err := internal.Unpin(l.pinnedPath)
+	if err == nil {
+		l.pinnedPath = ""
+	}
+	return err
+}
+
+// Update implements the Link interface. It is shorthand for UpdateArgs with
+// only the New program set.
+func (l *RawLink) Update(new *ebpf.Program) error {
+	opts := RawLinkUpdateOptions{New: new}
+	return l.UpdateArgs(opts)
+}
+
+// RawLinkUpdateOptions control the behaviour of RawLink.UpdateArgs.
+type RawLinkUpdateOptions struct {
+	// New is the program to install; its file descriptor must be valid.
+	New   *ebpf.Program
+	// Old is optional. When set, its file descriptor is passed to the kernel
+	// alongside New; when nil, zero is passed instead.
+	Old   *ebpf.Program
+	// Flags are handed to the kernel unchanged.
+	Flags uint32
+}
+
+// UpdateArgs updates a link based on args.
+//
+// opts.New is required: a nil program yields an error wrapping
+// errInvalidInput, and a program without a valid file descriptor yields an
+// error wrapping internal.ErrClosedFd. opts.Old is optional; when it is nil,
+// zero is passed as the old program descriptor.
+//
+// Errors are wrapped with %w so callers can match them with errors.Is.
+func (l *RawLink) UpdateArgs(opts RawLinkUpdateOptions) error {
+	// The Link contract says a nil program is an error, so report one
+	// instead of invoking FD on a nil pointer.
+	if opts.New == nil {
+		return fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
+	}
+
+	newFd := opts.New.FD()
+	if newFd < 0 {
+		return fmt.Errorf("invalid program: %w", internal.ErrClosedFd)
+	}
+
+	var oldFd int
+	if opts.Old != nil {
+		oldFd = opts.Old.FD()
+		if oldFd < 0 {
+			return fmt.Errorf("invalid replacement program: %w", internal.ErrClosedFd)
+		}
+	}
+
+	linkFd, err := l.fd.Value()
+	if err != nil {
+		return fmt.Errorf("can't update link: %w", err)
+	}
+
+	attr := bpfLinkUpdateAttr{
+		linkFd:    linkFd,
+		newProgFd: uint32(newFd),
+		oldProgFd: uint32(oldFd),
+		flags:     opts.Flags,
+	}
+	return bpfLinkUpdate(&attr)
+}
+
+// struct bpf_link_info
+//
+// The struct is handed to internal.BPFObjGetInfoByFD as raw memory (see
+// RawLink.Info), so field order and sizes must not be changed.
+type bpfLinkInfo struct {
+	typ     uint32
+	id      uint32
+	prog_id uint32
+}
+
+// Info returns metadata about the link.
+//
+// The data is read through internal.BPFObjGetInfoByFD; a failure is wrapped
+// with %w so the underlying error stays reachable through errors.Is.
+func (l *RawLink) Info() (*RawLinkInfo, error) {
+	var info bpfLinkInfo
+	err := internal.BPFObjGetInfoByFD(l.fd, unsafe.Pointer(&info), unsafe.Sizeof(info))
+	if err != nil {
+		return nil, fmt.Errorf("link info: %w", err)
+	}
+
+	// Keyed fields keep the mapping correct if RawLinkInfo is reordered.
+	return &RawLinkInfo{
+		Type:    Type(info.typ),
+		ID:      ID(info.id),
+		Program: ebpf.ProgramID(info.prog_id),
+	}, nil
+}

+ 60 - 0
vendor/github.com/cilium/ebpf/link/netns.go

@@ -0,0 +1,60 @@
+package link
+
+import (
+	"fmt"
+
+	"github.com/cilium/ebpf"
+)
+
+// NetNsInfo contains metadata about a network namespace link.
+//
+// It currently carries only the embedded RawLinkInfo.
+type NetNsInfo struct {
+	RawLinkInfo
+}
+
+// NetNsLink is a program attached to a network namespace.
+//
+// It embeds *RawLink and thereby exposes its methods; Info is overridden to
+// return a NetNsInfo.
+type NetNsLink struct {
+	*RawLink
+}
+
+// AttachNetNs attaches a program to a network namespace.
+//
+// ns is passed on as the Target of the raw link. The attach type is derived
+// from the program type: FlowDissector and SkLookup programs are accepted,
+// any other type is rejected with an error.
+func AttachNetNs(ns int, prog *ebpf.Program) (*NetNsLink, error) {
+	progType := prog.Type()
+
+	var attachType ebpf.AttachType
+	switch progType {
+	case ebpf.FlowDissector:
+		attachType = ebpf.AttachFlowDissector
+	case ebpf.SkLookup:
+		attachType = ebpf.AttachSkLookup
+	default:
+		return nil, fmt.Errorf("can't attach %v to network namespace", progType)
+	}
+
+	opts := RawLinkOptions{
+		Target:  ns,
+		Program: prog,
+		Attach:  attachType,
+	}
+	raw, err := AttachRawLink(opts)
+	if err != nil {
+		return nil, err
+	}
+
+	return &NetNsLink{raw}, nil
+}
+
+// LoadPinnedNetNs loads a network namespace link from bpffs. The pinned link
+// must be of type NetNsType, otherwise loading fails.
+func LoadPinnedNetNs(fileName string, opts *ebpf.LoadPinOptions) (*NetNsLink, error) {
+	raw, err := LoadPinnedRawLink(fileName, NetNsType, opts)
+	if err == nil {
+		return &NetNsLink{raw}, nil
+	}
+
+	return nil, err
+}
+
+// Info returns information about the link: the metadata of the underlying
+// RawLink wrapped in a NetNsInfo.
+func (nns *NetNsLink) Info() (*NetNsInfo, error) {
+	rawInfo, err := nns.RawLink.Info()
+	if err == nil {
+		return &NetNsInfo{*rawInfo}, nil
+	}
+	return nil, err
+}

+ 253 - 0
vendor/github.com/cilium/ebpf/link/perf_event.go

@@ -0,0 +1,253 @@
+package link
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"regexp"
+	"runtime"
+	"strconv"
+	"strings"
+	"unsafe"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// Getting the terminology right is usually the hardest part. For posterity and
+// for staying sane during implementation:
+//
+// - trace event: Representation of a kernel runtime hook. Filesystem entries
+//   under <tracefs>/events. Can be tracepoints (static), kprobes or uprobes.
+//   Can be instantiated into perf events (see below).
+// - tracepoint: A predetermined hook point in the kernel. Exposed as trace
+//   events in (sub)directories under <tracefs>/events. Cannot be closed or
+//   removed, they are static.
+// - k(ret)probe: Ephemeral trace events based on entry or exit points of
+//   exported kernel symbols. kprobe-based (tracefs) trace events can be
+//   created system-wide by writing to the <tracefs>/kprobe_events file, or
+//   they can be scoped to the current process by creating PMU perf events.
+// - perf event: An object instantiated based on an existing trace event or
+//   kernel symbol. Referred to by fd in userspace.
+//   Exactly one eBPF program can be attached to a perf event. Multiple perf
+//   events can be created from a single trace event. Closing a perf event
+//   stops any further invocations of the attached eBPF program.
+
+var (
+	// tracefsPath is the directory under which trace event IDs are looked
+	// up (see getTraceEventID).
+	tracefsPath = "/sys/kernel/debug/tracing"
+
+	// Trace event groups, names and kernel symbols must adhere to this set
+	// of characters. Non-empty, first character must not be a number, all
+	// characters must be alphanumeric or underscore.
+	rgxTraceEvent = regexp.MustCompile("^[a-zA-Z_][0-9a-zA-Z_]*$")
+
+	// errInvalidInput is wrapped by errors that report rejected caller
+	// input.
+	errInvalidInput = errors.New("invalid input")
+)
+
+const (
+	// perfAllThreads is passed as the pid argument of unix.PerfEventOpen
+	// (see openTracepointPerfEvent).
+	perfAllThreads = -1
+)
+
+// A perfEvent represents a perf event kernel object. Exactly one eBPF program
+// can be attached to it. It is created based on a tracefs trace event or a
+// Performance Monitoring Unit (PMU).
+type perfEvent struct {
+
+	// Group and name of the tracepoint/kprobe/uprobe.
+	group string
+	name  string
+
+	// PMU event ID read from sysfs. Valid IDs are non-zero.
+	pmuID uint64
+	// ID of the trace event read from tracefs. Valid IDs are non-zero.
+	tracefsID uint64
+
+	// True for kretprobes/uretprobes.
+	ret bool
+
+	// File descriptor of the perf event. Close treats a nil fd as nothing
+	// to do, attach rejects it.
+	fd       *internal.FD
+	// Program type that attach accepts. Close also uses it to decide
+	// whether a tracefs kprobe entry has to be removed.
+	progType ebpf.ProgramType
+}
+
+// isLink marks perfEvent as an implementation of Link.
+func (pe *perfEvent) isLink() {}
+
+// Pin implements Link. Perf events cannot be pinned: the call always returns
+// an error wrapping ErrNotSupported.
+func (pe *perfEvent) Pin(string) error {
+	return fmt.Errorf("pin perf event: %w", ErrNotSupported)
+}
+
+// Unpin implements Link. It always returns an error wrapping ErrNotSupported.
+func (pe *perfEvent) Unpin() error {
+	return fmt.Errorf("unpin perf event: %w", ErrNotSupported)
+}
+
+// Update implements Link. It always returns an error wrapping
+// ErrNotSupported, for the reasons below.
+//
+// Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"),
+// calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array
+// owned by the perf event, which means multiple programs can be attached
+// simultaneously.
+//
+// Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event
+// returns EEXIST.
+//
+// Detaching a program from a perf event is currently not possible, so a
+// program replacement mechanism cannot be implemented for perf events.
+func (pe *perfEvent) Update(prog *ebpf.Program) error {
+	return fmt.Errorf("can't replace eBPF program in perf event: %w", ErrNotSupported)
+}
+
+// Close implements Link. It disables the perf event, closes its file
+// descriptor and, for kprobes that were created through tracefs, removes the
+// matching entry from <tracefs>/kprobe_events.
+//
+// A perfEvent without a file descriptor is a no-op. The steps run in order
+// and the first failure is returned, skipping the remaining steps.
+func (pe *perfEvent) Close() error {
+	if pe.fd == nil {
+		return nil
+	}
+
+	rawFd, err := pe.fd.Value()
+	if err != nil {
+		return fmt.Errorf("getting perf event fd: %w", err)
+	}
+
+	if err := unix.IoctlSetInt(int(rawFd), unix.PERF_EVENT_IOC_DISABLE, 0); err != nil {
+		return fmt.Errorf("disabling perf event: %w", err)
+	}
+
+	if err := pe.fd.Close(); err != nil {
+		return fmt.Errorf("closing perf event fd: %w", err)
+	}
+
+	// Only kprobes created using tracefs own a <tracefs>/kprobe_events entry
+	// that needs cleaning up. Tracepoint trace events don't hold any extra
+	// resources.
+	if pe.progType == ebpf.Kprobe && pe.tracefsID != 0 {
+		return closeTraceFSKprobeEvent(pe.group, pe.name)
+	}
+
+	return nil
+}
+
+// attach the given eBPF prog to the perf event stored in pe.
+// pe must contain a valid perf event fd.
+// prog's type must match the program type stored in pe.
+//
+// On success the perf event is enabled and a finalizer is installed that
+// closes pe once it becomes unreachable. All errors from the file descriptor
+// and the ioctls are wrapped with %w.
+func (pe *perfEvent) attach(prog *ebpf.Program) error {
+	if prog == nil {
+		return errors.New("cannot attach a nil program")
+	}
+	if pe.fd == nil {
+		return errors.New("cannot attach to nil perf event")
+	}
+	if t := prog.Type(); t != pe.progType {
+		return fmt.Errorf("invalid program type (expected %s): %s", pe.progType, t)
+	}
+
+	// Read the program fd once so the value that was validated is the value
+	// handed to the ioctl.
+	progFd := prog.FD()
+	if progFd < 0 {
+		return fmt.Errorf("invalid program: %w", internal.ErrClosedFd)
+	}
+
+	// Report an unusable perf event fd directly instead of issuing ioctls on
+	// whatever placeholder value accompanies the error.
+	kfd, err := pe.fd.Value()
+	if err != nil {
+		return fmt.Errorf("getting perf event fd: %w", err)
+	}
+
+	// Assign the eBPF program to the perf event.
+	if err := unix.IoctlSetInt(int(kfd), unix.PERF_EVENT_IOC_SET_BPF, progFd); err != nil {
+		return fmt.Errorf("setting perf event bpf program: %w", err)
+	}
+
+	// PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values.
+	if err := unix.IoctlSetInt(int(kfd), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
+		return fmt.Errorf("enable perf event: %w", err)
+	}
+
+	// Close the perf event when its reference is lost to avoid leaking system resources.
+	runtime.SetFinalizer(pe, (*perfEvent).Close)
+	return nil
+}
+
+// unsafeStringPtr converts str with unix.BytePtrFromString, which yields a
+// pointer to a NUL-terminated copy, and returns it as an unsafe.Pointer. A
+// conversion error is passed through unchanged.
+func unsafeStringPtr(str string) (unsafe.Pointer, error) {
+	ptr, err := unix.BytePtrFromString(str)
+	if err == nil {
+		return unsafe.Pointer(ptr), nil
+	}
+	return nil, err
+}
+
+// getTraceEventID looks up the ID of the trace event group/name in
+// <tracefs>/events/<group>/<name>/id.
+// group and name must be alphanumeric or underscore, as required by the kernel.
+//
+// A missing event is reported as an error wrapping ErrNotSupported.
+func getTraceEventID(group, name string) (uint64, error) {
+	id, err := uint64FromFile(tracefsPath, "events", group, name, "id")
+	switch {
+	case errors.Is(err, ErrNotSupported):
+		return 0, fmt.Errorf("trace event %s/%s: %w", group, name, ErrNotSupported)
+	case err != nil:
+		return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
+	}
+
+	return id, nil
+}
+
+// getPMUEventType looks up the numeric type identifier of a Performance
+// Monitoring Unit in /sys/bus/event_source/devices/<pmu>/type.
+//
+// A missing PMU is reported as an error wrapping ErrNotSupported.
+func getPMUEventType(pmu string) (uint64, error) {
+	typ, err := uint64FromFile("/sys/bus/event_source/devices", pmu, "type")
+	switch {
+	case errors.Is(err, ErrNotSupported):
+		return 0, fmt.Errorf("pmu type %s: %w", pmu, ErrNotSupported)
+	case err != nil:
+		return 0, fmt.Errorf("reading pmu type %s: %w", pmu, err)
+	}
+
+	return typ, nil
+}
+
+// openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
+// kprobes created by writing to <tracefs>/kprobe_events are tracepoints
+// behind the scenes, and can be attached to using these perf events.
+func openTracepointPerfEvent(tid uint64) (*internal.FD, error) {
+	attr := unix.PerfEventAttr{
+		Type:        unix.PERF_TYPE_TRACEPOINT,
+		Config:      tid,
+		Sample_type: unix.PERF_SAMPLE_RAW,
+		Sample:      1,
+		Wakeup:      1,
+	}
+
+	fd, err := unix.PerfEventOpen(&attr, perfAllThreads, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
+	if err != nil {
+		return nil, fmt.Errorf("opening tracepoint perf event: %w", err)
+	}
+
+	return internal.NewFD(uint32(fd)), nil
+}
+
+// uint64FromFile reads a uint64 from a file. All elements of path are
+// sanitized and joined onto base. Returns an error wrapping errInvalidInput
+// if the joined path is no longer located inside base.
+//
+// A file that does not exist is reported as an error wrapping
+// ErrNotSupported. The file content is trimmed of surrounding whitespace and
+// parsed as a base-10 unsigned integer.
+func uint64FromFile(base string, path ...string) (uint64, error) {
+
+	// Resolve leaf path separately for error feedback. Makes the join onto
+	// base more readable (can't mix with variadic args).
+	l := filepath.Join(path...)
+
+	p := filepath.Join(base, l)
+
+	// Compare whole path components rather than raw string prefixes: with a
+	// plain prefix check, "/a/bc/x" would be accepted for base "/a/b" even
+	// though it lies outside of it.
+	rel, err := filepath.Rel(base, p)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput)
+	}
+
+	data, err := ioutil.ReadFile(p)
+	if os.IsNotExist(err) {
+		// Only echo leaf path, the base path can be prepended at the call site
+		// if more verbosity is required.
+		return 0, fmt.Errorf("symbol %s: %w", l, ErrNotSupported)
+	}
+	if err != nil {
+		return 0, fmt.Errorf("reading file %s: %w", p, err)
+	}
+
+	et := bytes.TrimSpace(data)
+	return strconv.ParseUint(string(et), 10, 64)
+}

+ 76 - 0
vendor/github.com/cilium/ebpf/link/program.go

@@ -0,0 +1,76 @@
+package link
+
+import (
+	"fmt"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/internal"
+)
+
+// RawAttachProgramOptions are the arguments of RawAttachProgram.
+type RawAttachProgramOptions struct {
+	// File descriptor to attach to. This differs for each attach type.
+	Target int
+	// Program to attach.
+	Program *ebpf.Program
+	// Program to replace (cgroups).
+	Replace *ebpf.Program
+	// Attach must match the attach type of Program (and Replace).
+	Attach ebpf.AttachType
+	// Flags control the attach behaviour. This differs for each attach type.
+	Flags uint32
+}
+
+// RawAttachProgram is a low level wrapper around BPF_PROG_ATTACH.
+//
+// You should use one of the higher level abstractions available in this
+// package if possible.
+//
+// The call fails early when the BPF_PROG_ATTACH feature test fails. An error
+// from the attach call itself is wrapped with %w, so the underlying error
+// stays reachable through errors.Is and errors.As.
+func RawAttachProgram(opts RawAttachProgramOptions) error {
+	if err := haveProgAttach(); err != nil {
+		return err
+	}
+
+	// Replace is optional; zero is passed when it is not set.
+	var replaceFd uint32
+	if opts.Replace != nil {
+		replaceFd = uint32(opts.Replace.FD())
+	}
+
+	attr := internal.BPFProgAttachAttr{
+		TargetFd:     uint32(opts.Target),
+		AttachBpfFd:  uint32(opts.Program.FD()),
+		ReplaceBpfFd: replaceFd,
+		AttachType:   uint32(opts.Attach),
+		AttachFlags:  uint32(opts.Flags),
+	}
+
+	if err := internal.BPFProgAttach(&attr); err != nil {
+		return fmt.Errorf("can't attach program: %w", err)
+	}
+	return nil
+}
+
+// RawDetachProgramOptions are the arguments of RawDetachProgram.
+type RawDetachProgramOptions struct {
+	// Target is the file descriptor the program is detached from.
+	Target  int
+	// Program is the program to detach.
+	Program *ebpf.Program
+	// Attach is the attach type passed to the detach call.
+	Attach  ebpf.AttachType
+}
+
+// RawDetachProgram is a low level wrapper around BPF_PROG_DETACH.
+//
+// You should use one of the higher level abstractions available in this
+// package if possible.
+//
+// The call fails early when the BPF_PROG_ATTACH feature test fails. An error
+// from the detach call itself is wrapped with %w, so the underlying error
+// stays reachable through errors.Is and errors.As.
+func RawDetachProgram(opts RawDetachProgramOptions) error {
+	if err := haveProgAttach(); err != nil {
+		return err
+	}
+
+	attr := internal.BPFProgDetachAttr{
+		TargetFd:    uint32(opts.Target),
+		AttachBpfFd: uint32(opts.Program.FD()),
+		AttachType:  uint32(opts.Attach),
+	}
+	if err := internal.BPFProgDetach(&attr); err != nil {
+		return fmt.Errorf("can't detach program: %w", err)
+	}
+
+	return nil
+}

+ 61 - 0
vendor/github.com/cilium/ebpf/link/raw_tracepoint.go

@@ -0,0 +1,61 @@
+package link
+
+import (
+	"fmt"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/internal"
+)
+
+// RawTracepointOptions are the arguments of AttachRawTracepoint.
+type RawTracepointOptions struct {
+	// Tracepoint name.
+	Name string
+	// Program must be of type RawTracepoint*
+	Program *ebpf.Program
+}
+
+// AttachRawTracepoint links a BPF program to a raw_tracepoint.
+//
+// Requires at least Linux 4.17.
+//
+// opts.Program must be of type RawTracepoint or RawTracepointWritable and
+// must have a valid file descriptor; otherwise an error is returned (wrapping
+// internal.ErrClosedFd for the latter). The returned Link supports Close
+// only; Update, Pin and Unpin report ErrNotSupported.
+func AttachRawTracepoint(opts RawTracepointOptions) (Link, error) {
+	if t := opts.Program.Type(); t != ebpf.RawTracepoint && t != ebpf.RawTracepointWritable {
+		return nil, fmt.Errorf("invalid program type %s, expected RawTracepoint(Writable)", t)
+	}
+
+	// Read the fd once so the value that was validated is the value that is
+	// handed to the kernel.
+	progFd := opts.Program.FD()
+	if progFd < 0 {
+		return nil, fmt.Errorf("invalid program: %w", internal.ErrClosedFd)
+	}
+
+	fd, err := bpfRawTracepointOpen(&bpfRawTracepointOpenAttr{
+		name: internal.NewStringPointer(opts.Name),
+		fd:   uint32(progFd),
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	return &progAttachRawTracepoint{fd: fd}, nil
+}
+
+// progAttachRawTracepoint is the Link returned by AttachRawTracepoint.
+type progAttachRawTracepoint struct {
+	// fd is the descriptor returned by bpfRawTracepointOpen.
+	fd *internal.FD
+}
+
+// Compile-time check that *progAttachRawTracepoint implements Link.
+var _ Link = (*progAttachRawTracepoint)(nil)
+
+// isLink marks progAttachRawTracepoint as an implementation of Link.
+func (rt *progAttachRawTracepoint) isLink() {}
+
+// Close implements Link by closing the raw tracepoint file descriptor.
+func (rt *progAttachRawTracepoint) Close() error {
+	return rt.fd.Close()
+}
+
+// Update implements Link. It always returns an error wrapping
+// ErrNotSupported.
+func (rt *progAttachRawTracepoint) Update(_ *ebpf.Program) error {
+	return fmt.Errorf("can't update raw_tracepoint: %w", ErrNotSupported)
+}
+
+// Pin implements Link. It always returns an error wrapping ErrNotSupported.
+func (rt *progAttachRawTracepoint) Pin(_ string) error {
+	return fmt.Errorf("can't pin raw_tracepoint: %w", ErrNotSupported)
+}
+
+// Unpin implements Link. It always returns an error wrapping ErrNotSupported.
+func (rt *progAttachRawTracepoint) Unpin() error {
+	return fmt.Errorf("unpin raw_tracepoint: %w", ErrNotSupported)
+}

+ 173 - 0
vendor/github.com/cilium/ebpf/link/syscalls.go

@@ -0,0 +1,173 @@
+package link
+
+import (
+	"errors"
+	"unsafe"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/asm"
+	"github.com/cilium/ebpf/internal"
+	"github.com/cilium/ebpf/internal/unix"
+)
+
+// Type is the kind of link.
+type Type uint32
+
+// Valid link types.
+//
+// Equivalent to enum bpf_link_type. The values are assigned with iota, so the
+// order of the constants determines their numeric value and must not be
+// changed.
+const (
+	UnspecifiedType Type = iota
+	RawTracepointType
+	TracingType
+	CgroupType
+	IterType
+	NetNsType
+	XDPType
+)
+
+// haveProgAttach is the feature test for BPF_PROG_ATTACH (registered with
+// version "4.10"). It tries to load a minimal CGroupSKB program.
+//
+// NOTE(review): every load error, not only those caused by a missing kernel
+// feature, is reported as internal.ErrNotSupported.
+var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() error {
+	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
+		Type:       ebpf.CGroupSKB,
+		AttachType: ebpf.AttachCGroupInetIngress,
+		License:    "MIT",
+		Instructions: asm.Instructions{
+			asm.Mov.Imm(asm.R0, 0),
+			asm.Return(),
+		},
+	})
+	if err != nil {
+		return internal.ErrNotSupported
+	}
+
+	// BPF_PROG_ATTACH was introduced at the same time as CGroupSKB,
+	// so being able to load the program is enough to infer that we
+	// have the syscall.
+	prog.Close()
+	return nil
+})
+
+// haveProgAttachReplace is the feature test for atomic program replacement
+// via BPF_PROG_ATTACH (registered with version "5.5").
+//
+// It attaches a minimal CGroupSKB program with flagReplace to a deliberately
+// invalid target: EINVAL is taken to mean the flag is unknown, EBADF that the
+// flag was accepted and only the target was rejected. Any other error is
+// returned unchanged.
+var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replacement", "5.5", func() error {
+	if err := haveProgAttach(); err != nil {
+		return err
+	}
+
+	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
+		Type:       ebpf.CGroupSKB,
+		AttachType: ebpf.AttachCGroupInetIngress,
+		License:    "MIT",
+		Instructions: asm.Instructions{
+			asm.Mov.Imm(asm.R0, 0),
+			asm.Return(),
+		},
+	})
+	if err != nil {
+		return internal.ErrNotSupported
+	}
+	defer prog.Close()
+
+	// We know that we have BPF_PROG_ATTACH since we can load CGroupSKB programs.
+	// If passing BPF_F_REPLACE gives us EINVAL we know that the feature isn't
+	// present.
+	attr := internal.BPFProgAttachAttr{
+		// We rely on this being checked after attachFlags.
+		TargetFd:    ^uint32(0),
+		AttachBpfFd: uint32(prog.FD()),
+		AttachType:  uint32(ebpf.AttachCGroupInetIngress),
+		AttachFlags: uint32(flagReplace),
+	}
+
+	err = internal.BPFProgAttach(&attr)
+	if errors.Is(err, unix.EINVAL) {
+		return internal.ErrNotSupported
+	}
+	if errors.Is(err, unix.EBADF) {
+		return nil
+	}
+	return err
+})
+
+// bpfLinkCreateAttr is the argument of BPF_LINK_CREATE.
+//
+// bpfLinkCreate hands the struct to the BPF syscall as raw memory, so field
+// order and sizes must not be changed.
+type bpfLinkCreateAttr struct {
+	progFd     uint32
+	targetFd   uint32
+	attachType ebpf.AttachType
+	flags      uint32
+}
+
+// bpfLinkCreate issues BPF_LINK_CREATE with attr and wraps the value returned
+// by the syscall in an internal.FD. A syscall error is returned unchanged.
+func bpfLinkCreate(attr *bpfLinkCreateAttr) (*internal.FD, error) {
+	ret, err := internal.BPF(internal.BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
+	if err == nil {
+		return internal.NewFD(uint32(ret)), nil
+	}
+	return nil, err
+}
+
+// bpfLinkUpdateAttr is the argument of BPF_LINK_UPDATE.
+//
+// bpfLinkUpdate hands the struct to the BPF syscall as raw memory, so field
+// order and sizes must not be changed.
+type bpfLinkUpdateAttr struct {
+	linkFd    uint32
+	newProgFd uint32
+	flags     uint32
+	oldProgFd uint32
+}
+
+// bpfLinkUpdate issues BPF_LINK_UPDATE with attr. The value returned by the
+// syscall is discarded; only the error is reported.
+func bpfLinkUpdate(attr *bpfLinkUpdateAttr) error {
+	_, err := internal.BPF(internal.BPF_LINK_UPDATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
+	return err
+}
+
+// haveBPFLink is the feature test for bpf_link (registered with version
+// "5.7").
+//
+// It tries to create a link for a minimal CGroupSKB program on a deliberately
+// invalid target: EINVAL is taken to mean the command is unknown, EBADF that
+// the command exists and only the target was rejected. Any other error is
+// returned unchanged.
+var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error {
+	prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
+		Type:       ebpf.CGroupSKB,
+		AttachType: ebpf.AttachCGroupInetIngress,
+		License:    "MIT",
+		Instructions: asm.Instructions{
+			asm.Mov.Imm(asm.R0, 0),
+			asm.Return(),
+		},
+	})
+	if err != nil {
+		return internal.ErrNotSupported
+	}
+	defer prog.Close()
+
+	attr := bpfLinkCreateAttr{
+		// This is a hopefully invalid file descriptor, which triggers EBADF.
+		targetFd:   ^uint32(0),
+		progFd:     uint32(prog.FD()),
+		attachType: ebpf.AttachCGroupInetIngress,
+	}
+	_, err = bpfLinkCreate(&attr)
+	if errors.Is(err, unix.EINVAL) {
+		return internal.ErrNotSupported
+	}
+	if errors.Is(err, unix.EBADF) {
+		return nil
+	}
+	return err
+})
+
+// bpfIterCreateAttr is the argument of BPF_ITER_CREATE.
+//
+// bpfIterCreate hands the struct to the BPF syscall as raw memory, so field
+// order and sizes must not be changed.
+type bpfIterCreateAttr struct {
+	linkFd uint32
+	flags  uint32
+}
+
+// bpfIterCreate issues BPF_ITER_CREATE with attr and wraps the value returned
+// by the syscall in an internal.FD. A syscall error is returned unchanged.
+func bpfIterCreate(attr *bpfIterCreateAttr) (*internal.FD, error) {
+	ret, err := internal.BPF(internal.BPF_ITER_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
+	if err != nil {
+		return nil, err
+	}
+	return internal.NewFD(uint32(ret)), nil
+}
+
+// bpfRawTracepointOpenAttr is the argument of BPF_RAW_TRACEPOINT_OPEN.
+//
+// bpfRawTracepointOpen hands the struct to the BPF syscall as raw memory, so
+// field order and sizes (including the trailing blank field) must not be
+// changed.
+type bpfRawTracepointOpenAttr struct {
+	name internal.Pointer
+	fd   uint32
+	_    uint32
+}
+
+// bpfRawTracepointOpen issues BPF_RAW_TRACEPOINT_OPEN with attr and wraps the
+// value returned by the syscall in an internal.FD. A syscall error is
+// returned unchanged.
+func bpfRawTracepointOpen(attr *bpfRawTracepointOpenAttr) (*internal.FD, error) {
+	ret, err := internal.BPF(internal.BPF_RAW_TRACEPOINT_OPEN, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
+	if err != nil {
+		return nil, err
+	}
+	return internal.NewFD(uint32(ret)), nil
+}

+ 56 - 0
vendor/github.com/cilium/ebpf/link/tracepoint.go

@@ -0,0 +1,56 @@
+package link
+
+import (
+	"fmt"
+
+	"github.com/cilium/ebpf"
+)
+
+// Tracepoint attaches the given eBPF program to the tracepoint with the given
+// group and name. See /sys/kernel/debug/tracing/events to find available
+// tracepoints. The top-level directory is the group, the event's subdirectory
+// is the name. Example:
+//
+//	Tracepoint("syscalls", "sys_enter_fork")
+//
+// Note that attaching eBPF programs to syscalls (sys_enter_*/sys_exit_*) is
+// only possible as of kernel 4.14 (commit cf5f5ce).
+//
+// Invalid arguments (empty or malformed group/name, nil program, program of a
+// type other than TracePoint) yield an error wrapping errInvalidInput.
+func Tracepoint(group, name string, prog *ebpf.Program) (Link, error) {
+	// Validate the arguments in a fixed order; the first failing check
+	// decides the error that is reported.
+	switch {
+	case group == "" || name == "":
+		return nil, fmt.Errorf("group and name cannot be empty: %w", errInvalidInput)
+	case prog == nil:
+		return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
+	case !rgxTraceEvent.MatchString(group) || !rgxTraceEvent.MatchString(name):
+		return nil, fmt.Errorf("group and name '%s/%s' must be alphanumeric or underscore: %w", group, name, errInvalidInput)
+	case prog.Type() != ebpf.TracePoint:
+		return nil, fmt.Errorf("eBPF program type %s is not a Tracepoint: %w", prog.Type(), errInvalidInput)
+	}
+
+	eventID, err := getTraceEventID(group, name)
+	if err != nil {
+		return nil, err
+	}
+
+	perfFd, err := openTracepointPerfEvent(eventID)
+	if err != nil {
+		return nil, err
+	}
+
+	event := &perfEvent{
+		fd:        perfFd,
+		tracefsID: eventID,
+		group:     group,
+		name:      name,
+		progType:  ebpf.TracePoint,
+	}
+
+	err = event.attach(prog)
+	if err != nil {
+		// Release the perf event again; the attach error is the one that
+		// gets reported.
+		event.Close()
+		return nil, err
+	}
+
+	return event, nil
+}

+ 6 - 6
vendor/github.com/containerd/cgroups/go.mod

@@ -3,16 +3,16 @@ module github.com/containerd/cgroups
 go 1.13
 
 require (
-	github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775
-	github.com/coreos/go-systemd/v22 v22.0.0
+	github.com/cilium/ebpf v0.4.0
+	github.com/coreos/go-systemd/v22 v22.1.0
 	github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect
 	github.com/docker/go-units v0.4.0
 	github.com/godbus/dbus/v5 v5.0.3
-	github.com/gogo/protobuf v1.3.1
+	github.com/gogo/protobuf v1.3.2
 	github.com/opencontainers/runtime-spec v1.0.2
 	github.com/pkg/errors v0.9.1
-	github.com/sirupsen/logrus v1.6.0
-	github.com/stretchr/testify v1.2.2
+	github.com/sirupsen/logrus v1.7.0
+	github.com/stretchr/testify v1.6.1
 	github.com/urfave/cli v1.22.2
-	golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9
+	golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c
 )

+ 4 - 0
vendor/github.com/containerd/cgroups/net_cls.go

@@ -55,3 +55,7 @@ func (n *netclsController) Create(path string, resources *specs.LinuxResources)
 	}
 	return nil
 }
+
+// Update applies resources to the cgroup at path by delegating to Create.
+func (n *netclsController) Update(path string, resources *specs.LinuxResources) error {
+	return n.Create(path, resources)
+}

File diff suppressed because it is too large
+ 593 - 513
vendor/github.com/containerd/cgroups/stats/v1/metrics.pb.go


+ 5 - 2
vendor/github.com/containerd/cgroups/subsystem.go

@@ -18,6 +18,7 @@ package cgroups
 
 import (
 	"fmt"
+	"os"
 
 	v1 "github.com/containerd/cgroups/stats/v1"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -46,7 +47,6 @@ const (
 // available on most linux systems
 func Subsystems() []Name {
 	n := []Name{
-		Hugetlb,
 		Freezer,
 		Pids,
 		NetCLS,
@@ -59,9 +59,12 @@ func Subsystems() []Name {
 		Blkio,
 		Rdma,
 	}
-	if !isUserNS {
+	if !RunningInUserNS() {
 		n = append(n, Devices)
 	}
+	if _, err := os.Stat("/sys/kernel/mm/hugepages"); err == nil {
+		n = append(n, Hugetlb)
+	}
 	return n
 }
 

+ 31 - 26
vendor/github.com/containerd/cgroups/utils.go

@@ -36,7 +36,8 @@ import (
 )
 
 var (
-	isUserNS  = runningInUserNS()
+	nsOnce    sync.Once
+	inUserNS  bool
 	checkMode sync.Once
 	cgMode    CGMode
 )
@@ -81,33 +82,37 @@ func Mode() CGMode {
 	return cgMode
 }
 
-// runningInUserNS detects whether we are currently running in a user namespace.
+// RunningInUserNS detects whether we are currently running in a user namespace.
 // Copied from github.com/lxc/lxd/shared/util.go
-func runningInUserNS() bool {
-	file, err := os.Open("/proc/self/uid_map")
-	if err != nil {
-		// This kernel-provided file only exists if user namespaces are supported
-		return false
-	}
-	defer file.Close()
+func RunningInUserNS() bool {
+	nsOnce.Do(func() {
+		file, err := os.Open("/proc/self/uid_map")
+		if err != nil {
+			// This kernel-provided file only exists if user namespaces are supported
+			return
+		}
+		defer file.Close()
 
-	buf := bufio.NewReader(file)
-	l, _, err := buf.ReadLine()
-	if err != nil {
-		return false
-	}
+		buf := bufio.NewReader(file)
+		l, _, err := buf.ReadLine()
+		if err != nil {
+			return
+		}
 
-	line := string(l)
-	var a, b, c int64
-	fmt.Sscanf(line, "%d %d %d", &a, &b, &c)
-	/*
-	 * We assume we are in the initial user namespace if we have a full
-	 * range - 4294967295 uids starting at uid 0.
-	 */
-	if a == 0 && b == 0 && c == 4294967295 {
-		return false
-	}
-	return true
+		line := string(l)
+		var a, b, c int64
+		fmt.Sscanf(line, "%d %d %d", &a, &b, &c)
+
+		/*
+		 * We assume we are in the initial user namespace if we have a full
+		 * range - 4294967295 uids starting at uid 0.
+		 */
+		if a == 0 && b == 0 && c == 4294967295 {
+			return
+		}
+		inUserNS = true
+	})
+	return inUserNS
 }
 
 // defaults returns all known groups
@@ -132,7 +137,7 @@ func defaults(root string) ([]Subsystem, error) {
 	}
 	// only add the devices cgroup if we are not in a user namespace
 	// because modifications are not allowed
-	if !isUserNS {
+	if !RunningInUserNS() {
 		s = append(s, NewDevices(root))
 	}
 	// add the hugetlb cgroup if error wasn't due to missing hugetlb

+ 15 - 3
vendor/github.com/containerd/cgroups/v2/ebpf.go

@@ -19,6 +19,7 @@ package v2
 import (
 	"github.com/cilium/ebpf"
 	"github.com/cilium/ebpf/asm"
+	"github.com/cilium/ebpf/link"
 	"github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
 	"golang.org/x/sys/unix"
@@ -42,12 +43,23 @@ func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD
 	if err != nil {
 		return nilCloser, err
 	}
-	if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
+	err = link.RawAttachProgram(link.RawAttachProgramOptions{
+		Target:  dirFD,
+		Program: prog,
+		Attach:  ebpf.AttachCGroupDevice,
+		Flags:   unix.BPF_F_ALLOW_MULTI,
+	})
+	if err != nil {
 		return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
 	}
 	closer := func() error {
-		if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
-			return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
+		err = link.RawDetachProgram(link.RawDetachProgramOptions{
+			Target:  dirFD,
+			Program: prog,
+			Attach:  ebpf.AttachCGroupDevice,
+		})
+		if err != nil {
+			return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE)")
 		}
 		return nil
 	}

File diff suppressed because it is too large
+ 415 - 353
vendor/github.com/containerd/cgroups/v2/stats/metrics.pb.go


+ 2 - 1
vendor/github.com/containerd/containerd/README.md

@@ -49,7 +49,8 @@ Please be aware: nightly builds might have critical bugs, it's not recommended f
 
 Runtime requirements for containerd are very minimal. Most interactions with
 the Linux and Windows container feature sets are handled via [runc](https://github.com/opencontainers/runc) and/or
-OS-specific libraries (e.g. [hcsshim](https://github.com/Microsoft/hcsshim) for Microsoft). The current required version of `runc` is always listed in [RUNC.md](/docs/RUNC.md).
+OS-specific libraries (e.g. [hcsshim](https://github.com/Microsoft/hcsshim) for Microsoft).
+The current required version of `runc` is described in [RUNC.md](docs/RUNC.md).
 
 There are specific features
 used by containerd core code and snapshotters that will require a minimum kernel

+ 23 - 2
vendor/github.com/containerd/containerd/archive/tar.go

@@ -63,13 +63,34 @@ func Diff(ctx context.Context, a, b string) io.ReadCloser {
 }
 
 // WriteDiff writes a tar stream of the computed difference between the
-// provided directories.
+// provided paths.
 //
 // Produces a tar using OCI style file markers for deletions. Deleted
 // files will be prepended with the prefix ".wh.". This style is
 // based off AUFS whiteouts.
 // See https://github.com/opencontainers/image-spec/blob/master/layer.md
-func WriteDiff(ctx context.Context, w io.Writer, a, b string) error {
+func WriteDiff(ctx context.Context, w io.Writer, a, b string, opts ...WriteDiffOpt) error {
+	var options WriteDiffOptions
+	for _, opt := range opts {
+		if err := opt(&options); err != nil {
+			return errors.Wrap(err, "failed to apply option")
+		}
+	}
+	if options.writeDiffFunc == nil {
+		options.writeDiffFunc = writeDiffNaive
+	}
+
+	return options.writeDiffFunc(ctx, w, a, b, options)
+}
+
+// writeDiffNaive writes a tar stream of the computed difference between the
+// provided directories on disk.
+//
+// Produces a tar using OCI style file markers for deletions. Deleted
+// files will be prepended with the prefix ".wh.". This style is
+// based off AUFS whiteouts.
+// See https://github.com/opencontainers/image-spec/blob/master/layer.md
+func writeDiffNaive(ctx context.Context, w io.Writer, a, b string, _ WriteDiffOptions) error {
 	cw := newChangeWriter(w, b)
 	err := fs.Changes(ctx, a, b, cw.HandleChange)
 	if err != nil {

+ 1 - 1
vendor/github.com/containerd/containerd/archive/tar_freebsd.go

@@ -29,7 +29,7 @@ func mknod(path string, mode uint32, dev uint64) error {
 // lsetxattrCreate wraps unix.Lsetxattr with FreeBSD-specific flags and errors
 func lsetxattrCreate(link string, attr string, data []byte) error {
 	err := unix.Lsetxattr(link, attr, data, 0)
-	if err == unix.ENOTSUP|| err == unix.EEXIST {
+	if err == unix.ENOTSUP || err == unix.EEXIST {
 		return nil
 	}
 	return err

+ 10 - 0
vendor/github.com/containerd/containerd/archive/tar_opts.go

@@ -73,3 +73,13 @@ func WithParents(p []string) ApplyOpt {
 		return nil
 	}
 }
+
+// WriteDiffOptions provides additional options for a WriteDiff operation
+type WriteDiffOptions struct {
+	ParentLayers []string // Windows needs the full list of parent layers
+
+	writeDiffFunc func(context.Context, io.Writer, string, string, WriteDiffOptions) error
+}
+
+// WriteDiffOpt allows setting mutable archive write properties on creation
+type WriteDiffOpt func(options *WriteDiffOptions) error

+ 43 - 0
vendor/github.com/containerd/containerd/archive/tar_opts_windows.go

@@ -18,6 +18,19 @@
 
 package archive
 
+import (
+	"context"
+	"io"
+
+	"github.com/Microsoft/hcsshim/pkg/ociwclayer"
+)
+
+// applyWindowsLayer applies a tar stream of an OCI style diff tar of a Windows layer
+// See https://github.com/opencontainers/image-spec/blob/master/layer.md#applying-changesets
+func applyWindowsLayer(ctx context.Context, root string, r io.Reader, options ApplyOptions) (size int64, err error) {
+	return ociwclayer.ImportLayerFromTar(ctx, r, root, options.Parents)
+}
+
 // AsWindowsContainerLayer indicates that the tar stream to apply is that of
 // a Windows Container Layer. The caller must be holding SeBackupPrivilege and
 // SeRestorePrivilege.
@@ -27,3 +40,33 @@ func AsWindowsContainerLayer() ApplyOpt {
 		return nil
 	}
 }
+
+// writeDiffWindowsLayers writes a tar stream of the computed difference between the
+// provided Windows layers
+//
+// Produces a tar using OCI style file markers for deletions. Deleted
+// files will be prepended with the prefix ".wh.". This style is
+// based off AUFS whiteouts.
+// See https://github.com/opencontainers/image-spec/blob/master/layer.md
+func writeDiffWindowsLayers(ctx context.Context, w io.Writer, _, layer string, options WriteDiffOptions) error {
+	return ociwclayer.ExportLayerToTar(ctx, w, layer, options.ParentLayers)
+}
+
+// AsWindowsContainerLayerPair indicates that the paths to diff are a pair of
+// Windows Container Layers. The caller must be holding SeBackupPrivilege.
+func AsWindowsContainerLayerPair() WriteDiffOpt {
+	return func(options *WriteDiffOptions) error {
+		options.writeDiffFunc = writeDiffWindowsLayers
+		return nil
+	}
+}
+
+// WithParentLayers provides the Windows Container Layers that are the parents
+// of the target (right-hand, "upper") layer, if any. The source (left-hand, "lower")
+// layer passed to WriteDiff should be "" in this case.
+func WithParentLayers(p []string) WriteDiffOpt {
+	return func(options *WriteDiffOptions) error {
+		options.ParentLayers = p
+		return nil
+	}
+}

+ 2 - 2
vendor/github.com/containerd/containerd/archive/tar_unix.go

@@ -24,7 +24,7 @@ import (
 	"strings"
 	"syscall"
 
-	"github.com/containerd/containerd/sys"
+	"github.com/containerd/containerd/pkg/userns"
 	"github.com/containerd/continuity/fs"
 	"github.com/containerd/continuity/sysx"
 	"github.com/pkg/errors"
@@ -87,7 +87,7 @@ func skipFile(hdr *tar.Header) bool {
 	switch hdr.Typeflag {
 	case tar.TypeBlock, tar.TypeChar:
 		// cannot create a device if running in user namespace
-		return sys.RunningInUserNS()
+		return userns.RunningInUserNS()
 	default:
 		return false
 	}

+ 0 - 137
vendor/github.com/containerd/containerd/archive/tar_windows.go

@@ -20,33 +20,14 @@ package archive
 
 import (
 	"archive/tar"
-	"bufio"
-	"context"
 	"fmt"
-	"io"
 	"os"
-	"path"
-	"path/filepath"
 	"strings"
 
-	"github.com/Microsoft/go-winio"
-	"github.com/Microsoft/go-winio/backuptar"
-	"github.com/Microsoft/hcsshim"
 	"github.com/containerd/containerd/sys"
 	"github.com/pkg/errors"
 )
 
-var (
-	// mutatedFiles is a list of files that are mutated by the import process
-	// and must be backed up and restored.
-	mutatedFiles = map[string]string{
-		"UtilityVM/Files/EFI/Microsoft/Boot/BCD":      "bcd.bak",
-		"UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG":  "bcd.log.bak",
-		"UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG1": "bcd.log1.bak",
-		"UtilityVM/Files/EFI/Microsoft/Boot/BCD.LOG2": "bcd.log2.bak",
-	}
-)
-
 // tarName returns platform-specific filepath
 // to canonical posix-style path for tar archival. p is relative
 // path.
@@ -141,121 +122,3 @@ func copyDirInfo(fi os.FileInfo, path string) error {
 func copyUpXAttrs(dst, src string) error {
 	return nil
 }
-
-// applyWindowsLayer applies a tar stream of an OCI style diff tar of a Windows
-// layer using the hcsshim layer writer and backup streams.
-// See https://github.com/opencontainers/image-spec/blob/master/layer.md#applying-changesets
-func applyWindowsLayer(ctx context.Context, root string, r io.Reader, options ApplyOptions) (size int64, err error) {
-	home, id := filepath.Split(root)
-	info := hcsshim.DriverInfo{
-		HomeDir: home,
-	}
-
-	w, err := hcsshim.NewLayerWriter(info, id, options.Parents)
-	if err != nil {
-		return 0, err
-	}
-	defer func() {
-		if err2 := w.Close(); err2 != nil {
-			// This error should not be discarded as a failure here
-			// could result in an invalid layer on disk
-			if err == nil {
-				err = err2
-			}
-		}
-	}()
-
-	tr := tar.NewReader(r)
-	buf := bufio.NewWriter(nil)
-	hdr, nextErr := tr.Next()
-	// Iterate through the files in the archive.
-	for {
-		select {
-		case <-ctx.Done():
-			return 0, ctx.Err()
-		default:
-		}
-
-		if nextErr == io.EOF {
-			// end of tar archive
-			break
-		}
-		if nextErr != nil {
-			return 0, nextErr
-		}
-
-		// Note: path is used instead of filepath to prevent OS specific handling
-		// of the tar path
-		base := path.Base(hdr.Name)
-		if strings.HasPrefix(base, whiteoutPrefix) {
-			dir := path.Dir(hdr.Name)
-			originalBase := base[len(whiteoutPrefix):]
-			originalPath := path.Join(dir, originalBase)
-			if err := w.Remove(filepath.FromSlash(originalPath)); err != nil {
-				return 0, err
-			}
-			hdr, nextErr = tr.Next()
-		} else if hdr.Typeflag == tar.TypeLink {
-			err := w.AddLink(filepath.FromSlash(hdr.Name), filepath.FromSlash(hdr.Linkname))
-			if err != nil {
-				return 0, err
-			}
-			hdr, nextErr = tr.Next()
-		} else {
-			name, fileSize, fileInfo, err := backuptar.FileInfoFromHeader(hdr)
-			if err != nil {
-				return 0, err
-			}
-			if err := w.Add(filepath.FromSlash(name), fileInfo); err != nil {
-				return 0, err
-			}
-			size += fileSize
-			hdr, nextErr = tarToBackupStreamWithMutatedFiles(buf, w, tr, hdr, root)
-		}
-	}
-
-	return
-}
-
-// tarToBackupStreamWithMutatedFiles reads data from a tar stream and
-// writes it to a backup stream, and also saves any files that will be mutated
-// by the import layer process to a backup location.
-func tarToBackupStreamWithMutatedFiles(buf *bufio.Writer, w io.Writer, t *tar.Reader, hdr *tar.Header, root string) (nextHdr *tar.Header, err error) {
-	var (
-		bcdBackup       *os.File
-		bcdBackupWriter *winio.BackupFileWriter
-	)
-	if backupPath, ok := mutatedFiles[hdr.Name]; ok {
-		bcdBackup, err = os.Create(filepath.Join(root, backupPath))
-		if err != nil {
-			return nil, err
-		}
-		defer func() {
-			cerr := bcdBackup.Close()
-			if err == nil {
-				err = cerr
-			}
-		}()
-
-		bcdBackupWriter = winio.NewBackupFileWriter(bcdBackup, false)
-		defer func() {
-			cerr := bcdBackupWriter.Close()
-			if err == nil {
-				err = cerr
-			}
-		}()
-
-		buf.Reset(io.MultiWriter(w, bcdBackupWriter))
-	} else {
-		buf.Reset(w)
-	}
-
-	defer func() {
-		ferr := buf.Flush()
-		if err == nil {
-			err = ferr
-		}
-	}()
-
-	return backuptar.WriteBackupStreamFromTarFile(buf, t, hdr)
-}

+ 1 - 1
vendor/github.com/containerd/containerd/cio/io.go

@@ -80,7 +80,7 @@ type FIFOSet struct {
 
 // Close the FIFOSet
 func (f *FIFOSet) Close() error {
-	if f.close != nil {
+	if f != nil && f.close != nil {
 		return f.close()
 	}
 	return nil

+ 10 - 12
vendor/github.com/containerd/containerd/cio/io_unix.go

@@ -103,38 +103,36 @@ func copyIO(fifos *FIFOSet, ioset *Streams) (*cio, error) {
 	}, nil
 }
 
-func openFifos(ctx context.Context, fifos *FIFOSet) (pipes, error) {
-	var err error
+func openFifos(ctx context.Context, fifos *FIFOSet) (f pipes, retErr error) {
 	defer func() {
-		if err != nil {
+		if retErr != nil {
 			fifos.Close()
 		}
 	}()
 
-	var f pipes
 	if fifos.Stdin != "" {
-		if f.Stdin, err = fifo.OpenFifo(ctx, fifos.Stdin, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
-			return f, errors.Wrapf(err, "failed to open stdin fifo")
+		if f.Stdin, retErr = fifo.OpenFifo(ctx, fifos.Stdin, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); retErr != nil {
+			return f, errors.Wrapf(retErr, "failed to open stdin fifo")
 		}
 		defer func() {
-			if err != nil && f.Stdin != nil {
+			if retErr != nil && f.Stdin != nil {
 				f.Stdin.Close()
 			}
 		}()
 	}
 	if fifos.Stdout != "" {
-		if f.Stdout, err = fifo.OpenFifo(ctx, fifos.Stdout, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
-			return f, errors.Wrapf(err, "failed to open stdout fifo")
+		if f.Stdout, retErr = fifo.OpenFifo(ctx, fifos.Stdout, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); retErr != nil {
+			return f, errors.Wrapf(retErr, "failed to open stdout fifo")
 		}
 		defer func() {
-			if err != nil && f.Stdout != nil {
+			if retErr != nil && f.Stdout != nil {
 				f.Stdout.Close()
 			}
 		}()
 	}
 	if !fifos.Terminal && fifos.Stderr != "" {
-		if f.Stderr, err = fifo.OpenFifo(ctx, fifos.Stderr, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
-			return f, errors.Wrapf(err, "failed to open stderr fifo")
+		if f.Stderr, retErr = fifo.OpenFifo(ctx, fifos.Stderr, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); retErr != nil {
+			return f, errors.Wrapf(retErr, "failed to open stderr fifo")
 		}
 	}
 	return f, nil

+ 12 - 24
vendor/github.com/containerd/containerd/cio/io_windows.go

@@ -20,7 +20,6 @@ import (
 	"context"
 	"fmt"
 	"io"
-	"net"
 
 	winio "github.com/Microsoft/go-winio"
 	"github.com/containerd/containerd/log"
@@ -43,22 +42,21 @@ func NewFIFOSetInDir(_, id string, terminal bool) (*FIFOSet, error) {
 	}, nil), nil
 }
 
-func copyIO(fifos *FIFOSet, ioset *Streams) (*cio, error) {
-	var (
-		set []io.Closer
-	)
+func copyIO(fifos *FIFOSet, ioset *Streams) (_ *cio, retErr error) {
+	cios := &cio{config: fifos.Config}
+
+	defer func() {
+		if retErr != nil {
+			_ = cios.Close()
+		}
+	}()
 
 	if fifos.Stdin != "" {
 		l, err := winio.ListenPipe(fifos.Stdin, nil)
 		if err != nil {
 			return nil, errors.Wrapf(err, "failed to create stdin pipe %s", fifos.Stdin)
 		}
-		defer func(l net.Listener) {
-			if err != nil {
-				l.Close()
-			}
-		}(l)
-		set = append(set, l)
+		cios.closers = append(cios.closers, l)
 
 		go func() {
 			c, err := l.Accept()
@@ -81,12 +79,7 @@ func copyIO(fifos *FIFOSet, ioset *Streams) (*cio, error) {
 		if err != nil {
 			return nil, errors.Wrapf(err, "failed to create stdout pipe %s", fifos.Stdout)
 		}
-		defer func(l net.Listener) {
-			if err != nil {
-				l.Close()
-			}
-		}(l)
-		set = append(set, l)
+		cios.closers = append(cios.closers, l)
 
 		go func() {
 			c, err := l.Accept()
@@ -109,12 +102,7 @@ func copyIO(fifos *FIFOSet, ioset *Streams) (*cio, error) {
 		if err != nil {
 			return nil, errors.Wrapf(err, "failed to create stderr pipe %s", fifos.Stderr)
 		}
-		defer func(l net.Listener) {
-			if err != nil {
-				l.Close()
-			}
-		}(l)
-		set = append(set, l)
+		cios.closers = append(cios.closers, l)
 
 		go func() {
 			c, err := l.Accept()
@@ -132,7 +120,7 @@ func copyIO(fifos *FIFOSet, ioset *Streams) (*cio, error) {
 		}()
 	}
 
-	return &cio{config: fifos.Config, closers: set}, nil
+	return cios, nil
 }
 
 // NewDirectIO returns an IO implementation that exposes the IO streams as io.ReadCloser

+ 19 - 1
vendor/github.com/containerd/containerd/client.go

@@ -63,6 +63,7 @@ import (
 	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
+	"golang.org/x/sync/semaphore"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/backoff"
 	"google.golang.org/grpc/health/grpc_health_v1"
@@ -226,6 +227,11 @@ func (c *Client) Reconnect() error {
 	return nil
 }
 
+// Runtime returns the name of the runtime being used
+func (c *Client) Runtime() string {
+	return c.runtime
+}
+
 // IsServing returns true if the client can successfully connect to the
 // containerd daemon and the healthcheck service returns the SERVING
 // response.
@@ -350,6 +356,9 @@ type RemoteContext struct {
 	// MaxConcurrentDownloads is the max concurrent content downloads for each pull.
 	MaxConcurrentDownloads int
 
+	// MaxConcurrentUploadedLayers is the max concurrent uploaded layers for each push.
+	MaxConcurrentUploadedLayers int
+
 	// AllMetadata downloads all manifests and known-configuration files
 	AllMetadata bool
 
@@ -458,7 +467,12 @@ func (c *Client) Push(ctx context.Context, ref string, desc ocispec.Descriptor,
 		wrapper = pushCtx.HandlerWrapper
 	}
 
-	return remotes.PushContent(ctx, pusher, desc, c.ContentStore(), pushCtx.PlatformMatcher, wrapper)
+	var limiter *semaphore.Weighted
+	if pushCtx.MaxConcurrentUploadedLayers > 0 {
+		limiter = semaphore.NewWeighted(int64(pushCtx.MaxConcurrentUploadedLayers))
+	}
+
+	return remotes.PushContent(ctx, pusher, desc, c.ContentStore(), limiter, pushCtx.PlatformMatcher, wrapper)
 }
 
 // GetImage returns an existing image
@@ -715,10 +729,12 @@ func (c *Client) Version(ctx context.Context) (Version, error) {
 	}, nil
 }
 
+// ServerInfo represents the introspected server information
 type ServerInfo struct {
 	UUID string
 }
 
+// Server returns server information from the introspection service
 func (c *Client) Server(ctx context.Context) (ServerInfo, error) {
 	c.connMu.Lock()
 	if c.conn == nil {
@@ -784,6 +800,8 @@ func CheckRuntime(current, expected string) bool {
 	return true
 }
 
+// GetSnapshotterSupportedPlatforms returns a platform matcher which represents the
+// supported platforms for the given snapshotter.
 func (c *Client) GetSnapshotterSupportedPlatforms(ctx context.Context, snapshotterName string) (platforms.MatchComparer, error) {
 	filters := []string{fmt.Sprintf("type==%s, id==%s", plugin.SnapshotPlugin, snapshotterName)}
 	in := c.IntrospectionService()

+ 8 - 0
vendor/github.com/containerd/containerd/client_opts.go

@@ -228,6 +228,14 @@ func WithMaxConcurrentDownloads(max int) RemoteOpt {
 	}
 }
 
+// WithMaxConcurrentUploadedLayers sets max concurrent uploaded layer limit.
+func WithMaxConcurrentUploadedLayers(max int) RemoteOpt {
+	return func(client *Client, c *RemoteContext) error {
+		c.MaxConcurrentUploadedLayers = max
+		return nil
+	}
+}
+
 // WithAllMetadata downloads all manifests and known-configuration files
 func WithAllMetadata() RemoteOpt {
 	return func(_ *Client, c *RemoteContext) error {

+ 5 - 5
vendor/github.com/containerd/containerd/container.go

@@ -32,7 +32,7 @@ import (
 	"github.com/containerd/containerd/images"
 	"github.com/containerd/containerd/oci"
 	"github.com/containerd/containerd/runtime/v2/runc/options"
-	"github.com/containerd/containerd/sys"
+	"github.com/containerd/fifo"
 	"github.com/containerd/typeurl"
 	prototypes "github.com/gogo/protobuf/types"
 	ver "github.com/opencontainers/image-spec/specs-go"
@@ -435,12 +435,12 @@ func loadFifos(response *tasks.GetResponse) *cio.FIFOSet {
 			err  error
 			dirs = map[string]struct{}{}
 		)
-		for _, fifo := range fifos {
-			if isFifo, _ := sys.IsFifo(fifo); isFifo {
-				if rerr := os.Remove(fifo); err == nil {
+		for _, f := range fifos {
+			if isFifo, _ := fifo.IsFifo(f); isFifo {
+				if rerr := os.Remove(f); err == nil {
 					err = rerr
 				}
-				dirs[filepath.Dir(fifo)] = struct{}{}
+				dirs[filepath.Dir(f)] = struct{}{}
 			}
 		}
 		for dir := range dirs {

+ 1 - 1
vendor/github.com/containerd/containerd/content/adaptor.go

@@ -22,7 +22,7 @@ import (
 	"github.com/containerd/containerd/filters"
 )
 
-// AdoptInfo returns `filters.Adaptor` that handles `content.Info`.
+// AdaptInfo returns `filters.Adaptor` that handles `content.Info`.
 func AdaptInfo(info Info) filters.Adaptor {
 	return filters.AdapterFunc(func(fieldpath []string) (string, bool) {
 		if len(fieldpath) == 0 {

+ 1 - 1
vendor/github.com/containerd/containerd/content/content.go

@@ -37,7 +37,7 @@ type Provider interface {
 	// ReaderAt only requires desc.Digest to be set.
 	// Other fields in the descriptor may be used internally for resolving
 	// the location of the actual data.
-	ReaderAt(ctx context.Context, dec ocispec.Descriptor) (ReaderAt, error)
+	ReaderAt(ctx context.Context, desc ocispec.Descriptor) (ReaderAt, error)
 }
 
 // Ingester writes content

+ 28 - 0
vendor/github.com/containerd/containerd/content/local/readerat.go

@@ -18,6 +18,11 @@ package local
 
 import (
 	"os"
+
+	"github.com/pkg/errors"
+
+	"github.com/containerd/containerd/content"
+	"github.com/containerd/containerd/errdefs"
 )
 
 // readerat implements io.ReaderAt in a completely stateless manner by opening
@@ -27,6 +32,29 @@ type sizeReaderAt struct {
 	fp   *os.File
 }
 
+// OpenReader creates ReaderAt from a file
+func OpenReader(p string) (content.ReaderAt, error) {
+	fi, err := os.Stat(p)
+	if err != nil {
+		if !os.IsNotExist(err) {
+			return nil, err
+		}
+
+		return nil, errors.Wrap(errdefs.ErrNotFound, "blob not found")
+	}
+
+	fp, err := os.Open(p)
+	if err != nil {
+		if !os.IsNotExist(err) {
+			return nil, err
+		}
+
+		return nil, errors.Wrap(errdefs.ErrNotFound, "blob not found")
+	}
+
+	return sizeReaderAt{size: fi.Size(), fp: fp}, nil
+}
+
 func (ra sizeReaderAt) ReadAt(p []byte, offset int64) (int, error) {
 	return ra.fp.ReadAt(p, offset)
 }

+ 4 - 16
vendor/github.com/containerd/containerd/content/local/store.go

@@ -131,25 +131,13 @@ func (s *store) ReaderAt(ctx context.Context, desc ocispec.Descriptor) (content.
 	if err != nil {
 		return nil, errors.Wrapf(err, "calculating blob path for ReaderAt")
 	}
-	fi, err := os.Stat(p)
-	if err != nil {
-		if !os.IsNotExist(err) {
-			return nil, err
-		}
-
-		return nil, errors.Wrapf(errdefs.ErrNotFound, "blob %s expected at %s", desc.Digest, p)
-	}
 
-	fp, err := os.Open(p)
+	reader, err := OpenReader(p)
 	if err != nil {
-		if !os.IsNotExist(err) {
-			return nil, err
-		}
-
-		return nil, errors.Wrapf(errdefs.ErrNotFound, "blob %s expected at %s", desc.Digest, p)
+		return nil, errors.Wrapf(err, "blob %s expected at %s", desc.Digest, p)
 	}
 
-	return sizeReaderAt{size: fi.Size(), fp: fp}, nil
+	return reader, nil
 }
 
 // Delete removes a blob by its digest.
@@ -477,7 +465,6 @@ func (s *store) Writer(ctx context.Context, opts ...content.WriterOpt) (content.
 	}
 	var lockErr error
 	for count := uint64(0); count < 10; count++ {
-		time.Sleep(time.Millisecond * time.Duration(rand.Intn(1<<count)))
 		if err := tryLock(wOpts.Ref); err != nil {
 			if !errdefs.IsUnavailable(err) {
 				return nil, err
@@ -488,6 +475,7 @@ func (s *store) Writer(ctx context.Context, opts ...content.WriterOpt) (content.
 			lockErr = nil
 			break
 		}
+		time.Sleep(time.Millisecond * time.Duration(rand.Intn(1<<count)))
 	}
 
 	if lockErr != nil {

+ 8 - 1
vendor/github.com/containerd/containerd/content/proxy/content_writer.go

@@ -97,7 +97,14 @@ func (rw *remoteWriter) Write(p []byte) (n int, err error) {
 	return
 }
 
-func (rw *remoteWriter) Commit(ctx context.Context, size int64, expected digest.Digest, opts ...content.Opt) error {
+func (rw *remoteWriter) Commit(ctx context.Context, size int64, expected digest.Digest, opts ...content.Opt) (err error) {
+	defer func() {
+		err1 := rw.Close()
+		if err == nil {
+			err = err1
+		}
+	}()
+
 	var base content.Info
 	for _, opt := range opts {
 		if err := opt(&base); err != nil {

+ 2 - 0
vendor/github.com/containerd/containerd/defaults/defaults_unix.go

@@ -34,4 +34,6 @@ const (
 	DefaultFIFODir = "/run/containerd/fifo"
 	// DefaultRuntime is the default linux runtime
 	DefaultRuntime = "io.containerd.runc.v2"
+	// DefaultConfigDir is the default location for config files.
+	DefaultConfigDir = "/etc/containerd"
 )

+ 3 - 0
vendor/github.com/containerd/containerd/defaults/defaults_windows.go

@@ -30,6 +30,9 @@ var (
 	// DefaultStateDir is the default location used by containerd to store
 	// transient data
 	DefaultStateDir = filepath.Join(os.Getenv("ProgramData"), "containerd", "state")
+
+	// DefaultConfigDir is the default location for config files.
+	DefaultConfigDir = filepath.Join(os.Getenv("programfiles"), "containerd")
 )
 
 const (

+ 4 - 0
vendor/github.com/containerd/containerd/diff/stream.go

@@ -168,6 +168,10 @@ func (c *compressedProcessor) Close() error {
 	return c.rc.Close()
 }
 
+// BinaryHandler creates a new stream processor handler which calls out to the given binary.
+// The id is used to identify the stream processor and allows the caller to send
+// payloads specific for that stream processor (i.e. decryption keys for decrypt stream processor).
+// The binary will be called for the provided mediaTypes and return the given media type.
 func BinaryHandler(id, returnsMediaType string, mediaTypes []string, path string, args, env []string) Handler {
 	set := make(map[string]struct{}, len(mediaTypes))
 	for _, m := range mediaTypes {

+ 42 - 37
vendor/github.com/containerd/containerd/go.mod

@@ -1,26 +1,24 @@
 module github.com/containerd/containerd
 
-go 1.15
+go 1.16
 
 require (
-	github.com/BurntSushi/toml v0.3.1
-	github.com/Microsoft/go-winio v0.4.16
-	github.com/Microsoft/hcsshim v0.8.14
-	github.com/Microsoft/hcsshim/test v0.0.0-20201218223536-d3e5debf77da
-	github.com/containerd/aufs v0.0.0-20200908144142-dab0cbea06f4
-	github.com/containerd/btrfs v0.0.0-20201111183144-404b9149801e
-	github.com/containerd/cgroups v0.0.0-20200824123100-0b889c03f102
-	github.com/containerd/console v1.0.1
-	github.com/containerd/continuity v0.0.0-20201208142359-180525291bb7
-	github.com/containerd/fifo v0.0.0-20201026212402-0724c46b320c
-	github.com/containerd/go-cni v1.0.1
-	github.com/containerd/go-runc v0.0.0-20200220073739-7016d3ce2328
-	github.com/containerd/imgcrypt v1.0.1
-	github.com/containerd/nri v0.0.0-20201007170849-eb1350a75164
+	github.com/Microsoft/go-winio v0.4.17
+	github.com/Microsoft/hcsshim v0.8.16
+	github.com/containerd/aufs v1.0.0
+	github.com/containerd/btrfs v1.0.0
+	github.com/containerd/cgroups v1.0.1
+	github.com/containerd/console v1.0.2
+	github.com/containerd/continuity v0.1.0
+	github.com/containerd/fifo v1.0.0
+	github.com/containerd/go-cni v1.0.2
+	github.com/containerd/go-runc v1.0.0
+	github.com/containerd/imgcrypt v1.1.1
+	github.com/containerd/nri v0.1.0
 	github.com/containerd/ttrpc v1.0.2
-	github.com/containerd/typeurl v1.0.1
-	github.com/containerd/zfs v0.0.0-20200918131355-0a33824f23a2
-	github.com/containernetworking/plugins v0.8.6
+	github.com/containerd/typeurl v1.0.2
+	github.com/containerd/zfs v1.0.0
+	github.com/containernetworking/plugins v0.9.1
 	github.com/coreos/go-systemd/v22 v22.1.0
 	github.com/davecgh/go-spew v1.1.1
 	github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c
@@ -31,43 +29,50 @@ require (
 	github.com/gogo/googleapis v1.4.0
 	github.com/gogo/protobuf v1.3.2
 	github.com/golang/protobuf v1.4.3
-	github.com/google/go-cmp v0.5.2
-	github.com/google/uuid v1.1.2
+	github.com/google/go-cmp v0.5.4
+	github.com/google/uuid v1.2.0
 	github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
 	github.com/hashicorp/go-multierror v1.0.0
-	github.com/imdario/mergo v0.3.10
-	github.com/klauspost/compress v1.11.3
-	github.com/moby/sys/mountinfo v0.4.0
+	github.com/imdario/mergo v0.3.11
+	github.com/klauspost/compress v1.11.13
+	github.com/moby/locker v1.0.1
+	github.com/moby/sys/mountinfo v0.4.1
 	github.com/moby/sys/symlink v0.1.0
 	github.com/opencontainers/go-digest v1.0.0
 	github.com/opencontainers/image-spec v1.0.1
-	github.com/opencontainers/runc v1.0.0-rc92
-	github.com/opencontainers/runtime-spec v1.0.3-0.20200728170252-4d89ac9fbff6
+	github.com/opencontainers/runc v1.0.0-rc93
+	github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d
 	github.com/opencontainers/selinux v1.8.0
+	github.com/pelletier/go-toml v1.8.1
 	github.com/pkg/errors v0.9.1
 	github.com/prometheus/client_golang v1.7.1
+	github.com/prometheus/procfs v0.6.0 // indirect; temporarily force v0.6.0, which was previously defined in imgcrypt as explicit version
+	github.com/satori/go.uuid v1.2.0 // indirect
 	github.com/sirupsen/logrus v1.7.0
 	github.com/stretchr/testify v1.6.1
-	github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2
 	github.com/tchap/go-patricia v2.2.6+incompatible
 	github.com/urfave/cli v1.22.2
 	go.etcd.io/bbolt v1.3.5
-	golang.org/x/net v0.0.0-20201110031124-69a78807bb2b
-	golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9
-	golang.org/x/sys v0.0.0-20201202213521-69691e467435
-	google.golang.org/grpc v1.30.0
-	gotest.tools/v3 v3.0.2
-	k8s.io/api v0.20.1
-	k8s.io/apimachinery v0.20.1
-	k8s.io/apiserver v0.20.1
-	k8s.io/client-go v0.20.1
-	k8s.io/component-base v0.20.1
-	k8s.io/cri-api v0.20.1
+	golang.org/x/net v0.0.0-20210226172049-e18ecbb05110
+	golang.org/x/sync v0.0.0-20201207232520-09787c993a3a
+	golang.org/x/sys v0.0.0-20210324051608-47abb6519492
+	google.golang.org/grpc v1.33.2
+	gotest.tools/v3 v3.0.3
+	k8s.io/api v0.20.6
+	k8s.io/apimachinery v0.20.6
+	k8s.io/apiserver v0.20.6
+	k8s.io/client-go v0.20.6
+	k8s.io/component-base v0.20.6
+	k8s.io/cri-api v0.20.6
 	k8s.io/klog/v2 v2.4.0
 	k8s.io/utils v0.0.0-20201110183641-67b214c5f920
 )
 
+// When updating replace rules, make sure to also update the rules in integration/client/go.mod
 replace (
+	// prevent transitional dependencies due to containerd having a circular
+	// dependency on itself through plugins. see .empty-mod/go.mod for details
+	github.com/containerd/containerd => ./.empty-mod/
 	github.com/gogo/googleapis => github.com/gogo/googleapis v1.3.2
 	github.com/golang/protobuf => github.com/golang/protobuf v1.3.5
 	// urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092

+ 4 - 3
vendor/github.com/containerd/containerd/images/diffid.go

@@ -17,10 +17,10 @@
 package images
 
 import (
-	"compress/gzip"
 	"context"
 	"io"
 
+	"github.com/containerd/containerd/archive/compression"
 	"github.com/containerd/containerd/content"
 	"github.com/containerd/containerd/labels"
 	"github.com/opencontainers/go-digest"
@@ -55,13 +55,14 @@ func GetDiffID(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (
 	}
 	defer ra.Close()
 	r := content.NewReader(ra)
-	gzR, err := gzip.NewReader(r)
+	uR, err := compression.DecompressStream(r)
 	if err != nil {
 		return "", err
 	}
+	defer uR.Close()
 	digester := digest.Canonical.Digester()
 	hashW := digester.Hash()
-	if _, err := io.Copy(hashW, gzR); err != nil {
+	if _, err := io.Copy(hashW, uR); err != nil {
 		return "", err
 	}
 	if err := ra.Close(); err != nil {

+ 3 - 0
vendor/github.com/containerd/containerd/images/mediatypes.go

@@ -49,6 +49,9 @@ const (
 	MediaTypeContainerd1CheckpointRuntimeOptions = "application/vnd.containerd.container.checkpoint.runtime.options+proto"
 	// Legacy Docker schema1 manifest
 	MediaTypeDockerSchema1Manifest = "application/vnd.docker.distribution.manifest.v1+prettyjws"
+	// Encrypted media types
+	MediaTypeImageLayerEncrypted     = ocispec.MediaTypeImageLayer + "+encrypted"
+	MediaTypeImageLayerGzipEncrypted = ocispec.MediaTypeImageLayerGzip + "+encrypted"
 )
 
 // DiffCompression returns the compression as defined by the layer diff media

+ 88 - 0
vendor/github.com/containerd/containerd/metadata/boltutil/helpers.go

@@ -19,6 +19,8 @@ package boltutil
 import (
 	"time"
 
+	"github.com/gogo/protobuf/proto"
+	"github.com/gogo/protobuf/types"
 	"github.com/pkg/errors"
 	bolt "go.etcd.io/bbolt"
 )
@@ -28,6 +30,7 @@ var (
 	bucketKeyLabels      = []byte("labels")
 	bucketKeyCreatedAt   = []byte("createdat")
 	bucketKeyUpdatedAt   = []byte("updatedat")
+	bucketKeyExtensions  = []byte("extensions")
 )
 
 // ReadLabels reads the labels key from the bucket
@@ -145,3 +148,88 @@ func WriteTimestamps(bkt *bolt.Bucket, created, updated time.Time) error {
 
 	return nil
 }
+
+// WriteExtensions will write a KV map to the given bucket,
+// where `K` is a string key and `V` is a protobuf's Any type that represents a generic extension.
+func WriteExtensions(bkt *bolt.Bucket, extensions map[string]types.Any) error {
+	if len(extensions) == 0 {
+		return nil
+	}
+
+	ebkt, err := bkt.CreateBucketIfNotExists(bucketKeyExtensions)
+	if err != nil {
+		return err
+	}
+
+	for name, ext := range extensions {
+		p, err := proto.Marshal(&ext)
+		if err != nil {
+			return err
+		}
+
+		if err := ebkt.Put([]byte(name), p); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// ReadExtensions will read back a map of extensions from the given bucket, previously written by WriteExtensions
+func ReadExtensions(bkt *bolt.Bucket) (map[string]types.Any, error) {
+	var (
+		extensions = make(map[string]types.Any)
+		ebkt       = bkt.Bucket(bucketKeyExtensions)
+	)
+
+	if ebkt == nil {
+		return extensions, nil
+	}
+
+	if err := ebkt.ForEach(func(k, v []byte) error {
+		var t types.Any
+		if err := proto.Unmarshal(v, &t); err != nil {
+			return err
+		}
+
+		extensions[string(k)] = t
+		return nil
+	}); err != nil {
+		return nil, err
+	}
+
+	return extensions, nil
+}
+
+// WriteAny writes a protobuf's Any type to the bucket
+func WriteAny(bkt *bolt.Bucket, name []byte, any *types.Any) error {
+	if any == nil {
+		return nil
+	}
+
+	data, err := proto.Marshal(any)
+	if err != nil {
+		return err
+	}
+
+	if err := bkt.Put(name, data); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// ReadAny reads back protobuf's Any type from the bucket
+func ReadAny(bkt *bolt.Bucket, name []byte) (*types.Any, error) {
+	bytes := bkt.Get(name)
+	if bytes == nil {
+		return nil, nil
+	}
+
+	out := types.Any{}
+	if err := proto.Unmarshal(bytes, &out); err != nil {
+		return nil, errors.Wrap(err, "failed to unmarshal any")
+	}
+
+	return &out, nil
+}

+ 11 - 58
vendor/github.com/containerd/containerd/metadata/containers.go

@@ -336,16 +336,11 @@ func readContainer(container *containers.Container, bkt *bolt.Bucket) error {
 				container.Runtime.Name = string(n)
 			}
 
-			obkt := rbkt.Get(bucketKeyOptions)
-			if obkt == nil {
-				return nil
-			}
-
-			var any types.Any
-			if err := proto.Unmarshal(obkt, &any); err != nil {
+			any, err := boltutil.ReadAny(rbkt, bucketKeyOptions)
+			if err != nil {
 				return err
 			}
-			container.Runtime.Options = &any
+			container.Runtime.Options = any
 		case string(bucketKeySpec):
 			var any types.Any
 			if err := proto.Unmarshal(v, &any); err != nil {
@@ -357,22 +352,8 @@ func readContainer(container *containers.Container, bkt *bolt.Bucket) error {
 		case string(bucketKeySnapshotter):
 			container.Snapshotter = string(v)
 		case string(bucketKeyExtensions):
-			ebkt := bkt.Bucket(bucketKeyExtensions)
-			if ebkt == nil {
-				return nil
-			}
-
-			extensions := make(map[string]types.Any)
-			if err := ebkt.ForEach(func(k, v []byte) error {
-				var a types.Any
-				if err := proto.Unmarshal(v, &a); err != nil {
-					return err
-				}
-
-				extensions[string(k)] = a
-				return nil
-			}); err != nil {
-
+			extensions, err := boltutil.ReadExtensions(bkt)
+			if err != nil {
 				return err
 			}
 
@@ -388,15 +369,8 @@ func writeContainer(bkt *bolt.Bucket, container *containers.Container) error {
 		return err
 	}
 
-	if container.Spec != nil {
-		spec, err := container.Spec.Marshal()
-		if err != nil {
-			return err
-		}
-
-		if err := bkt.Put(bucketKeySpec, spec); err != nil {
-			return err
-		}
+	if err := boltutil.WriteAny(bkt, bucketKeySpec, container.Spec); err != nil {
+		return err
 	}
 
 	for _, v := range [][2][]byte{
@@ -424,33 +398,12 @@ func writeContainer(bkt *bolt.Bucket, container *containers.Container) error {
 		return err
 	}
 
-	if len(container.Extensions) > 0 {
-		ebkt, err := bkt.CreateBucketIfNotExists(bucketKeyExtensions)
-		if err != nil {
-			return err
-		}
-
-		for name, ext := range container.Extensions {
-			p, err := proto.Marshal(&ext)
-			if err != nil {
-				return err
-			}
-
-			if err := ebkt.Put([]byte(name), p); err != nil {
-				return err
-			}
-		}
+	if err := boltutil.WriteExtensions(bkt, container.Extensions); err != nil {
+		return err
 	}
 
-	if container.Runtime.Options != nil {
-		data, err := proto.Marshal(container.Runtime.Options)
-		if err != nil {
-			return err
-		}
-
-		if err := rbkt.Put(bucketKeyOptions, data); err != nil {
-			return err
-		}
+	if err := boltutil.WriteAny(rbkt, bucketKeyOptions, container.Runtime.Options); err != nil {
+		return err
 	}
 
 	return boltutil.WriteLabels(bkt, container.Labels)

+ 30 - 19
vendor/github.com/containerd/containerd/metadata/namespaces.go

@@ -18,6 +18,7 @@ package metadata
 
 import (
 	"context"
+	"strings"
 
 	"github.com/containerd/containerd/errdefs"
 	"github.com/containerd/containerd/identifiers"
@@ -140,10 +141,17 @@ func (s *namespaceStore) Delete(ctx context.Context, namespace string, opts ...n
 		}
 	}
 	bkt := getBucket(s.tx, bucketKeyVersion)
-	if empty, err := s.namespaceEmpty(ctx, namespace); err != nil {
+	types, err := s.listNs(namespace)
+	if err != nil {
 		return err
-	} else if !empty {
-		return errors.Wrapf(errdefs.ErrFailedPrecondition, "namespace %q must be empty", namespace)
+	}
+
+	if len(types) > 0 {
+		return errors.Wrapf(
+			errdefs.ErrFailedPrecondition,
+			"namespace %q must be empty, but it still has %s",
+			namespace, strings.Join(types, ", "),
+		)
 	}
 
 	if err := bkt.DeleteBucket([]byte(namespace)); err != nil {
@@ -157,32 +165,35 @@ func (s *namespaceStore) Delete(ctx context.Context, namespace string, opts ...n
 	return nil
 }
 
-func (s *namespaceStore) namespaceEmpty(ctx context.Context, namespace string) (bool, error) {
-	// Get all data buckets
-	buckets := []*bolt.Bucket{
-		getImagesBucket(s.tx, namespace),
-		getBlobsBucket(s.tx, namespace),
-		getContainersBucket(s.tx, namespace),
+// listNs returns the types of the remaining objects inside the given namespace.
+// It doesn't return exact objects due to performance concerns.
+func (s *namespaceStore) listNs(namespace string) ([]string, error) {
+	var out []string
+
+	if !isBucketEmpty(getImagesBucket(s.tx, namespace)) {
+		out = append(out, "images")
+	}
+	if !isBucketEmpty(getBlobsBucket(s.tx, namespace)) {
+		out = append(out, "blobs")
 	}
+	if !isBucketEmpty(getContainersBucket(s.tx, namespace)) {
+		out = append(out, "containers")
+	}
+
 	if snbkt := getSnapshottersBucket(s.tx, namespace); snbkt != nil {
 		if err := snbkt.ForEach(func(k, v []byte) error {
 			if v == nil {
-				buckets = append(buckets, snbkt.Bucket(k))
+				if !isBucketEmpty(snbkt.Bucket(k)) {
+					out = append(out, "snapshot-"+string(k))
+				}
 			}
 			return nil
 		}); err != nil {
-			return false, err
-		}
-	}
-
-	// Ensure data buckets are empty
-	for _, bkt := range buckets {
-		if !isBucketEmpty(bkt) {
-			return false, nil
+			return nil, err
 		}
 	}
 
-	return true, nil
+	return out, nil
 }
 
 func isBucketEmpty(bkt *bolt.Bucket) bool {

+ 30 - 18
vendor/github.com/containerd/containerd/mount/losetup_linux.go

@@ -69,7 +69,10 @@ func getFreeLoopDev() (uint32, error) {
 	return uint32(num), nil
 }
 
-func setupLoopDev(backingFile, loopDev string, param LoopParams) error {
+// setupLoopDev attaches the backing file to the loop device and returns
+// the file handle for the loop device. The caller is responsible for
+// closing the file handle.
+func setupLoopDev(backingFile, loopDev string, param LoopParams) (_ *os.File, retErr error) {
 	// 1. Open backing file and loop device
 	flags := os.O_RDWR
 	if param.Readonly {
@@ -78,19 +81,23 @@ func setupLoopDev(backingFile, loopDev string, param LoopParams) error {
 
 	back, err := os.OpenFile(backingFile, flags, 0)
 	if err != nil {
-		return errors.Wrapf(err, "could not open backing file: %s", backingFile)
+		return nil, errors.Wrapf(err, "could not open backing file: %s", backingFile)
 	}
 	defer back.Close()
 
 	loop, err := os.OpenFile(loopDev, flags, 0)
 	if err != nil {
-		return errors.Wrapf(err, "could not open loop device: %s", loopDev)
+		return nil, errors.Wrapf(err, "could not open loop device: %s", loopDev)
 	}
-	defer loop.Close()
+	defer func() {
+		if retErr != nil {
+			loop.Close()
+		}
+	}()
 
 	// 2. Set FD
 	if _, _, err = ioctl(loop.Fd(), unix.LOOP_SET_FD, back.Fd()); err != nil {
-		return errors.Wrapf(err, "could not set loop fd for device: %s", loopDev)
+		return nil, errors.Wrapf(err, "could not set loop fd for device: %s", loopDev)
 	}
 
 	// 3. Set Info
@@ -110,7 +117,7 @@ func setupLoopDev(backingFile, loopDev string, param LoopParams) error {
 
 	_, _, err = ioctl(loop.Fd(), unix.LOOP_SET_STATUS64, uintptr(unsafe.Pointer(&info)))
 	if err == nil {
-		return nil
+		return loop, nil
 	}
 
 	if param.Direct {
@@ -119,13 +126,12 @@ func setupLoopDev(backingFile, loopDev string, param LoopParams) error {
 		info.Flags &= ^(uint32(unix.LO_FLAGS_DIRECT_IO))
 		_, _, err = ioctl(loop.Fd(), unix.LOOP_SET_STATUS64, uintptr(unsafe.Pointer(&info)))
 		if err == nil {
-			return nil
+			return loop, nil
 		}
 	}
 
-	// Cleanup loop fd and return error
 	_, _, _ = ioctl(loop.Fd(), unix.LOOP_CLR_FD, 0)
-	return errors.Errorf("failed to set loop device info: %v", err)
+	return nil, errors.Errorf("failed to set loop device info: %v", err)
 }
 
 // setupLoop looks for (and possibly creates) a free loop device, and
@@ -142,15 +148,16 @@ func setupLoopDev(backingFile, loopDev string, param LoopParams) error {
 // the loop device when done with it.
 //
 // Upon success, the file handle to the loop device is returned.
-func setupLoop(backingFile string, param LoopParams) (string, error) {
+func setupLoop(backingFile string, param LoopParams) (*os.File, error) {
 	for retry := 1; retry < 100; retry++ {
 		num, err := getFreeLoopDev()
 		if err != nil {
-			return "", err
+			return nil, err
 		}
 
 		loopDev := fmt.Sprintf(loopDevFormat, num)
-		if err := setupLoopDev(backingFile, loopDev, param); err != nil {
+		file, err := setupLoopDev(backingFile, loopDev, param)
+		if err != nil {
 			// Per util-linux/sys-utils/losetup.c:create_loop(),
 			// free loop device can race and we end up failing
 			// with EBUSY when trying to set it up.
@@ -159,13 +166,13 @@ func setupLoop(backingFile string, param LoopParams) (string, error) {
 				time.Sleep(time.Millisecond * time.Duration(rand.Intn(retry*10)))
 				continue
 			}
-			return "", err
+			return nil, err
 		}
 
-		return loopDev, nil
+		return file, nil
 	}
 
-	return "", errors.New("timeout creating new loopback device")
+	return nil, errors.New("timeout creating new loopback device")
 }
 
 func removeLoop(loopdev string) error {
@@ -179,12 +186,17 @@ func removeLoop(loopdev string) error {
 	return err
 }
 
-// Attach a specified backing file to a loop device
+// AttachLoopDevice attaches a specified backing file to a loop device
 func AttachLoopDevice(backingFile string) (string, error) {
-	return setupLoop(backingFile, LoopParams{})
+	file, err := setupLoop(backingFile, LoopParams{})
+	if err != nil {
+		return "", err
+	}
+	defer file.Close()
+	return file.Name(), nil
 }
 
-// Detach a loop device
+// DetachLoopDevice detaches the provided loop devices
 func DetachLoopDevice(devices ...string) error {
 	for _, dev := range devices {
 		if err := removeLoop(dev); err != nil {

+ 4 - 2
vendor/github.com/containerd/containerd/mount/mount_linux.go

@@ -79,14 +79,16 @@ func (m *Mount) Mount(target string) (err error) {
 		// or remount with changed data
 		source := m.Source
 		if losetup {
-			devFile, err := setupLoop(m.Source, LoopParams{
+			loFile, err := setupLoop(m.Source, LoopParams{
 				Readonly:  oflags&unix.MS_RDONLY == unix.MS_RDONLY,
 				Autoclear: true})
 			if err != nil {
 				return err
 			}
+			defer loFile.Close()
+
 			// Mount the loop device instead
-			source = devFile
+			source = loFile.Name()
 		}
 		if err := mountAt(chdir, source, target, m.Type, uintptr(oflags), data); err != nil {
 			return err

+ 31 - 44
vendor/github.com/containerd/containerd/oci/spec_opts.go

@@ -38,7 +38,6 @@ import (
 	"github.com/opencontainers/runc/libcontainer/user"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
-	"github.com/syndtr/gocapability/capability"
 )
 
 // SpecOpts sets spec specific information to a newly generated OCI spec
@@ -274,6 +273,28 @@ func WithMounts(mounts []specs.Mount) SpecOpts {
 	}
 }
 
+// WithoutMounts returns a SpecOpts that drops every mount whose cleaned
+// destination equals one of dests. The entries of dests are compared
+// verbatim (they are not cleaned).
+func WithoutMounts(dests ...string) SpecOpts {
+	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
+		// Index the destinations to remove so each mount needs one lookup.
+		drop := make(map[string]struct{}, len(dests))
+		for _, d := range dests {
+			drop[d] = struct{}{}
+		}
+
+		var kept []specs.Mount
+		for _, m := range s.Mounts {
+			if _, found := drop[filepath.Clean(m.Destination)]; found {
+				continue
+			}
+			kept = append(kept, m)
+		}
+		s.Mounts = kept
+		return nil
+	}
+}
+
 // WithHostNamespace allows a task to run inside the host's linux namespace
 func WithHostNamespace(ns specs.LinuxNamespaceType) SpecOpts {
 	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
@@ -295,10 +316,7 @@ func WithLinuxNamespace(ns specs.LinuxNamespace) SpecOpts {
 		setLinux(s)
 		for i, n := range s.Linux.Namespaces {
 			if n.Type == ns.Type {
-				before := s.Linux.Namespaces[:i]
-				after := s.Linux.Namespaces[i+1:]
-				s.Linux.Namespaces = append(before, ns)
-				s.Linux.Namespaces = append(s.Linux.Namespaces, after...)
+				s.Linux.Namespaces[i] = ns
 				return nil
 			}
 		}
@@ -776,29 +794,6 @@ func WithCapabilities(caps []string) SpecOpts {
 	}
 }
 
-// WithAllCapabilities sets all linux capabilities for the process
-var WithAllCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
-	return WithCapabilities(GetAllCapabilities())(ctx, client, c, s)
-}
-
-// GetAllCapabilities returns all caps up to CAP_LAST_CAP
-// or CAP_BLOCK_SUSPEND on RHEL6
-func GetAllCapabilities() []string {
-	last := capability.CAP_LAST_CAP
-	// hack for RHEL6 which has no /proc/sys/kernel/cap_last_cap
-	if last == capability.Cap(63) {
-		last = capability.CAP_BLOCK_SUSPEND
-	}
-	var caps []string
-	for _, cap := range capability.List() {
-		if cap > last {
-			continue
-		}
-		caps = append(caps, "CAP_"+strings.ToUpper(cap.String()))
-	}
-	return caps
-}
-
 func capsContain(caps []string, s string) bool {
 	for _, c := range caps {
 		if c == s {
@@ -954,16 +949,13 @@ func WithReadonlyPaths(paths []string) SpecOpts {
 
 // WithWriteableSysfs makes any sysfs mounts writeable
 func WithWriteableSysfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
-	for i, m := range s.Mounts {
+	for _, m := range s.Mounts {
 		if m.Type == "sysfs" {
-			var options []string
-			for _, o := range m.Options {
+			for i, o := range m.Options {
 				if o == "ro" {
-					o = "rw"
+					m.Options[i] = "rw"
 				}
-				options = append(options, o)
 			}
-			s.Mounts[i].Options = options
 		}
 	}
 	return nil
@@ -971,16 +963,13 @@ func WithWriteableSysfs(_ context.Context, _ Client, _ *containers.Container, s
 
 // WithWriteableCgroupfs makes any cgroup mounts writeable
 func WithWriteableCgroupfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
-	for i, m := range s.Mounts {
+	for _, m := range s.Mounts {
 		if m.Type == "cgroup" {
-			var options []string
-			for _, o := range m.Options {
+			for i, o := range m.Options {
 				if o == "ro" {
-					o = "rw"
+					m.Options[i] = "rw"
 				}
-				options = append(options, o)
 			}
-			s.Mounts[i].Options = options
 		}
 	}
 	return nil
@@ -1132,7 +1121,7 @@ func WithDefaultUnixDevices(_ context.Context, _ Client, _ *containers.Container
 
 // WithPrivileged sets up options for a privileged container
 var WithPrivileged = Compose(
-	WithAllCapabilities,
+	WithAllCurrentCapabilities,
 	WithMaskedPaths(nil),
 	WithReadonlyPaths(nil),
 	WithWriteableSysfs,
@@ -1205,15 +1194,13 @@ func WithLinuxDevices(devices []specs.LinuxDevice) SpecOpts {
 	}
 }
 
-var ErrNotADevice = errors.New("not a device node")
-
 // WithLinuxDevice adds the device specified by path to the spec
 func WithLinuxDevice(path, permissions string) SpecOpts {
 	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
 		setLinux(s)
 		setResources(s)
 
-		dev, err := deviceFromPath(path, permissions)
+		dev, err := deviceFromPath(path)
 		if err != nil {
 			return err
 		}

+ 42 - 79
vendor/github.com/containerd/containerd/oci/spec_opts_linux.go

@@ -20,20 +20,17 @@ package oci
 
 import (
 	"context"
-	"io/ioutil"
-	"os"
-	"path/filepath"
 
 	"github.com/containerd/containerd/containers"
+	"github.com/containerd/containerd/pkg/cap"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
-	"golang.org/x/sys/unix"
 )
 
 // WithHostDevices adds all the hosts device nodes to the container's spec
 func WithHostDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
 	setLinux(s)
 
-	devs, err := getDevices("/dev")
+	devs, err := HostDevices()
 	if err != nil {
 		return err
 	}
@@ -41,83 +38,28 @@ func WithHostDevices(_ context.Context, _ Client, _ *containers.Container, s *Sp
 	return nil
 }
 
-func getDevices(path string) ([]specs.LinuxDevice, error) {
-	files, err := ioutil.ReadDir(path)
-	if err != nil {
-		return nil, err
-	}
-	var out []specs.LinuxDevice
-	for _, f := range files {
-		switch {
-		case f.IsDir():
-			switch f.Name() {
-			// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
-			// ".udev" added to address https://github.com/opencontainers/runc/issues/2093
-			case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts", ".udev":
-				continue
-			default:
-				sub, err := getDevices(filepath.Join(path, f.Name()))
-				if err != nil {
-					return nil, err
-				}
-
-				out = append(out, sub...)
-				continue
-			}
-		case f.Name() == "console":
-			continue
-		}
-		device, err := deviceFromPath(filepath.Join(path, f.Name()), "rwm")
+// WithDevices recursively adds devices from the passed in path and associated cgroup rules for that device.
+// If devicePath is a dir it traverses the dir to add all devices in that dir.
+// If devicePath is not a dir, it attempts to add the single device.
+// If containerPath is not set then the device path is used for the container path.
+// Each added device gets an allow rule with the given permissions as its access string.
+func WithDevices(devicePath, containerPath, permissions string) SpecOpts {
+	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
+		// Same preparation as WithLinuxDevice: s.Linux and s.Linux.Resources
+		// are dereferenced below.
+		setLinux(s)
+		setResources(s)
+
+		devs, err := getDevices(devicePath, containerPath)
 		if err != nil {
-			if err == ErrNotADevice {
-				continue
-			}
-			if os.IsNotExist(err) {
-				continue
-			}
-			return nil, err
+			return err
 		}
-		out = append(out, *device)
-	}
-	return out, nil
-}
-
-func deviceFromPath(path, permissions string) (*specs.LinuxDevice, error) {
-	var stat unix.Stat_t
-	if err := unix.Lstat(path, &stat); err != nil {
-		return nil, err
-	}
-
-	var (
-		// The type is 32bit on mips.
-		devNumber = uint64(stat.Rdev) // nolint: unconvert
-		major     = unix.Major(devNumber)
-		minor     = unix.Minor(devNumber)
-	)
-	if major == 0 {
-		return nil, ErrNotADevice
-	}
-
-	var (
-		devType string
-		mode    = stat.Mode
-	)
-	switch {
-	case mode&unix.S_IFBLK == unix.S_IFBLK:
-		devType = "b"
-	case mode&unix.S_IFCHR == unix.S_IFCHR:
-		devType = "c"
+		// Index into devs instead of taking the address of the range
+		// variable, so every cgroup rule points at its own device's
+		// major/minor rather than all rules sharing one variable.
+		for i := range devs {
+			s.Linux.Devices = append(s.Linux.Devices, devs[i])
+			s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, specs.LinuxDeviceCgroup{
+				Allow:  true,
+				Type:   devs[i].Type,
+				Major:  &devs[i].Major,
+				Minor:  &devs[i].Minor,
+				Access: permissions,
+			})
+		}
+		return nil
 	}
-	fm := os.FileMode(mode)
-	return &specs.LinuxDevice{
-		Type:     devType,
-		Path:     path,
-		Major:    int64(major),
-		Minor:    int64(minor),
-		FileMode: &fm,
-		UID:      &stat.Uid,
-		GID:      &stat.Gid,
-	}, nil
 }
 
 // WithMemorySwap sets the container's swap in bytes
@@ -180,3 +122,24 @@ func WithCPUCFS(quota int64, period uint64) SpecOpts {
 		return nil
 	}
 }
+
+// WithAllCurrentCapabilities applies the capability list reported by cap.Current()
+// (the effective capabilities of the caller process) to the container process.
+// The capability set may differ from WithAllKnownCapabilities when running in a container.
+var WithAllCurrentCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
+	current, err := cap.Current()
+	if err != nil {
+		return err
+	}
+	apply := WithCapabilities(current)
+	return apply(ctx, client, c, s)
+}
+
+// WithAllKnownCapabilities sets all the known linux capabilities, as listed by cap.Known(), for the container process
+var WithAllKnownCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
+	apply := WithCapabilities(cap.Known())
+	return apply(ctx, client, c, s)
+}
+
+// WithoutRunMount strips the mount whose destination is `/run` from the spec
+func WithoutRunMount(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
+	removeRun := WithoutMounts("/run")
+	return removeRun(ctx, client, c, s)
+}

+ 38 - 0
vendor/github.com/containerd/containerd/oci/spec_opts_nonlinux.go

@@ -0,0 +1,38 @@
+// +build !linux
+
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package oci
+
+import (
+	"context"
+
+	"github.com/containerd/containerd/containers"
+)
+
+// WithAllCurrentCapabilities is the non-Linux counterpart of the Linux option of the same name.
+// In this build it calls WithCapabilities with a nil capability list.
+//nolint: deadcode, unused
+var WithAllCurrentCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
+	apply := WithCapabilities(nil)
+	return apply(ctx, client, c, s)
+}
+
+// WithAllKnownCapabilities is the non-Linux counterpart of the Linux option of the same name.
+// In this build it calls WithCapabilities with a nil capability list.
+//nolint: deadcode, unused
+var WithAllKnownCapabilities = func(ctx context.Context, client Client, c *containers.Container, s *Spec) error {
+	apply := WithCapabilities(nil)
+	return apply(ctx, client, c, s)
+}

+ 10 - 79
vendor/github.com/containerd/containerd/oci/spec_opts_unix.go

@@ -20,20 +20,15 @@ package oci
 
 import (
 	"context"
-	"io/ioutil"
-	"os"
-	"path/filepath"
 
 	"github.com/containerd/containerd/containers"
-	specs "github.com/opencontainers/runtime-spec/specs-go"
-	"golang.org/x/sys/unix"
 )
 
 // WithHostDevices adds all the hosts device nodes to the container's spec
 func WithHostDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
 	setLinux(s)
 
-	devs, err := getDevices("/dev")
+	devs, err := HostDevices()
 	if err != nil {
 		return err
 	}
@@ -41,82 +36,18 @@ func WithHostDevices(_ context.Context, _ Client, _ *containers.Container, s *Sp
 	return nil
 }
 
-func getDevices(path string) ([]specs.LinuxDevice, error) {
-	files, err := ioutil.ReadDir(path)
-	if err != nil {
-		return nil, err
-	}
-	var out []specs.LinuxDevice
-	for _, f := range files {
-		switch {
-		case f.IsDir():
-			switch f.Name() {
-			// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
-			// ".udev" added to address https://github.com/opencontainers/runc/issues/2093
-			case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts", ".udev":
-				continue
-			default:
-				sub, err := getDevices(filepath.Join(path, f.Name()))
-				if err != nil {
-					return nil, err
-				}
-
-				out = append(out, sub...)
-				continue
-			}
-		case f.Name() == "console":
-			continue
-		}
-		device, err := deviceFromPath(filepath.Join(path, f.Name()), "rwm")
+// WithDevices recursively adds devices from the passed in path to the container's spec.
+// If devicePath is a dir it traverses the dir to add all devices in that dir.
+// If devicePath is not a dir, it attempts to add the single device.
+// The permissions argument is not used by this implementation; no cgroup rules are added.
+func WithDevices(devicePath, containerPath, permissions string) SpecOpts {
+	return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error {
+		devs, err := getDevices(devicePath, containerPath)
 		if err != nil {
-			if err == ErrNotADevice {
-				continue
-			}
-			if os.IsNotExist(err) {
-				continue
-			}
-			return nil, err
+			return err
 		}
-		out = append(out, *device)
-	}
-	return out, nil
-}
-
-func deviceFromPath(path, permissions string) (*specs.LinuxDevice, error) {
-	var stat unix.Stat_t
-	if err := unix.Lstat(path, &stat); err != nil {
-		return nil, err
-	}
-
-	var (
-		devNumber = uint64(stat.Rdev)
-		major     = unix.Major(devNumber)
-		minor     = unix.Minor(devNumber)
-	)
-	if major == 0 {
-		return nil, ErrNotADevice
-	}
-
-	var (
-		devType string
-		mode    = stat.Mode
-	)
-	switch {
-	case mode&unix.S_IFBLK == unix.S_IFBLK:
-		devType = "b"
-	case mode&unix.S_IFCHR == unix.S_IFCHR:
-		devType = "c"
+		s.Linux.Devices = append(s.Linux.Devices, devs...)
+		return nil
 	}
-	fm := os.FileMode(mode)
-	return &specs.LinuxDevice{
-		Type:     devType,
-		Path:     path,
-		Major:    int64(major),
-		Minor:    int64(minor),
-		FileMode: &fm,
-		UID:      &stat.Uid,
-		GID:      &stat.Gid,
-	}, nil
 }
 
 // WithCPUCFS sets the container's Completely fair scheduling (CFS) quota and period

+ 1 - 1
vendor/github.com/containerd/containerd/oci/spec_opts_windows.go

@@ -74,6 +74,6 @@ func WithHostDevices(_ context.Context, _ Client, _ *containers.Container, s *Sp
 	return nil
 }
 
-func deviceFromPath(path, permissions string) (*specs.LinuxDevice, error) {
+func deviceFromPath(path string) (*specs.LinuxDevice, error) {
 	return nil, errors.New("device from path not supported on Windows")
 }

+ 137 - 0
vendor/github.com/containerd/containerd/oci/utils_unix.go

@@ -0,0 +1,137 @@
+// +build !windows
+
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package oci
+
+import (
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+)
+
+var errNotADevice = errors.New("not a device node")
+
+// HostDevices returns all devices that can be found under /dev directory.
+func HostDevices() ([]specs.LinuxDevice, error) {
+	return getDevices("/dev", "")
+}
+
+// getDevices returns the device nodes found at path.
+//
+// If path is not a directory it is treated as a single device: the result
+// has one entry, and any error from deviceFromPath (including
+// errNotADevice) is returned to the caller. If path is a directory it is
+// walked recursively, skipping a fixed set of sub-directories and any entry
+// named "console".
+//
+// When containerPath is non-empty it replaces the host path in the returned
+// devices: a single device gets containerPath itself, directory entries get
+// containerPath joined with the entry name. When it is empty the host path
+// is kept.
+func getDevices(path, containerPath string) ([]specs.LinuxDevice, error) {
+	stat, err := os.Stat(path)
+	if err != nil {
+		return nil, errors.Wrap(err, "error stating device path")
+	}
+
+	// Single (non-directory) path: return just that device.
+	if !stat.IsDir() {
+		dev, err := deviceFromPath(path)
+		if err != nil {
+			return nil, err
+		}
+		if containerPath != "" {
+			dev.Path = containerPath
+		}
+		return []specs.LinuxDevice{*dev}, nil
+	}
+
+	files, err := ioutil.ReadDir(path)
+	if err != nil {
+		return nil, err
+	}
+	var out []specs.LinuxDevice
+	for _, f := range files {
+		switch {
+		case f.IsDir():
+			switch f.Name() {
+			// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
+			// ".udev" added to address https://github.com/opencontainers/runc/issues/2093
+			case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts", ".udev":
+				continue
+			default:
+				// Recurse, mirroring the sub-directory under containerPath when one is set.
+				var cp string
+				if containerPath != "" {
+					cp = filepath.Join(containerPath, filepath.Base(f.Name()))
+				}
+				sub, err := getDevices(filepath.Join(path, f.Name()), cp)
+				if err != nil {
+					return nil, err
+				}
+
+				out = append(out, sub...)
+				continue
+			}
+		case f.Name() == "console":
+			continue
+		}
+		device, err := deviceFromPath(filepath.Join(path, f.Name()))
+		if err != nil {
+			// Entries that are not device nodes, or that no longer exist
+			// by the time they are inspected, are skipped rather than
+			// failing the whole walk.
+			if err == errNotADevice {
+				continue
+			}
+			if os.IsNotExist(err) {
+				continue
+			}
+			return nil, err
+		}
+		if containerPath != "" {
+			device.Path = filepath.Join(containerPath, filepath.Base(f.Name()))
+		}
+		out = append(out, *device)
+	}
+	return out, nil
+}
+
+// deviceFromPath builds a specs.LinuxDevice describing the node at path.
+//
+// The node is inspected with Lstat, so a symlink is not followed. It returns
+// the Lstat error unchanged, or errNotADevice when the major number derived
+// from Rdev is 0. Type is "b" or "c" depending on which file-type bits match
+// (block is checked first); it is left empty if neither matches. FileMode
+// holds the mode with the S_IFMT file-type bits cleared, and UID/GID point
+// at the owner ids from the stat result.
+func deviceFromPath(path string) (*specs.LinuxDevice, error) {
+	var stat unix.Stat_t
+	if err := unix.Lstat(path, &stat); err != nil {
+		return nil, err
+	}
+
+	var (
+		devNumber = uint64(stat.Rdev) //nolint: unconvert // the type is 32bit on mips.
+		major     = unix.Major(devNumber)
+		minor     = unix.Minor(devNumber)
+	)
+	// A zero major number is treated as "not a device node".
+	if major == 0 {
+		return nil, errNotADevice
+	}
+
+	var (
+		devType string
+		mode    = stat.Mode
+	)
+	switch {
+	case mode&unix.S_IFBLK == unix.S_IFBLK:
+		devType = "b"
+	case mode&unix.S_IFCHR == unix.S_IFCHR:
+		devType = "c"
+	}
+	// Keep only the non-file-type bits of the mode.
+	fm := os.FileMode(mode &^ unix.S_IFMT)
+	return &specs.LinuxDevice{
+		Type:     devType,
+		Path:     path,
+		Major:    int64(major),
+		Minor:    int64(minor),
+		FileMode: &fm,
+		UID:      &stat.Uid,
+		GID:      &stat.Gid,
+	}, nil
+}

+ 6 - 27
vendor/github.com/containerd/containerd/pkg/apparmor/apparmor.go

@@ -1,5 +1,3 @@
-// +build linux
-
 /*
    Copyright The containerd Authors.
 
@@ -18,31 +16,12 @@
 
 package apparmor
 
-import (
-	"io/ioutil"
-	"os"
-	"sync"
-)
-
-var (
-	appArmorSupported bool
-	checkAppArmor     sync.Once
-)
-
-// HostSupports returns true if apparmor is enabled for the host, if
-// apparmor_parser is enabled, and if we are not running docker-in-docker.
+// HostSupports returns true if apparmor is enabled for the host.
+// On non-Linux it returns false.
+// On Linux it returns true if apparmor_parser is enabled, and if we
+//  are not running docker-in-docker.
 //
-// It is a modified version of libcontainer/apparmor.IsEnabled(), which does not
-// check for apparmor_parser to be present, or if we're running docker-in-docker.
+//  It is a modified version of libcontainer/apparmor.IsEnabled(), which does not
+//  check for apparmor_parser to be present, or if we're running docker-in-docker.
 func HostSupports() bool {
-	checkAppArmor.Do(func() {
-		// see https://github.com/docker/docker/commit/de191e86321f7d3136ff42ff75826b8107399497
-		if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" {
-			if _, err = os.Stat("/sbin/apparmor_parser"); err == nil {
-				buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
-				appArmorSupported = err == nil && len(buf) > 1 && buf[0] == 'Y'
-			}
-		}
-	})
-	return appArmorSupported
+	return hostSupports()
 }

+ 48 - 0
vendor/github.com/containerd/containerd/pkg/apparmor/apparmor_linux.go

@@ -0,0 +1,48 @@
+// +build linux
+
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package apparmor
+
+import (
+	"io/ioutil"
+	"os"
+	"sync"
+)
+
+var (
+	appArmorSupported bool
+	checkAppArmor     sync.Once
+)
+
+// hostSupports reports whether AppArmor can be used on this host. The probe
+// runs once (guarded by checkAppArmor) and its result is cached. It yields
+// true only when /sys/kernel/security/apparmor exists, the "container"
+// environment variable is empty, /sbin/apparmor_parser exists, and
+// /sys/module/apparmor/parameters/enabled is longer than one byte and starts
+// with 'Y'.
+//
+// It is a modified version of libcontainer/apparmor.IsEnabled(), which does not
+// check for apparmor_parser to be present, or if we're running docker-in-docker.
+func hostSupports() bool {
+	checkAppArmor.Do(func() {
+		// see https://github.com/docker/docker/commit/de191e86321f7d3136ff42ff75826b8107399497
+		if _, err := os.Stat("/sys/kernel/security/apparmor"); err != nil {
+			return
+		}
+		if os.Getenv("container") != "" {
+			return
+		}
+		if _, err := os.Stat("/sbin/apparmor_parser"); err != nil {
+			return
+		}
+
+		enabled, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
+		appArmorSupported = err == nil && len(enabled) > 1 && enabled[0] == 'Y'
+	})
+	return appArmorSupported
+}

+ 1 - 2
vendor/github.com/containerd/containerd/pkg/apparmor/apparmor_unsupported.go

@@ -18,7 +18,6 @@
 
 package apparmor
 
-//nolint: deadcode, unused
-func HostSupports() bool {
+func hostSupports() bool {
 	return false
 }

+ 192 - 0
vendor/github.com/containerd/containerd/pkg/cap/cap_linux.go

@@ -0,0 +1,192 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+// Package cap provides Linux capability utility
+package cap
+
+import (
+	"bufio"
+	"io"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/pkg/errors"
+)
+
+// FromNumber returns a cap string like "CAP_SYS_ADMIN"
+// that corresponds to the given number like 21.
+// The number is used directly as an index into capsLatest.
+// FromNumber returns an empty string for a negative or otherwise unknown cap number.
+func FromNumber(num int) string {
+	if num < 0 || num > len(capsLatest)-1 { // outside the known table
+		return ""
+	}
+	return capsLatest[num]
+}
+
+// FromBitmap parses an uint64 bitmap into string slice like
+// []{"CAP_SYS_ADMIN", ...}, where bit i being set means cap number i is present.
+// Names are appended in ascending bit order.
+// Set bits that FromNumber cannot name are returned as cap numbers in the []int.
+func FromBitmap(v uint64) ([]string, []int) {
+	var (
+		res     []string // names of the recognized caps; stays nil when none are set
+		unknown []int    // bit positions with no known name; stays nil when none
+	)
+	for i := 0; i <= 63; i++ { // visit every bit of the 64-bit value
+		if b := (v >> i) & 0x1; b == 0x1 {
+			if s := FromNumber(i); s != "" {
+				res = append(res, s)
+			} else {
+				unknown = append(unknown, i)
+			}
+		}
+	}
+	return res, unknown
+}
+
+// Type is the type of capability; ParseProcPIDStatus uses it as the key of its result map.
+type Type int
+
+const (
+	// Effective is CapEff (value 1; each constant below is the next power of two)
+	Effective Type = 1 << iota
+	// Permitted is CapPrm (value 2)
+	Permitted
+	// Inheritable is CapInh (value 4)
+	Inheritable
+	// Bounding is CapBnd (value 8)
+	Bounding
+	// Ambient is CapAmb (value 16)
+	Ambient
+)
+
+// ParseProcPIDStatus reads /proc/<PID>/status-style text from r and returns the CapInh, CapPrm, CapEff, CapBnd and CapAmb bitmaps it finds, keyed by Type. It returns an error if one of those values is not valid hex or if reading r fails.
+func ParseProcPIDStatus(r io.Reader) (map[Type]uint64, error) {
+	res := make(map[Type]uint64)
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		line := scanner.Text()
+		pair := strings.SplitN(line, ":", 2) // split "Key:\tvalue" at the first colon only
+		if len(pair) != 2 { // no colon on this line; ignore it
+			continue
+		}
+		k := strings.TrimSpace(pair[0])
+		v := strings.TrimSpace(pair[1])
+		switch k { // every other key is ignored
+		case "CapInh", "CapPrm", "CapEff", "CapBnd", "CapAmb":
+			ui64, err := strconv.ParseUint(v, 16, 64) // the value is parsed as hexadecimal
+			if err != nil {
+				return nil, errors.Errorf("failed to parse line %q", line) // NOTE(review): the strconv error itself is not included
+			}
+			switch k {
+			case "CapInh":
+				res[Inheritable] = ui64
+			case "CapPrm":
+				res[Permitted] = ui64
+			case "CapEff":
+				res[Effective] = ui64
+			case "CapBnd":
+				res[Bounding] = ui64
+			case "CapAmb":
+				res[Ambient] = ui64
+			}
+		}
+	}
+	if err := scanner.Err(); err != nil { // read failure from the underlying reader
+		return nil, err
+	}
+	return res, nil
+}
+
+// Current returns the list of the effective and the known caps of
+// the current process, read from /proc/self/status.
+//
+// The result is like []string{"CAP_SYS_ADMIN", ...}.
+//
+// The result does not contain caps that FromBitmap cannot name
+// (numbers outside this package's capsLatest table); those are discarded.
+func Current() ([]string, error) {
+	f, err := os.Open("/proc/self/status")
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	caps, err := ParseProcPIDStatus(f)
+	if err != nil {
+		return nil, err
+	}
+	capEff := caps[Effective] // zero if the status text had no CapEff line
+	names, _ := FromBitmap(capEff) // the unknown cap numbers are dropped here
+	return names, nil
+}
+
+var (
+	// caps35 is the caps of kernel 3.5 (37 entries); index i holds the name of cap number i
+	caps35 = []string{
+		"CAP_CHOWN",            // 2.2
+		"CAP_DAC_OVERRIDE",     // 2.2
+		"CAP_DAC_READ_SEARCH",  // 2.2
+		"CAP_FOWNER",           // 2.2
+		"CAP_FSETID",           // 2.2
+		"CAP_KILL",             // 2.2
+		"CAP_SETGID",           // 2.2
+		"CAP_SETUID",           // 2.2
+		"CAP_SETPCAP",          // 2.2
+		"CAP_LINUX_IMMUTABLE",  // 2.2
+		"CAP_NET_BIND_SERVICE", // 2.2
+		"CAP_NET_BROADCAST",    // 2.2
+		"CAP_NET_ADMIN",        // 2.2
+		"CAP_NET_RAW",          // 2.2
+		"CAP_IPC_LOCK",         // 2.2
+		"CAP_IPC_OWNER",        // 2.2
+		"CAP_SYS_MODULE",       // 2.2
+		"CAP_SYS_RAWIO",        // 2.2
+		"CAP_SYS_CHROOT",       // 2.2
+		"CAP_SYS_PTRACE",       // 2.2
+		"CAP_SYS_PACCT",        // 2.2
+		"CAP_SYS_ADMIN",        // 2.2
+		"CAP_SYS_BOOT",         // 2.2
+		"CAP_SYS_NICE",         // 2.2
+		"CAP_SYS_RESOURCE",     // 2.2
+		"CAP_SYS_TIME",         // 2.2
+		"CAP_SYS_TTY_CONFIG",   // 2.2
+		"CAP_MKNOD",            // 2.4
+		"CAP_LEASE",            // 2.4
+		"CAP_AUDIT_WRITE",      // 2.6.11
+		"CAP_AUDIT_CONTROL",    // 2.6.11
+		"CAP_SETFCAP",          // 2.6.24
+		"CAP_MAC_OVERRIDE",     // 2.6.25
+		"CAP_MAC_ADMIN",        // 2.6.25
+		"CAP_SYSLOG",           // 2.6.37
+		"CAP_WAKE_ALARM",       // 3.0
+		"CAP_BLOCK_SUSPEND",    // 3.5
+	}
+	// caps316 is the caps of kernel 3.16 (38 entries)
+	caps316 = append(caps35, "CAP_AUDIT_READ")
+	// caps58 is the caps of kernel 5.8 (40 entries)
+	caps58 = append(caps316, []string{"CAP_PERFMON", "CAP_BPF"}...)
+	// caps59 is the caps of kernel 5.9 (41 entries)
+	caps59     = append(caps58, "CAP_CHECKPOINT_RESTORE")
+	capsLatest = caps59 // the table consulted by FromNumber and returned by Known
+)
+
+// Known returns the known cap strings of the latest kernel.
+// The current latest kernel is 5.9.
+func Known() []string {
+	return capsLatest // NOTE: this is the package-level slice itself, not a copy
+}

+ 6 - 2
vendor/github.com/containerd/containerd/pkg/process/init.go

@@ -193,11 +193,15 @@ func (p *Init) createCheckpointedState(r *CreateConfig, pidFile *pidFile) error
 			ParentPath: r.ParentCheckpoint,
 		},
 		PidFile:     pidFile.Path(),
-		IO:          p.io.IO(),
 		NoPivot:     p.NoPivotRoot,
 		Detach:      true,
 		NoSubreaper: true,
 	}
+
+	if p.io != nil {
+		opts.IO = p.io.IO()
+	}
+
 	p.initState = &createdCheckpointState{
 		p:    p,
 		opts: opts,
@@ -441,7 +445,7 @@ func (p *Init) checkpoint(ctx context.Context, r *CheckpointConfig) error {
 	}, actions...); err != nil {
 		dumpLog := filepath.Join(p.Bundle, "criu-dump.log")
 		if cerr := copyFile(dumpLog, filepath.Join(work, "dump.log")); cerr != nil {
-			log.G(ctx).Error(err)
+			log.G(ctx).WithError(cerr).Error("failed to copy dump.log to criu-dump.log")
 		}
 		return fmt.Errorf("%s path= %s", criuError(err), dumpLog)
 	}

+ 2 - 16
vendor/github.com/containerd/containerd/pkg/process/io.go

@@ -34,7 +34,6 @@ import (
 	"github.com/containerd/containerd/log"
 	"github.com/containerd/containerd/namespaces"
 	"github.com/containerd/containerd/pkg/stdio"
-	"github.com/containerd/containerd/sys"
 	"github.com/containerd/fifo"
 	runc "github.com/containerd/go-runc"
 	"github.com/hashicorp/go-multierror"
@@ -179,7 +178,7 @@ func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, w
 			},
 		},
 	} {
-		ok, err := sys.IsFifo(i.name)
+		ok, err := fifo.IsFifo(i.name)
 		if err != nil {
 			return err
 		}
@@ -252,14 +251,6 @@ func NewBinaryIO(ctx context.Context, id string, uri *url.URL) (_ runc.IO, err e
 		return nil, err
 	}
 
-	var args []string
-	for k, vs := range uri.Query() {
-		args = append(args, k)
-		if len(vs) > 0 {
-			args = append(args, vs[0])
-		}
-	}
-
 	var closers []func() error
 	defer func() {
 		if err == nil {
@@ -290,12 +281,7 @@ func NewBinaryIO(ctx context.Context, id string, uri *url.URL) (_ runc.IO, err e
 	}
 	closers = append(closers, r.Close, w.Close)
 
-	cmd := exec.Command(uri.Path, args...)
-	cmd.Env = append(cmd.Env,
-		"CONTAINER_ID="+id,
-		"CONTAINER_NAMESPACE="+ns,
-	)
-
+	cmd := NewBinaryCmd(uri, id, ns)
 	cmd.ExtraFiles = append(cmd.ExtraFiles, out.r, serr.r, w)
 	// don't need to register this with the reaper or wait when
 	// running inside a shim

+ 1 - 1
vendor/github.com/containerd/containerd/runtime/io.go → vendor/github.com/containerd/containerd/pkg/process/io_util.go

@@ -14,7 +14,7 @@
    limitations under the License.
 */
 
-package runtime
+package process
 
 import (
 	"net/url"

+ 1 - 1
vendor/github.com/containerd/containerd/pkg/process/utils.go

@@ -172,7 +172,7 @@ func (p *pidFile) Read() (int, error) {
 func waitTimeout(ctx context.Context, wg *sync.WaitGroup, timeout time.Duration) error {
 	ctx, cancel := context.WithTimeout(ctx, timeout)
 	defer cancel()
-	done := make(chan struct{}, 1)
+	done := make(chan struct{})
 	go func() {
 		wg.Wait()
 		close(done)

+ 1 - 1
vendor/github.com/containerd/containerd/sys/userns_linux.go → vendor/github.com/containerd/containerd/pkg/userns/userns_linux.go

@@ -14,7 +14,7 @@
    limitations under the License.
 */
 
-package sys
+package userns
 
 import (
 	"bufio"

+ 1 - 1
vendor/github.com/containerd/containerd/sys/userns_unsupported.go → vendor/github.com/containerd/containerd/pkg/userns/userns_unsupported.go

@@ -16,7 +16,7 @@
    limitations under the License.
 */
 
-package sys
+package userns
 
 // RunningInUserNS is a stub for non-Linux systems
 // Always returns false

+ 53 - 5
vendor/github.com/containerd/containerd/platforms/defaults_windows.go

@@ -19,15 +19,63 @@
 package platforms
 
 import (
+	"fmt"
 	"runtime"
+	"strconv"
+	"strings"
 
+	imagespec "github.com/opencontainers/image-spec/specs-go/v1"
 	specs "github.com/opencontainers/image-spec/specs-go/v1"
+	"golang.org/x/sys/windows"
 )
 
-// Default returns the default matcher for the platform.
+type matchComparer struct { // matchComparer pairs a base Matcher with a required OSVersion prefix
+	defaults        Matcher // base matcher that Match consults first
+	osVersionPrefix string  // prefix a platform's OSVersion must start with for Match to succeed
+}
+
+// Match reports whether p is accepted by m.defaults and p.OSVersion starts with
+// m.osVersionPrefix (Default builds that prefix as "major.minor.build").
+func (m matchComparer) Match(p imagespec.Platform) bool {
+	if m.defaults.Match(p) {
+		// TODO(windows): Figure out whether OSVersion is deprecated.
+		return strings.HasPrefix(p.OSVersion, m.osVersionPrefix) // NOTE(review): plain string prefix, so a longer build number sharing these leading characters also passes
+	}
+	return false
+}
+
+// Less reports whether p1 should sort before p2. A platform accepted by Match sorts
+// ahead of one that is not; when both are accepted, the one with the larger revision
+// (see revision) sorts first.
+func (m matchComparer) Less(p1, p2 imagespec.Platform) bool {
+	m1, m2 := m.Match(p1), m.Match(p2)
+	if m1 && m2 {
+		r1, r2 := revision(p1.OSVersion), revision(p2.OSVersion)
+		return r1 > r2 // equal revisions give false, so neither sorts before the other
+	}
+	return m1 && !m2 // true only when p1 matches and p2 does not
+}
+
+func revision(v string) int { // revision returns the 4th dot-separated field of v as an int, or 0 if it is absent or not a number
+	parts := strings.Split(v, ".")
+	if len(parts) < 4 { // fewer than four fields, so there is no revision component
+		return 0
+	}
+	r, err := strconv.Atoi(parts[3])
+	if err != nil { // a non-numeric revision is treated as 0
+		return 0
+	}
+	return r
+}
+
+// Default returns the current platform's default platform specification.
 func Default() MatchComparer {
-	return Ordered(DefaultSpec(), specs.Platform{
-		OS:           "linux",
-		Architecture: runtime.GOARCH,
-	})
+	major, minor, build := windows.RtlGetNtVersionNumbers()
+	return matchComparer{
+		defaults: Ordered(DefaultSpec(), specs.Platform{
+			OS:           "linux",
+			Architecture: runtime.GOARCH,
+		}),
+		osVersionPrefix: fmt.Sprintf("%d.%d.%d", major, minor, build),
+	}
 }

+ 45 - 7
vendor/github.com/containerd/containerd/remotes/docker/pusher.go

@@ -44,17 +44,47 @@ type dockerPusher struct {
 	tracker StatusTracker
 }
 
+// Writer implements Ingester API of content store. This allows the client
+// to receive ErrUnavailable when there is already an on-going upload.
+// The options must set a non-empty ref, otherwise ErrInvalidArgument is returned.
+// Note that the tracker MUST implement StatusTrackLocker interface to avoid race condition on StatusTracker.
+func (p dockerPusher) Writer(ctx context.Context, opts ...content.WriterOpt) (content.Writer, error) {
+	var wOpts content.WriterOpts
+	for _, opt := range opts { // apply each option; the first failure aborts
+		if err := opt(&wOpts); err != nil {
+			return nil, err
+		}
+	}
+	if wOpts.Ref == "" {
+		return nil, errors.Wrap(errdefs.ErrInvalidArgument, "ref must not be empty")
+	}
+	return p.push(ctx, wOpts.Desc, wOpts.Ref, true) // true is push's unavailableOnFail argument
+}
+
 func (p dockerPusher) Push(ctx context.Context, desc ocispec.Descriptor) (content.Writer, error) { // Push starts a push of desc, tracked under the ref from remotes.MakeRefKey
+	return p.push(ctx, desc, remotes.MakeRefKey(ctx, desc), false) // false is push's unavailableOnFail argument
 }
+
+func (p dockerPusher) push(ctx context.Context, desc ocispec.Descriptor, ref string, unavailableOnFail bool) (content.Writer, error) {
+	if l, ok := p.tracker.(StatusTrackLocker); ok {
+		l.Lock(ref)
+		defer l.Unlock(ref)
+	}
 	ctx, err := ContextWithRepositoryScope(ctx, p.refspec, true)
 	if err != nil {
 		return nil, err
 	}
-	ref := remotes.MakeRefKey(ctx, desc)
 	status, err := p.tracker.GetStatus(ref)
 	if err == nil {
-		if status.Offset == status.Total {
+		if status.Committed && status.Offset == status.Total {
 			return nil, errors.Wrapf(errdefs.ErrAlreadyExists, "ref %v", ref)
 		}
+		if unavailableOnFail {
+			// Another push of this ref is happening elsewhere. The rest of function
+			// will continue only when `errdefs.IsNotFound(err) == true` (i.e. there
+			// is no actively-tracked ref already).
+			return nil, errors.Wrap(errdefs.ErrUnavailable, "push is on-going")
+		}
 		// TODO: Handle incomplete status
 	} else if !errdefs.IsNotFound(err) {
 		return nil, errors.Wrap(err, "failed to get status")
@@ -105,8 +135,11 @@ func (p dockerPusher) Push(ctx context.Context, desc ocispec.Descriptor) (conten
 
 			if exists {
 				p.tracker.SetStatus(ref, Status{
+					Committed: true,
 					Status: content.Status{
-						Ref: ref,
+						Ref:    ref,
+						Total:  desc.Size,
+						Offset: desc.Size,
 						// TODO: Set updated time?
 					},
 				})
@@ -162,8 +195,11 @@ func (p dockerPusher) Push(ctx context.Context, desc ocispec.Descriptor) (conten
 		case http.StatusOK, http.StatusAccepted, http.StatusNoContent:
 		case http.StatusCreated:
 			p.tracker.SetStatus(ref, Status{
+				Committed: true,
 				Status: content.Status{
-					Ref: ref,
+					Ref:    ref,
+					Total:  desc.Size,
+					Offset: desc.Size,
 				},
 			})
 			return nil, errors.Wrapf(errdefs.ErrAlreadyExists, "content %v on remote", desc.Digest)
@@ -341,8 +377,6 @@ func (pw *pushWriter) Commit(ctx context.Context, size int64, expected digest.Di
 	if err := pw.pipe.Close(); err != nil {
 		return err
 	}
-	// TODO: Update status to determine committing
-
 	// TODO: timeout waiting for response
 	resp := <-pw.responseC
 	if resp.err != nil {
@@ -354,7 +388,7 @@ func (pw *pushWriter) Commit(ctx context.Context, size int64, expected digest.Di
 	switch resp.StatusCode {
 	case http.StatusOK, http.StatusCreated, http.StatusNoContent, http.StatusAccepted:
 	default:
-		return errors.Errorf("unexpected status: %s", resp.Status)
+		return remoteserrors.NewUnexpectedStatusErr(resp.Response)
 	}
 
 	status, err := pw.tracker.GetStatus(pw.ref)
@@ -379,6 +413,10 @@ func (pw *pushWriter) Commit(ctx context.Context, size int64, expected digest.Di
 		return errors.Errorf("got digest %s, expected %s", actual, expected)
 	}
 
+	status.Committed = true
+	status.UpdatedAt = time.Now()
+	pw.tracker.SetStatus(pw.ref, status)
+
 	return nil
 }
 

+ 37 - 4
vendor/github.com/containerd/containerd/remotes/docker/registry.go

@@ -17,7 +17,10 @@
 package docker
 
 import (
+	"net"
 	"net/http"
+
+	"github.com/pkg/errors"
 )
 
 // HostCapabilities represent the capabilities of the registry
@@ -56,6 +59,7 @@ const (
 	// Reserved for future capabilities (i.e. search, catalog, remove)
 )
 
+// Has checks whether the capabilities list has the provided capability (every bit set in t is also set in c)
 func (c HostCapabilities) Has(t HostCapabilities) bool {
 	return c&t == t
 }
@@ -201,12 +205,41 @@ func MatchAllHosts(string) (bool, error) {
 
 // MatchLocalhost is a host match function which returns true for
 // localhost.
+//
+// Note: this does not handle matching of ip addresses in octal,
+// decimal or hex form.
 func MatchLocalhost(host string) (bool, error) {
-	for _, s := range []string{"localhost", "127.0.0.1", "[::1]"} {
-		if len(host) >= len(s) && host[0:len(s)] == s && (len(host) == len(s) || host[len(s)] == ':') {
-			return true, nil
+	switch {
+	case host == "::1":
+		return true, nil
+	case host == "[::1]":
+		return true, nil
+	}
+	h, p, err := net.SplitHostPort(host)
+
+	// addrError helps distinguish between errors of form
+	// "no colon in address" and "too many colons in address".
+	// The former is fine as the host string need not have a
+	// port. Latter needs to be handled.
+	addrError := &net.AddrError{
+		Err:  "missing port in address",
+		Addr: host,
+	}
+	if err != nil {
+		if err.Error() != addrError.Error() {
+			return false, err
 		}
+		// host string without any port specified
+		h = host
+	} else if len(p) == 0 {
+		return false, errors.New("invalid host name format")
+	}
+
+	// use ipv4 dotted decimal for further checking
+	if h == "localhost" {
+		h = "127.0.0.1"
 	}
-	return host == "::1", nil
+	ip := net.ParseIP(h)
 
+	return ip.IsLoopback(), nil
 }

+ 2 - 0
vendor/github.com/containerd/containerd/remotes/docker/resolver.go

@@ -286,12 +286,14 @@ func (r *dockerResolver) Resolve(ctx context.Context, ref string) (string, ocisp
 				if lastErr == nil {
 					lastErr = err
 				}
+				log.G(ctx).WithError(err).Info("trying next host")
 				continue // try another host
 			}
 			resp.Body.Close() // don't care about body contents.
 
 			if resp.StatusCode > 299 {
 				if resp.StatusCode == http.StatusNotFound {
+					log.G(ctx).Info("trying next host - response was http.StatusNotFound")
 					continue
 				}
 				return "", ocispec.Descriptor{}, errors.Errorf("unexpected status code %v: %v", u, resp.Status)

+ 21 - 1
vendor/github.com/containerd/containerd/remotes/docker/status.go

@@ -21,6 +21,7 @@ import (
 
 	"github.com/containerd/containerd/content"
 	"github.com/containerd/containerd/errdefs"
+	"github.com/moby/locker"
 	"github.com/pkg/errors"
 )
 
@@ -28,6 +29,8 @@ import (
 type Status struct {
 	content.Status
 
+	Committed bool
+
 	// UploadUUID is used by the Docker registry to reference blob uploads
 	UploadUUID string
 }
@@ -38,15 +41,24 @@ type StatusTracker interface {
 	SetStatus(string, Status)
 }
 
+// StatusTrackLocker is a StatusTracker that can also be locked and unlocked by a string key; dockerPusher.push passes the ref as that key
+type StatusTrackLocker interface {
+	StatusTracker
+	Lock(string)   // lock the given key
+	Unlock(string) // unlock the given key
+}
+
 type memoryStatusTracker struct {
 	statuses map[string]Status
 	m        sync.Mutex
+	locker   *locker.Locker
 }
 
 // NewInMemoryTracker returns a StatusTracker that tracks content status in-memory
-func NewInMemoryTracker() StatusTracker {
+func NewInMemoryTracker() StatusTrackLocker {
 	return &memoryStatusTracker{
 		statuses: map[string]Status{},
+		locker:   locker.New(),
 	}
 }
 
@@ -65,3 +77,11 @@ func (t *memoryStatusTracker) SetStatus(ref string, status Status) {
 	t.statuses[ref] = status
 	t.m.Unlock()
 }
+
+func (t *memoryStatusTracker) Lock(ref string) { // Lock delegates to t.locker.Lock with ref as the key
+	t.locker.Lock(ref)
+}
+
+func (t *memoryStatusTracker) Unlock(ref string) { // Unlock delegates to t.locker.Unlock with ref as the key
+	t.locker.Unlock(ref)
+}

+ 14 - 4
vendor/github.com/containerd/containerd/remotes/errors/errors.go

@@ -27,9 +27,10 @@ var _ error = ErrUnexpectedStatus{}
 
 // ErrUnexpectedStatus is returned if a registry API request returned with unexpected HTTP status
 type ErrUnexpectedStatus struct {
-	Status     string
-	StatusCode int
-	Body       []byte
+	Status                    string
+	StatusCode                int
+	Body                      []byte
+	RequestURL, RequestMethod string
 }
 
 func (e ErrUnexpectedStatus) Error() string {
@@ -42,5 +43,14 @@ func NewUnexpectedStatusErr(resp *http.Response) error {
 	if resp.Body != nil {
 		b, _ = ioutil.ReadAll(io.LimitReader(resp.Body, 64000)) // 64KB
 	}
-	return ErrUnexpectedStatus{Status: resp.Status, StatusCode: resp.StatusCode, Body: b}
+	err := ErrUnexpectedStatus{
+		Body:          b,
+		Status:        resp.Status,
+		StatusCode:    resp.StatusCode,
+		RequestMethod: resp.Request.Method,
+	}
+	if resp.Request.URL != nil {
+		err.RequestURL = resp.Request.URL.String()
+	}
+	return err
 }

+ 32 - 9
vendor/github.com/containerd/containerd/remotes/handlers.go

@@ -31,6 +31,7 @@ import (
 	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
+	"golang.org/x/sync/semaphore"
 )
 
 type refKeyPrefix struct{}
@@ -55,25 +56,32 @@ func WithMediaTypeKeyPrefix(ctx context.Context, mediaType, prefix string) conte
 // used to lookup ongoing processes related to the descriptor. This function
 // may look to the context to namespace the reference appropriately.
 func MakeRefKey(ctx context.Context, desc ocispec.Descriptor) string {
+	key := desc.Digest.String()
+	if desc.Annotations != nil {
+		if name, ok := desc.Annotations[ocispec.AnnotationRefName]; ok {
+			key = fmt.Sprintf("%s@%s", name, desc.Digest.String())
+		}
+	}
+
 	if v := ctx.Value(refKeyPrefix{}); v != nil {
 		values := v.(map[string]string)
 		if prefix := values[desc.MediaType]; prefix != "" {
-			return prefix + "-" + desc.Digest.String()
+			return prefix + "-" + key
 		}
 	}
 
 	switch mt := desc.MediaType; {
 	case mt == images.MediaTypeDockerSchema2Manifest || mt == ocispec.MediaTypeImageManifest:
-		return "manifest-" + desc.Digest.String()
+		return "manifest-" + key
 	case mt == images.MediaTypeDockerSchema2ManifestList || mt == ocispec.MediaTypeImageIndex:
-		return "index-" + desc.Digest.String()
+		return "index-" + key
 	case images.IsLayerType(mt):
-		return "layer-" + desc.Digest.String()
+		return "layer-" + key
 	case images.IsKnownConfig(mt):
-		return "config-" + desc.Digest.String()
+		return "config-" + key
 	default:
 		log.G(ctx).Warnf("reference for unknown type: %s", mt)
-		return "unknown-" + desc.Digest.String()
+		return "unknown-" + key
 	}
 }
 
@@ -115,6 +123,12 @@ func fetch(ctx context.Context, ingester content.Ingester, fetcher Fetcher, desc
 		return err
 	}
 
+	if desc.Size == 0 {
+		// most likely a poorly configured registry/web front end which responded with no
+		// Content-Length header; unable (not to mention useless) to commit a 0-length entry
+		// into the content store. Error out here otherwise the error sent back is confusing
+		return errors.Wrapf(errdefs.ErrInvalidArgument, "unable to fetch descriptor (%s) which reports content size of zero", desc.Digest)
+	}
 	if ws.Offset == desc.Size {
 		// If writer is already complete, commit and return
 		err := cw.Commit(ctx, desc.Size, desc.Digest)
@@ -151,7 +165,15 @@ func PushHandler(pusher Pusher, provider content.Provider) images.HandlerFunc {
 func push(ctx context.Context, provider content.Provider, pusher Pusher, desc ocispec.Descriptor) error {
 	log.G(ctx).Debug("push")
 
-	cw, err := pusher.Push(ctx, desc)
+	var (
+		cw  content.Writer
+		err error
+	)
+	if cs, ok := pusher.(content.Ingester); ok {
+		cw, err = content.OpenWriter(ctx, cs, content.WithRef(MakeRefKey(ctx, desc)), content.WithDescriptor(desc))
+	} else {
+		cw, err = pusher.Push(ctx, desc)
+	}
 	if err != nil {
 		if !errdefs.IsAlreadyExists(err) {
 			return err
@@ -175,7 +197,8 @@ func push(ctx context.Context, provider content.Provider, pusher Pusher, desc oc
 //
 // Base handlers can be provided which will be called before any push specific
 // handlers.
-func PushContent(ctx context.Context, pusher Pusher, desc ocispec.Descriptor, store content.Store, platform platforms.MatchComparer, wrapper func(h images.Handler) images.Handler) error {
+func PushContent(ctx context.Context, pusher Pusher, desc ocispec.Descriptor, store content.Store, limiter *semaphore.Weighted, platform platforms.MatchComparer, wrapper func(h images.Handler) images.Handler) error {
+
 	var m sync.Mutex
 	manifestStack := []ocispec.Descriptor{}
 
@@ -207,7 +230,7 @@ func PushContent(ctx context.Context, pusher Pusher, desc ocispec.Descriptor, st
 		handler = wrapper(handler)
 	}
 
-	if err := images.Dispatch(ctx, handler, nil, desc); err != nil {
+	if err := images.Dispatch(ctx, handler, limiter, desc); err != nil {
 		return err
 	}
 

+ 2 - 0
vendor/github.com/containerd/containerd/remotes/resolver.go

@@ -45,6 +45,8 @@ type Resolver interface {
 	Fetcher(ctx context.Context, ref string) (Fetcher, error)
 
 	// Pusher returns a new pusher for the provided reference
+	// The returned Pusher should satisfy content.Ingester and concurrent attempts
+	// to push the same blob using the Ingester API should result in ErrUnavailable.
 	Pusher(ctx context.Context, ref string) (Pusher, error)
 }
 

+ 2 - 2
vendor/github.com/containerd/containerd/rootfs/init.go

@@ -67,7 +67,7 @@ func InitRootFS(ctx context.Context, name string, parent digest.Digest, readonly
 	return snapshotter.Prepare(ctx, name, parentS)
 }
 
-func createInitLayer(ctx context.Context, parent, initName string, initFn func(string) error, snapshotter snapshots.Snapshotter, mounter Mounter) (string, error) {
+func createInitLayer(ctx context.Context, parent, initName string, initFn func(string) error, snapshotter snapshots.Snapshotter, mounter Mounter) (_ string, retErr error) {
 	initS := fmt.Sprintf("%s %s", parent, initName)
 	if _, err := snapshotter.Stat(ctx, initS); err == nil {
 		return initS, nil
@@ -87,7 +87,7 @@ func createInitLayer(ctx context.Context, parent, initName string, initFn func(s
 	}
 
 	defer func() {
-		if err != nil {
+		if retErr != nil {
 			if rerr := snapshotter.Remove(ctx, td); rerr != nil {
 				log.G(ctx).Errorf("Failed to remove snapshot %s: %v", td, rerr)
 			}

+ 5 - 5
vendor/github.com/containerd/containerd/runtime/runtime.go

@@ -63,14 +63,14 @@ type PlatformRuntime interface {
 	// ID of the runtime
 	ID() string
 	// Create creates a task with the provided id and options.
-	Create(ctx context.Context, id string, opts CreateOpts) (Task, error)
+	Create(ctx context.Context, taskID string, opts CreateOpts) (Task, error)
 	// Get returns a task.
-	Get(context.Context, string) (Task, error)
+	Get(ctx context.Context, taskID string) (Task, error)
 	// Tasks returns all the current tasks for the runtime.
 	// Any container runs at most one task at a time.
-	Tasks(context.Context, bool) ([]Task, error)
+	Tasks(ctx context.Context, all bool) ([]Task, error)
 	// Add adds a task into runtime.
-	Add(context.Context, Task) error
+	Add(ctx context.Context, task Task) error
 	// Delete remove a task.
-	Delete(context.Context, string)
+	Delete(ctx context.Context, taskID string)
 }

+ 17 - 17
vendor/github.com/containerd/containerd/runtime/task.go

@@ -36,19 +36,19 @@ type Process interface {
 	// ID of the process
 	ID() string
 	// State returns the process state
-	State(context.Context) (State, error)
+	State(ctx context.Context) (State, error)
 	// Kill signals a container
-	Kill(context.Context, uint32, bool) error
-	// Pty resizes the processes pty/console
-	ResizePty(context.Context, ConsoleSize) error
-	// CloseStdin closes the processes stdin
-	CloseIO(context.Context) error
+	Kill(ctx context.Context, signal uint32, all bool) error
+	// ResizePty resizes the processes pty/console
+	ResizePty(ctx context.Context, size ConsoleSize) error
+	// CloseIO closes the processes IO
+	CloseIO(ctx context.Context) error
 	// Start the container's user defined process
-	Start(context.Context) error
+	Start(ctx context.Context) error
 	// Wait for the process to exit
-	Wait(context.Context) (*Exit, error)
+	Wait(ctx context.Context) (*Exit, error)
 	// Delete deletes the process
-	Delete(context.Context) (*Exit, error)
+	Delete(ctx context.Context) (*Exit, error)
 }
 
 // Task is the runtime object for an executing container
@@ -60,21 +60,21 @@ type Task interface {
 	// Namespace that the task exists in
 	Namespace() string
 	// Pause pauses the container process
-	Pause(context.Context) error
+	Pause(ctx context.Context) error
 	// Resume unpauses the container process
-	Resume(context.Context) error
+	Resume(ctx context.Context) error
 	// Exec adds a process into the container
-	Exec(context.Context, string, ExecOpts) (Process, error)
+	Exec(ctx context.Context, id string, opts ExecOpts) (Process, error)
 	// Pids returns all pids
-	Pids(context.Context) ([]ProcessInfo, error)
+	Pids(ctx context.Context) ([]ProcessInfo, error)
 	// Checkpoint checkpoints a container to an image with live system data
-	Checkpoint(context.Context, string, *types.Any) error
+	Checkpoint(ctx context.Context, path string, opts *types.Any) error
 	// Update sets the provided resources to a running task
-	Update(context.Context, *types.Any, map[string]string) error
+	Update(ctx context.Context, resources *types.Any, annotations map[string]string) error
 	// Process returns a process within the task for the provided id
-	Process(context.Context, string) (Process, error)
+	Process(ctx context.Context, id string) (Process, error)
 	// Stats returns runtime specific metrics for a task
-	Stats(context.Context) (*types.Any, error)
+	Stats(ctx context.Context) (*types.Any, error)
 }
 
 // ExecOpts provides additional options for additional processes running in a task

+ 1 - 1
vendor/github.com/containerd/containerd/runtime/v1/linux/runtime.go

@@ -219,7 +219,7 @@ func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts
 				namespaces.WithNamespace(context.TODO(), namespace), cleanupTimeout)
 			defer deferCancel()
 			if kerr := s.KillShim(deferCtx); kerr != nil {
-				log.G(ctx).WithError(err).Error("failed to kill shim")
+				log.G(ctx).WithError(kerr).Error("failed to kill shim")
 			}
 		}
 	}()

+ 1 - 4
vendor/github.com/containerd/containerd/runtime/v1/shim/client/client.go

@@ -182,10 +182,7 @@ func setupOOMScore(shimPid int) error {
 		return errors.Wrap(err, "get daemon OOM score")
 	}
 	shimScore := score + 1
-	if shimScore > sys.OOMScoreAdjMax {
-		shimScore = sys.OOMScoreAdjMax
-	}
-	if err := sys.SetOOMScore(shimPid, shimScore); err != nil {
+	if err := sys.AdjustOOMScore(shimPid, shimScore); err != nil {
 		return errors.Wrap(err, "set shim OOM score")
 	}
 	return nil

+ 3 - 0
vendor/github.com/containerd/containerd/runtime/v1/shim/service.go

@@ -397,6 +397,9 @@ func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*sh
 		return nil, errdefs.ToGRPC(err)
 	}
 	var processes []*task.ProcessInfo
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
 	for _, pid := range pids {
 		pInfo := task.ProcessInfo{
 			Pid: pid,

+ 3 - 3
vendor/github.com/containerd/containerd/runtime/v1/shim/service_linux.go

@@ -26,7 +26,7 @@ import (
 
 	"github.com/containerd/console"
 	"github.com/containerd/containerd/namespaces"
-	"github.com/containerd/containerd/runtime"
+	"github.com/containerd/containerd/pkg/process"
 	"github.com/containerd/fifo"
 	"github.com/pkg/errors"
 )
@@ -75,14 +75,14 @@ func (p *linuxPlatform) CopyConsole(ctx context.Context, console console.Console
 			return nil, err
 		}
 
-		cmd := runtime.NewBinaryCmd(uri, id, ns)
+		cmd := process.NewBinaryCmd(uri, id, ns)
 
 		// In case of unexpected errors during logging binary start, close open pipes
 		var filesToClose []*os.File
 
 		defer func() {
 			if retErr != nil {
-				runtime.CloseFiles(filesToClose...)
+				process.CloseFiles(filesToClose...)
 			}
 		}()
 

+ 3 - 4
vendor/github.com/containerd/containerd/runtime/v1/shim/service_unix.go

@@ -28,7 +28,7 @@ import (
 
 	"github.com/containerd/console"
 	"github.com/containerd/containerd/namespaces"
-	"github.com/containerd/containerd/runtime"
+	"github.com/containerd/containerd/pkg/process"
 	"github.com/containerd/fifo"
 	"github.com/pkg/errors"
 )
@@ -63,15 +63,14 @@ func (p *unixPlatform) CopyConsole(ctx context.Context, console console.Console,
 		if err != nil {
 			return nil, err
 		}
-
-		cmd := runtime.NewBinaryCmd(uri, id, ns)
+		cmd := process.NewBinaryCmd(uri, id, ns)
 
 		// In case of unexpected errors during logging binary start, close open pipes
 		var filesToClose []*os.File
 
 		defer func() {
 			if retErr != nil {
-				runtime.CloseFiles(filesToClose...)
+				process.CloseFiles(filesToClose...)
 			}
 		}()
 

+ 2 - 0
vendor/github.com/containerd/containerd/services/introspection/introspection.go

@@ -25,6 +25,7 @@ import (
 	ptypes "github.com/gogo/protobuf/types"
 )
 
+// Service defines the introspection service interface
 type Service interface {
 	Plugins(context.Context, []string) (*api.PluginsResponse, error)
 	Server(context.Context, *ptypes.Empty) (*api.ServerResponse, error)
@@ -36,6 +37,7 @@ type introspectionRemote struct {
 
 var _ = (Service)(&introspectionRemote{})
 
+// NewIntrospectionServiceFromClient creates a new introspection service from an API client
 func NewIntrospectionServiceFromClient(c api.IntrospectionClient) Service {
 	return &introspectionRemote{client: c}
 }

+ 4 - 0
vendor/github.com/containerd/containerd/services/introspection/local.go

@@ -54,6 +54,7 @@ func init() {
 	})
 }
 
+// Local is a local implementation of the introspection service
 type Local struct {
 	mu      sync.Mutex
 	plugins []api.Plugin
@@ -62,6 +63,7 @@ type Local struct {
 
 var _ = (api.IntrospectionClient)(&Local{})
 
+// UpdateLocal updates the local introspection service
 func (l *Local) UpdateLocal(root string, plugins []api.Plugin) {
 	l.mu.Lock()
 	defer l.mu.Unlock()
@@ -69,6 +71,7 @@ func (l *Local) UpdateLocal(root string, plugins []api.Plugin) {
 	l.plugins = plugins
 }
 
+// Plugins returns the locally defined plugins
 func (l *Local) Plugins(ctx context.Context, req *api.PluginsRequest, _ ...grpc.CallOption) (*api.PluginsResponse, error) {
 	filter, err := filters.ParseAll(req.Filters...)
 	if err != nil {
@@ -96,6 +99,7 @@ func (l *Local) getPlugins() []api.Plugin {
 	return l.plugins
 }
 
+// Server returns the local server information
 func (l *Local) Server(ctx context.Context, _ *ptypes.Empty, _ ...grpc.CallOption) (*api.ServerResponse, error) {
 	u, err := l.getUUID()
 	if err != nil {

+ 20 - 7
vendor/github.com/containerd/containerd/services/server/config/config.go

@@ -20,9 +20,10 @@ import (
 	"path/filepath"
 	"strings"
 
-	"github.com/BurntSushi/toml"
 	"github.com/imdario/mergo"
+	"github.com/pelletier/go-toml"
 	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
 
 	"github.com/containerd/containerd/errdefs"
 	"github.com/containerd/containerd/plugin"
@@ -55,7 +56,7 @@ type Config struct {
 	// required plugin doesn't exist or fails to be initialized or started.
 	RequiredPlugins []string `toml:"required_plugins"`
 	// Plugins provides plugin specific configuration for the initialization of a plugin
-	Plugins map[string]toml.Primitive `toml:"plugins"`
+	Plugins map[string]toml.Tree `toml:"plugins"`
 	// OOMScore adjust the containerd's oom score
 	OOMScore int `toml:"oom_score"`
 	// Cgroup specifies cgroup information for the containerd daemon process
@@ -94,7 +95,9 @@ func (c *Config) GetVersion() int {
 
 // ValidateV2 validates the config for a v2 file
 func (c *Config) ValidateV2() error {
-	if c.GetVersion() != 2 {
+	version := c.GetVersion()
+	if version < 2 {
+		logrus.Warnf("deprecated version : `%d`, please switch to version `2`", version)
 		return nil
 	}
 	for _, p := range c.DisabledPlugins {
@@ -209,7 +212,7 @@ func (c *Config) Decode(p *plugin.Registration) (interface{}, error) {
 	if !ok {
 		return p.Config, nil
 	}
-	if err := toml.PrimitiveDecode(data, p.Config); err != nil {
+	if err := data.Unmarshal(p.Config); err != nil {
 		return nil, err
 	}
 	return p.Config, nil
@@ -258,16 +261,26 @@ func LoadConfig(path string, out *Config) error {
 		out.Imports = append(out.Imports, path)
 	}
 
-	return out.ValidateV2()
+	err := out.ValidateV2()
+	if err != nil {
+		return errors.Wrapf(err, "failed to load TOML from %s", path)
+	}
+	return nil
 }
 
 // loadConfigFile decodes a TOML file at the given path
 func loadConfigFile(path string) (*Config, error) {
 	config := &Config{}
-	_, err := toml.DecodeFile(path, &config)
+
+	file, err := toml.LoadFile(path)
 	if err != nil {
-		return nil, err
+		return nil, errors.Wrapf(err, "failed to load TOML: %s", path)
+	}
+
+	if err := file.Unmarshal(config); err != nil {
+		return nil, errors.Wrap(err, "failed to unmarshal TOML")
 	}
+
 	return config, nil
 }
 

+ 2 - 1
vendor/github.com/containerd/containerd/snapshots/snapshotter.go

@@ -28,7 +28,8 @@ import (
 const (
 	// UnpackKeyPrefix is the beginning of the key format used for snapshots that will have
 	// image content unpacked into them.
-	UnpackKeyPrefix       = "extract"
+	UnpackKeyPrefix = "extract"
+	// UnpackKeyFormat is the format for the snapshotter keys used for extraction
 	UnpackKeyFormat       = UnpackKeyPrefix + "-%s %s"
 	inheritedLabelsPrefix = "containerd.io/snapshot/"
 	labelSnapshotRef      = "containerd.io/snapshot.ref"

+ 66 - 4
vendor/github.com/containerd/containerd/sys/filesys_windows.go

@@ -22,11 +22,14 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
+	"sort"
+	"strconv"
 	"strings"
 	"syscall"
 	"unsafe"
 
 	"github.com/Microsoft/hcsshim"
+	"github.com/pkg/errors"
 	"golang.org/x/sys/windows"
 )
 
@@ -257,12 +260,71 @@ func windowsOpenSequential(path string, mode int, _ uint32) (fd windows.Handle,
 	return h, e
 }
 
-// ForceRemoveAll is the same as os.RemoveAll, but uses hcsshim.DestroyLayer in order
-// to delete container layers.
+// ForceRemoveAll is the same as os.RemoveAll, but is aware of io.containerd.snapshotter.v1.windows
+// and uses hcsshim to unmount and delete container layers contained therein, in the correct order,
+// when passed a containerd root data directory (i.e. the `--root` directory for containerd).
 func ForceRemoveAll(path string) error {
+	// snapshots/windows/windows.go init()
+	const snapshotPlugin = "io.containerd.snapshotter.v1" + "." + "windows"
+	// snapshots/windows/windows.go NewSnapshotter()
+	snapshotDir := filepath.Join(path, snapshotPlugin, "snapshots")
+	if stat, err := os.Stat(snapshotDir); err == nil && stat.IsDir() {
+		if err := cleanupWCOWLayers(snapshotDir); err != nil {
+			return errors.Wrapf(err, "failed to cleanup WCOW layers in %s", snapshotDir)
+		}
+	}
+
+	return os.RemoveAll(path)
+}
+
+func cleanupWCOWLayers(root string) error {
+	// See snapshots/windows/windows.go getSnapshotDir()
+	var layerNums []int
+	if err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+		if path != root && info.IsDir() {
+			if layerNum, err := strconv.Atoi(filepath.Base(path)); err == nil {
+				layerNums = append(layerNums, layerNum)
+			} else {
+				return err
+			}
+			return filepath.SkipDir
+		}
+
+		return nil
+	}); err != nil {
+		return err
+	}
+
+	sort.Sort(sort.Reverse(sort.IntSlice(layerNums)))
+
+	for _, layerNum := range layerNums {
+		if err := cleanupWCOWLayer(filepath.Join(root, strconv.Itoa(layerNum))); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func cleanupWCOWLayer(layerPath string) error {
 	info := hcsshim.DriverInfo{
-		HomeDir: filepath.Dir(path),
+		HomeDir: filepath.Dir(layerPath),
 	}
 
-	return hcsshim.DestroyLayer(info, filepath.Base(path))
+	// ERROR_DEV_NOT_EXIST is returned if the layer is not currently prepared.
+	if err := hcsshim.UnprepareLayer(info, filepath.Base(layerPath)); err != nil {
+		if hcserror, ok := err.(*hcsshim.HcsError); !ok || hcserror.Err != windows.ERROR_DEV_NOT_EXIST {
+			return errors.Wrapf(err, "failed to unprepare %s", layerPath)
+		}
+	}
+
+	if err := hcsshim.DeactivateLayer(info, filepath.Base(layerPath)); err != nil {
+		return errors.Wrapf(err, "failed to deactivate %s", layerPath)
+	}
+
+	if err := hcsshim.DestroyLayer(info, filepath.Base(layerPath)); err != nil {
+		return errors.Wrapf(err, "failed to destroy %s", layerPath)
+	}
+
+	return nil
 }

+ 29 - 7
vendor/github.com/containerd/containerd/sys/oom_unix.go → vendor/github.com/containerd/containerd/sys/oom_linux.go

@@ -1,5 +1,3 @@
-// +build !windows
-
 /*
    Copyright The containerd Authors.
 
@@ -24,17 +22,34 @@ import (
 	"os"
 	"strconv"
 	"strings"
+
+	"github.com/containerd/containerd/pkg/userns"
+	"golang.org/x/sys/unix"
 )
 
 const (
-	// OOMScoreMaxKillable is the maximum score keeping the process killable by the oom killer
-	OOMScoreMaxKillable = -999
-	// OOMScoreAdjMax is from OOM_SCORE_ADJ_MAX https://github.com/torvalds/linux/blob/master/include/uapi/linux/oom.h
+	// OOMScoreAdjMin is from OOM_SCORE_ADJ_MIN https://github.com/torvalds/linux/blob/v5.10/include/uapi/linux/oom.h#L9
+	OOMScoreAdjMin = -1000
+	// OOMScoreAdjMax is from OOM_SCORE_ADJ_MAX https://github.com/torvalds/linux/blob/v5.10/include/uapi/linux/oom.h#L10
 	OOMScoreAdjMax = 1000
 )
 
+// AdjustOOMScore sets the oom score for the provided pid. If the provided score
+// is out of range (-1000 - 1000), it is clipped to the min/max value.
+func AdjustOOMScore(pid, score int) error {
+	if score > OOMScoreAdjMax {
+		score = OOMScoreAdjMax
+	} else if score < OOMScoreAdjMin {
+		score = OOMScoreAdjMin
+	}
+	return SetOOMScore(pid, score)
+}
+
 // SetOOMScore sets the oom score for the provided pid
 func SetOOMScore(pid, score int) error {
+	if score > OOMScoreAdjMax || score < OOMScoreAdjMin {
+		return fmt.Errorf("value out of range (%d): OOM score must be between %d and %d", score, OOMScoreAdjMin, OOMScoreAdjMax)
+	}
 	path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
 	f, err := os.OpenFile(path, os.O_WRONLY, 0)
 	if err != nil {
@@ -42,7 +57,7 @@ func SetOOMScore(pid, score int) error {
 	}
 	defer f.Close()
 	if _, err = f.WriteString(strconv.Itoa(score)); err != nil {
-		if os.IsPermission(err) && (RunningInUserNS() || RunningUnprivileged()) {
+		if os.IsPermission(err) && (!runningPrivileged() || userns.RunningInUserNS()) {
 			return nil
 		}
 		return err
@@ -50,7 +65,8 @@ func SetOOMScore(pid, score int) error {
 	return nil
 }
 
-// GetOOMScoreAdj gets the oom score for a process
+// GetOOMScoreAdj gets the oom score for a process. It returns 0 (zero) if either
+// no oom score is set, or a score is set to 0.
 func GetOOMScoreAdj(pid int) (int, error) {
 	path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
 	data, err := ioutil.ReadFile(path)
@@ -59,3 +75,9 @@ func GetOOMScoreAdj(pid int) (int, error) {
 	}
 	return strconv.Atoi(strings.TrimSpace(string(data)))
 }
+
+// runningPrivileged returns true if the effective user ID of the
+// calling process is 0
+func runningPrivileged() bool {
+	return unix.Geteuid() == 0
+}

+ 13 - 1
vendor/github.com/containerd/containerd/sys/oom_windows.go → vendor/github.com/containerd/containerd/sys/oom_unsupported.go

@@ -1,3 +1,5 @@
+// +build !linux
+
 /*
    Copyright The containerd Authors.
 
@@ -17,10 +19,20 @@
 package sys
 
 const (
-	// OOMScoreAdjMax is not implemented on Windows
+	// OOMScoreMaxKillable is not implemented on non Linux
+	OOMScoreMaxKillable = 0
+	// OOMScoreAdjMax is not implemented on non Linux
 	OOMScoreAdjMax = 0
 )
 
+// AdjustOOMScore sets the oom score for the provided pid. If the provided score
+// is out of range (-1000 - 1000), it is clipped to the min/max value.
+//
+// Not implemented on non-Linux platforms
+func AdjustOOMScore(pid, score int) error {
+	return nil
+}
+
 // SetOOMScore sets the oom score for the process
 //
 // Not implemented on Windows

+ 4 - 14
vendor/github.com/containerd/containerd/sys/env.go → vendor/github.com/containerd/containerd/sys/userns_deprecated.go

@@ -1,5 +1,3 @@
-// +build !windows
-
 /*
    Copyright The containerd Authors.
 
@@ -18,16 +16,8 @@
 
 package sys
 
-import "golang.org/x/sys/unix"
-
-// RunningPrivileged returns true if the effective user ID of the
-// calling process is 0
-func RunningPrivileged() bool {
-	return unix.Geteuid() == 0
-}
+import "github.com/containerd/containerd/pkg/userns"
 
-// RunningUnprivileged returns true if the effective user ID of the
-// calling process is not 0
-func RunningUnprivileged() bool {
-	return !RunningPrivileged()
-}
+// RunningInUserNS detects whether we are currently running in a user namespace.
+// Deprecated: use github.com/containerd/containerd/pkg/userns.RunningInUserNS instead.
+var RunningInUserNS = userns.RunningInUserNS

+ 12 - 3
vendor/github.com/containerd/containerd/task.go

@@ -451,11 +451,20 @@ func (t *task) Checkpoint(ctx context.Context, opts ...CheckpointTaskOpts) (Imag
 		}
 		request.Options = any
 	}
-	// make sure we pause it and resume after all other filesystem operations are completed
-	if err := t.Pause(ctx); err != nil {
+
+	status, err := t.Status(ctx)
+	if err != nil {
 		return nil, err
 	}
-	defer t.Resume(ctx)
+
+	if status.Status != Paused {
+		// make sure we pause it and resume after all other filesystem operations are completed
+		if err := t.Pause(ctx); err != nil {
+			return nil, err
+		}
+		defer t.Resume(ctx)
+	}
+
 	index := v1.Index{
 		Versioned: is.Versioned{
 			SchemaVersion: 2,

+ 1 - 1
vendor/github.com/containerd/containerd/version/version.go

@@ -23,7 +23,7 @@ var (
 	Package = "github.com/containerd/containerd"
 
 	// Version holds the complete version number. Filled in at linking time.
-	Version = "1.5.0-beta.0+unknown"
+	Version = "1.5.2+unknown"
 
 	// Revision is filled with the VCS (e.g. git) revision being used to build
 	// the program at linking time.

+ 4 - 0
vendor/github.com/containerd/continuity/README.md

@@ -63,6 +63,10 @@ $ stat -c %a Makefile
 $ ./bin/continuity verify . /tmp/a.pb
 ```
 
+## Platforms
+
+continuity primarily targets Linux.  continuity may compile for and work on
+other operating systems, but those platforms are not tested.
 
 ## Contribution Guide
 ### Building Proto Package

+ 1 - 1
vendor/github.com/containerd/continuity/devices/devices_unix.go

@@ -56,7 +56,7 @@ func Mknod(p string, mode os.FileMode, maj, min int) error {
 		m |= unix.S_IFIFO
 	}
 
-	return unix.Mknod(p, m, int(dev))
+	return mknod(p, m, dev)
 }
 
 // syscallMode returns the syscall-specific mode bits from Go's portable mode bits.

+ 5 - 5
vendor/github.com/containerd/fifo/mkfifo_nosolaris.go → vendor/github.com/containerd/continuity/devices/mknod_freebsd.go

@@ -1,4 +1,4 @@
-// +build !solaris
+// +build freebsd
 
 /*
    Copyright The containerd Authors.
@@ -16,10 +16,10 @@
    limitations under the License.
 */
 
-package fifo
+package devices
 
-import "syscall"
+import "golang.org/x/sys/unix"
 
-func mkfifo(path string, mode uint32) (err error) {
-	return syscall.Mkfifo(path, mode)
+func mknod(path string, mode uint32, dev uint64) (err error) {
+	return unix.Mknod(path, mode, dev)
 }

+ 5 - 7
vendor/github.com/containerd/fifo/mkfifo_solaris.go → vendor/github.com/containerd/continuity/devices/mknod_unix.go

@@ -1,4 +1,4 @@
-// +build solaris
+// +build linux darwin solaris
 
 /*
    Copyright The containerd Authors.
@@ -16,12 +16,10 @@
    limitations under the License.
 */
 
-package fifo
+package devices
 
-import (
-	"golang.org/x/sys/unix"
-)
+import "golang.org/x/sys/unix"
 
-func mkfifo(path string, mode uint32) (err error) {
-	return unix.Mkfifo(path, mode)
+func mknod(path string, mode uint32, dev uint64) (err error) {
+	return unix.Mknod(path, mode, int(dev))
 }

+ 17 - 2
vendor/github.com/containerd/continuity/fs/copy.go

@@ -39,6 +39,8 @@ type XAttrErrorHandler func(dst, src, xattrKey string, err error) error
 
 type copyDirOpts struct {
 	xeh XAttrErrorHandler
+	// xex contains a set of xattrs to exclude when copying
+	xex map[string]struct{}
 }
 
 type CopyDirOpt func(*copyDirOpts) error
@@ -61,6 +63,19 @@ func WithAllowXAttrErrors() CopyDirOpt {
 	return WithXAttrErrorHandler(xeh)
 }
 
+// WithXAttrExclude allows for exclusion of specified xattr during CopyDir operation.
+func WithXAttrExclude(keys ...string) CopyDirOpt {
+	return func(o *copyDirOpts) error {
+		if o.xex == nil {
+			o.xex = make(map[string]struct{}, len(keys))
+		}
+		for _, key := range keys {
+			o.xex[key] = struct{}{}
+		}
+		return nil
+	}
+}
+
 // CopyDir copies the directory from src to dst.
 // Most efficient copy of files is attempted.
 func CopyDir(dst, src string, opts ...CopyDirOpt) error {
@@ -104,7 +119,7 @@ func copyDirectory(dst, src string, inodes map[uint64]string, o *copyDirOpts) er
 		return errors.Wrapf(err, "failed to copy file info for %s", dst)
 	}
 
-	if err := copyXAttrs(dst, src, o.xeh); err != nil {
+	if err := copyXAttrs(dst, src, o.xex, o.xeh); err != nil {
 		return errors.Wrap(err, "failed to copy xattrs")
 	}
 
@@ -150,7 +165,7 @@ func copyDirectory(dst, src string, inodes map[uint64]string, o *copyDirOpts) er
 			return errors.Wrap(err, "failed to copy file info")
 		}
 
-		if err := copyXAttrs(target, source, o.xeh); err != nil {
+		if err := copyXAttrs(target, source, o.xex, o.xeh); err != nil {
 			return errors.Wrap(err, "failed to copy xattrs")
 		}
 	}

+ 40 - 0
vendor/github.com/containerd/continuity/fs/copy_darwinopenbsdsolaris.go

@@ -0,0 +1,40 @@
+// +build darwin openbsd solaris
+
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package fs
+
+import (
+	"os"
+	"syscall"
+
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+)
+
+func copyDevice(dst string, fi os.FileInfo) error {
+	st, ok := fi.Sys().(*syscall.Stat_t)
+	if !ok {
+		return errors.New("unsupported stat type")
+	}
+	return unix.Mknod(dst, uint32(fi.Mode()), int(st.Rdev))
+}
+
+func utimesNano(name string, atime, mtime syscall.Timespec) error {
+	timespec := []syscall.Timespec{atime, mtime}
+	return syscall.UtimesNano(name, timespec)
+}

+ 42 - 0
vendor/github.com/containerd/continuity/fs/copy_freebsd.go

@@ -0,0 +1,42 @@
+// +build freebsd
+
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package fs
+
+import (
+	"os"
+	"syscall"
+
+	"github.com/pkg/errors"
+	"golang.org/x/sys/unix"
+)
+
+func copyDevice(dst string, fi os.FileInfo) error {
+	st, ok := fi.Sys().(*syscall.Stat_t)
+	if !ok {
+		return errors.New("unsupported stat type")
+	}
+	return unix.Mknod(dst, uint32(fi.Mode()), st.Rdev)
+}
+
+func utimesNano(name string, atime, mtime syscall.Timespec) error {
+	at := unix.NsecToTimespec(atime.Nano())
+	mt := unix.NsecToTimespec(mtime.Nano())
+	utimes := [2]unix.Timespec{at, mt}
+	return unix.UtimesNanoAt(unix.AT_FDCWD, name, utimes[0:], unix.AT_SYMLINK_NOFOLLOW)
+}

Some files were not shown because too many files changed in this diff