vendor: github.com/opencontainers/runc v1.2.0-rc.1

full diff: https://github.com/opencontainers/runc/compare/v1.1.12...v1.2.0-rc.1

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2024-04-03 14:25:31 +02:00 · 2024-04-03 14:25:31 +02:00 · 2dc62475f8
commit 2dc62475f8
parent 5890b67b7f
29 changed files with 744 additions and 1086 deletions
--- a/vendor.mod
+++ b/vendor.mod
@@ -77,7 +77,7 @@ require (
 	github.com/morikuni/aec v1.0.0
 	github.com/opencontainers/go-digest v1.0.0
 	github.com/opencontainers/image-spec v1.1.0-rc5
-	github.com/opencontainers/runc v1.1.12
+	github.com/opencontainers/runc v1.2.0-rc.1
 	github.com/opencontainers/runtime-spec v1.2.0
 	github.com/opencontainers/selinux v1.11.0
 	github.com/pelletier/go-toml v1.9.5
--- a/vendor.sum
+++ b/vendor.sum
@@ -539,8 +539,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
 github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI=
 github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8=
-github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss=
-github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8=
+github.com/opencontainers/runc v1.2.0-rc.1 h1:SMjop2pxxYRTfKdsigna/8xRoaoCfIQfD2cVuOb64/o=
+github.com/opencontainers/runc v1.2.0-rc.1/go.mod h1:m9JwxfHzXz5YTTXBQr7EY9KTuazFAGPyMQx2nRR3vTw=
 github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
 github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk=
 github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
--- a/vendor/github.com/opencontainers/runc/NOTICE
+++ b/vendor/github.com/opencontainers/runc/NOTICE
@@ -8,9 +8,9 @@ The following is courtesy of our legal counsel:


 Use and transfer of Docker may be subject to certain restrictions by the
-United States and other governments.  
+United States and other governments.
 It is your responsibility to ensure that your use and/or transfer does not
-violate applicable laws. 
+violate applicable laws.

 For more information, please see http://www.bis.doc.gov

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
@@ -1,9 +1,24 @@
 package cgroups

 import (
+	"errors"
+
 	"github.com/opencontainers/runc/libcontainer/configs"
 )

+var (
+	// ErrDevicesUnsupported is an error returned when a cgroup manager
+	// is not configured to set device rules.
+	ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")
+
+	// DevicesSetV1 and DevicesSetV2 are functions to set devices for
+	// cgroup v1 and v2, respectively. Unless libcontainer/cgroups/devices
+	// package is imported, it is set to nil, so cgroup managers can't
+	// manage devices.
+	DevicesSetV1 func(path string, r *configs.Resources) error
+	DevicesSetV2 func(path string, r *configs.Resources) error
+)
+
 type Manager interface {
 	// Apply creates a cgroup, if not yet created, and adds a process
 	// with the specified pid into that cgroup.  A special value of -1
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
@@ -50,24 +50,13 @@ func WriteFile(dir, file, data string) error {
 		return err
 	}
 	defer fd.Close()
-	if err := retryingWriteFile(fd, data); err != nil {
+	if _, err := fd.WriteString(data); err != nil {
 		// Having data in the error message helps in debugging.
 		return fmt.Errorf("failed to write %q: %w", data, err)
 	}
 	return nil
 }

-func retryingWriteFile(fd *os.File, data string) error {
-	for {
-		_, err := fd.Write([]byte(data))
-		if errors.Is(err, unix.EINTR) {
-			logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
-			continue
-		}
-		return err
-	}
-}
-
 const (
 	cgroupfsDir    = "/sys/fs/cgroup"
 	cgroupfsPrefix = cgroupfsDir + "/"
@@ -90,7 +79,7 @@ func prepareOpenat2() error {
 		})
 		if err != nil {
 			prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
-			if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
+			if err != unix.ENOSYS {
 				logrus.Warnf("falling back to securejoin: %s", prepErr)
 			} else {
 				logrus.Debug("openat2 not available, falling back to securejoin")
@@ -148,8 +137,9 @@ func openFile(dir, file string, flags int) (*os.File, error) {
 		//
 		// TODO: if such usage will ever be common, amend this
 		// to reopen cgroupRootHandle and retry openat2.
-		fdStr := strconv.Itoa(int(cgroupRootHandle.Fd()))
-		fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
+		fdPath, closer := utils.ProcThreadSelf("fd/" + strconv.Itoa(int(cgroupRootHandle.Fd())))
+		defer closer()
+		fdDest, _ := os.Readlink(fdPath)
 		if fdDest != cgroupfsDir {
 			// Wrap the error so it is clear that cgroupRootHandle
 			// is opened to an unexpected/wrong directory.
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
@@ -32,9 +32,22 @@ type CpuUsage struct {
 	UsageInUsermode uint64 `json:"usage_in_usermode"`
 }

+type PSIData struct {
+	Avg10  float64 `json:"avg10"`
+	Avg60  float64 `json:"avg60"`
+	Avg300 float64 `json:"avg300"`
+	Total  uint64  `json:"total"`
+}
+
+type PSIStats struct {
+	Some PSIData `json:"some,omitempty"`
+	Full PSIData `json:"full,omitempty"`
+}
+
 type CpuStats struct {
 	CpuUsage       CpuUsage       `json:"cpu_usage,omitempty"`
 	ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
+	PSI            *PSIStats      `json:"psi,omitempty"`
 }

 type CPUSetStats struct {
@@ -91,6 +104,7 @@ type MemoryStats struct {
 	UseHierarchy bool `json:"use_hierarchy"`

 	Stats map[string]uint64 `json:"stats,omitempty"`
+	PSI   *PSIStats         `json:"psi,omitempty"`
 }

 type PageUsageByNUMA struct {
@@ -135,6 +149,7 @@ type BlkioStats struct {
 	IoMergedRecursive       []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
 	IoTimeRecursive         []BlkioStatEntry `json:"io_time_recursive,omitempty"`
 	SectorsRecursive        []BlkioStatEntry `json:"sectors_recursive,omitempty"`
+	PSI                     *PSIStats        `json:"psi,omitempty"`
 }

 type HugetlbStats struct {
@@ -157,6 +172,13 @@ type RdmaStats struct {
 	RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
 }

+type MiscStats struct {
+	// current resource usage for a key in misc
+	Usage uint64 `json:"usage,omitempty"`
+	// number of times the resource usage was about to go over the max boundary
+	Events uint64 `json:"events,omitempty"`
+}
+
 type Stats struct {
 	CpuStats    CpuStats    `json:"cpu_stats,omitempty"`
 	CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
@@ -166,10 +188,13 @@ type Stats struct {
 	// the map is in the format "size of hugepage: stats of the hugepage"
 	HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
 	RdmaStats    RdmaStats               `json:"rdma_stats,omitempty"`
+	// the map is in the format "misc resource name: stats of the key"
+	MiscStats map[string]MiscStats `json:"misc_stats,omitempty"`
 }

 func NewStats() *Stats {
 	memoryStats := MemoryStats{Stats: make(map[string]uint64)}
 	hugetlbStats := make(map[string]HugetlbStats)
-	return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
+	miscStats := make(map[string]MiscStats)
+	return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats}
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
@@ -36,13 +36,13 @@ func IsCgroup2UnifiedMode() bool {
 		var st unix.Statfs_t
 		err := unix.Statfs(unifiedMountpoint, &st)
 		if err != nil {
+			level := logrus.WarnLevel
 			if os.IsNotExist(err) && userns.RunningInUserNS() {
-				// ignore the "not found" error if running in userns
-				logrus.WithError(err).Debugf("%s missing, assuming cgroup v1", unifiedMountpoint)
-				isUnified = false
-				return
+				// For rootless containers, sweep it under the rug.
+				level = logrus.DebugLevel
 			}
-			panic(fmt.Sprintf("cannot statfs cgroup root: %s", err))
+			logrus.StandardLogger().Logf(level,
+				"statfs %s: %v; assuming cgroup v1", unifiedMountpoint, err)
 		}
 		isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
 	})
@@ -217,21 +217,26 @@ func PathExists(path string) bool {
 	return true
 }

-func EnterPid(cgroupPaths map[string]string, pid int) error {
-	for _, path := range cgroupPaths {
-		if PathExists(path) {
-			if err := WriteCgroupProc(path, pid); err != nil {
-				return err
-			}
-		}
-	}
-	return nil
-}
+// rmdir tries to remove a directory, optionally retrying on EBUSY.
+func rmdir(path string, retry bool) error {
+	delay := time.Millisecond
+	tries := 10

-func rmdir(path string) error {
+again:
 	err := unix.Rmdir(path)
-	if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare
+	switch err { // nolint:errorlint // unix errors are bare
+	case nil, unix.ENOENT:
 		return nil
+	case unix.EINTR:
+		goto again
+	case unix.EBUSY:
+		if retry && tries > 0 {
+			time.Sleep(delay)
+			delay *= 2
+			tries--
+			goto again
+
+		}
 	}
 	return &os.PathError{Op: "rmdir", Path: path, Err: err}
 }
@@ -239,68 +244,42 @@ func rmdir(path string) error {
 // RemovePath aims to remove cgroup path. It does so recursively,
 // by removing any subdirectories (sub-cgroups) first.
 func RemovePath(path string) error {
-	// try the fast path first
-	if err := rmdir(path); err == nil {
+	// Try the fast path first.
+	if err := rmdir(path, false); err == nil {
 		return nil
 	}

 	infos, err := os.ReadDir(path)
-	if err != nil {
-		if os.IsNotExist(err) {
-			err = nil
-		}
+	if err != nil && !os.IsNotExist(err) {
 		return err
 	}
 	for _, info := range infos {
 		if info.IsDir() {
-			// We should remove subcgroups dir first
+			// We should remove subcgroup first.
 			if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
 				break
 			}
 		}
 	}
 	if err == nil {
-		err = rmdir(path)
+		err = rmdir(path, true)
 	}
 	return err
 }

 // RemovePaths iterates over the provided paths removing them.
-// We trying to remove all paths five times with increasing delay between tries.
-// If after all there are not removed cgroups - appropriate error will be
-// returned.
 func RemovePaths(paths map[string]string) (err error) {
-	const retries = 5
-	delay := 10 * time.Millisecond
-	for i := 0; i < retries; i++ {
-		if i != 0 {
-			time.Sleep(delay)
-			delay *= 2
-		}
-		for s, p := range paths {
-			if err := RemovePath(p); err != nil {
-				// do not log intermediate iterations
-				switch i {
-				case 0:
-					logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)")
-				case retries - 1:
-					logrus.WithError(err).Error("Failed to remove cgroup")
-				}
-			}
-			_, err := os.Stat(p)
-			// We need this strange way of checking cgroups existence because
-			// RemoveAll almost always returns error, even on already removed
-			// cgroups
-			if os.IsNotExist(err) {
-				delete(paths, s)
-			}
-		}
-		if len(paths) == 0 {
-			//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506
-			paths = make(map[string]string)
-			return nil
+	for s, p := range paths {
+		if err := RemovePath(p); err == nil {
+			delete(paths, s)
 		}
 	}
+	if len(paths) == 0 {
+		//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506
+		// TODO: switch to clear once Go < 1.21 is not supported.
+		paths = make(map[string]string)
+		return nil
+	}
 	return fmt.Errorf("Failed to remove paths: %v", paths)
 }

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
@@ -99,11 +99,12 @@ func tryDefaultPath(cgroupPath, subsystem string) string {
 // expensive), so it is assumed that cgroup mounts are not being changed.
 func readCgroupMountinfo() ([]*mountinfo.Info, error) {
 	readMountinfoOnce.Do(func() {
+		// mountinfo.GetMounts uses /proc/thread-self, so we can use it without
+		// issues.
 		cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts(
 			mountinfo.FSTypeFilter("cgroup"),
 		)
 	})
-
 	return cgroupMountinfo, readMountinfoErr
 }

@@ -196,6 +197,9 @@ func getCgroupMountsV1(all bool) ([]Mount, error) {
 		return nil, err
 	}

+	// We don't need to use /proc/thread-self here because runc always runs
+	// with every thread in the same cgroup. This lets us avoid having to do
+	// runtime.LockOSThread.
 	allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
 	if err != nil {
 		return nil, err
@@ -214,6 +218,10 @@ func GetOwnCgroup(subsystem string) (string, error) {
 	if IsCgroup2UnifiedMode() {
 		return "", errUnified
 	}
+
+	// We don't need to use /proc/thread-self here because runc always runs
+	// with every thread in the same cgroup. This lets us avoid having to do
+	// runtime.LockOSThread.
 	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
 	if err != nil {
 		return "", err
@@ -236,27 +244,6 @@ func GetOwnCgroupPath(subsystem string) (string, error) {
 	return getCgroupPathHelper(subsystem, cgroup)
 }

-func GetInitCgroup(subsystem string) (string, error) {
-	if IsCgroup2UnifiedMode() {
-		return "", errUnified
-	}
-	cgroups, err := ParseCgroupFile("/proc/1/cgroup")
-	if err != nil {
-		return "", err
-	}
-
-	return getControllerPath(subsystem, cgroups)
-}
-
-func GetInitCgroupPath(subsystem string) (string, error) {
-	cgroup, err := GetInitCgroup(subsystem)
-	if err != nil {
-		return "", err
-	}
-
-	return getCgroupPathHelper(subsystem, cgroup)
-}
-
 func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
 	mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
 	if err != nil {
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
@@ -2,8 +2,8 @@ package configs

 import "fmt"

-// blockIODevice holds major:minor format supported in blkio cgroup
-type blockIODevice struct {
+// BlockIODevice holds major:minor format supported in blkio cgroup.
+type BlockIODevice struct {
 	// Major is the device's major number
 	Major int64 `json:"major"`
 	// Minor is the device's minor number
@@ -12,7 +12,7 @@ type blockIODevice struct {

 // WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair
 type WeightDevice struct {
-	blockIODevice
+	BlockIODevice
 	// Weight is the bandwidth rate for the device, range is from 10 to 1000
 	Weight uint16 `json:"weight"`
 	// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
@@ -41,7 +41,7 @@ func (wd *WeightDevice) LeafWeightString() string {

 // ThrottleDevice struct holds a `major:minor rate_per_second` pair
 type ThrottleDevice struct {
-	blockIODevice
+	BlockIODevice
 	// Rate is the IO rate limit per cgroup per device
 	Rate uint64 `json:"rate"`
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
@@ -69,6 +69,9 @@ type Resources struct {
 	// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
 	CpuQuota int64 `json:"cpu_quota"`

+	// CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period.
+	CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive
+
 	// CPU period to be used for hardcapping (in usecs). 0 to use system default.
 	CpuPeriod uint64 `json:"cpu_period"`

@@ -84,6 +87,9 @@ type Resources struct {
 	// MEM to use
 	CpusetMems string `json:"cpuset_mems"`

+	// cgroup SCHED_IDLE
+	CPUIdle *int64 `json:"cpu_idle,omitempty"`
+
 	// Process limit; set <= `0' to disable limit.
 	PidsLimit int64 `json:"pids_limit"`

@@ -155,4 +161,9 @@ type Resources struct {
 	// during Set() to figure out whether the freeze is required. Those
 	// methods may be relatively slow, thus this flag.
 	SkipFreezeOnSet bool `json:"-"`
+
+	// MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check
+	// if the new memory limits (Memory and MemorySwap) being set are lower
+	// than the current memory usage, and reject if so.
+	MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"`
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
@@ -8,6 +8,7 @@ import (
 	"time"

 	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"

 	"github.com/opencontainers/runc/libcontainer/devices"
 	"github.com/opencontainers/runtime-spec/specs-go"
@@ -31,12 +32,13 @@ type IDMap struct {
 // for syscalls. Additional architectures can be added by specifying them in
 // Architectures.
 type Seccomp struct {
-	DefaultAction    Action     `json:"default_action"`
-	Architectures    []string   `json:"architectures"`
-	Syscalls         []*Syscall `json:"syscalls"`
-	DefaultErrnoRet  *uint      `json:"default_errno_ret"`
-	ListenerPath     string     `json:"listener_path,omitempty"`
-	ListenerMetadata string     `json:"listener_metadata,omitempty"`
+	DefaultAction    Action                   `json:"default_action"`
+	Architectures    []string                 `json:"architectures"`
+	Flags            []specs.LinuxSeccompFlag `json:"flags"`
+	Syscalls         []*Syscall               `json:"syscalls"`
+	DefaultErrnoRet  *uint                    `json:"default_errno_ret"`
+	ListenerPath     string                   `json:"listener_path,omitempty"`
+	ListenerMetadata string                   `json:"listener_metadata,omitempty"`
 }

 // Action is taken upon rule match in Seccomp
@@ -83,9 +85,6 @@ type Syscall struct {
 	Args     []*Arg `json:"args"`
 }

-// TODO Windows. Many of these fields should be factored out into those parts
-// which are common across platforms, and those which are platform specific.
-
 // Config defines configuration options for executing a process inside a contained environment.
 type Config struct {
 	// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
@@ -121,6 +120,9 @@ type Config struct {
 	// Hostname optionally sets the container's hostname if provided
 	Hostname string `json:"hostname"`

+	// Domainname optionally sets the container's domainname if provided
+	Domainname string `json:"domainname"`
+
 	// Namespaces specifies the container's namespaces that it should setup when cloning the init process
 	// If a namespace is not provided that namespace is shared from the container's parent process
 	Namespaces Namespaces `json:"namespaces"`
@@ -158,11 +160,11 @@ type Config struct {
 	// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
 	OomScoreAdj *int `json:"oom_score_adj,omitempty"`

-	// UidMappings is an array of User ID mappings for User Namespaces
-	UidMappings []IDMap `json:"uid_mappings"`
+	// UIDMappings is an array of User ID mappings for User Namespaces
+	UIDMappings []IDMap `json:"uid_mappings"`

-	// GidMappings is an array of Group ID mappings for User Namespaces
-	GidMappings []IDMap `json:"gid_mappings"`
+	// GIDMappings is an array of Group ID mappings for User Namespaces
+	GIDMappings []IDMap `json:"gid_mappings"`

 	// MaskPaths specifies paths within the container's rootfs to mask over with a bind
 	// mount pointing to /dev/null as to prevent reads of the file.
@@ -211,8 +213,87 @@ type Config struct {
 	// RootlessCgroups is set when unlikely to have the full access to cgroups.
 	// When RootlessCgroups is set, cgroups errors are ignored.
 	RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
+
+	// TimeOffsets specifies the offset for supporting time namespaces.
+	TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"`
+
+	// Scheduler represents the scheduling attributes for a process.
+	Scheduler *Scheduler `json:"scheduler,omitempty"`
+
+	// Personality contains configuration for the Linux personality syscall.
+	Personality *LinuxPersonality `json:"personality,omitempty"`
+
+	// IOPriority is the container's I/O priority.
+	IOPriority *IOPriority `json:"io_priority,omitempty"`
 }

+// Scheduler is based on the Linux sched_setattr(2) syscall.
+type Scheduler = specs.Scheduler
+
+// ToSchedAttr is to convert *configs.Scheduler to *unix.SchedAttr.
+func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
+	var policy uint32
+	switch scheduler.Policy {
+	case specs.SchedOther:
+		policy = 0
+	case specs.SchedFIFO:
+		policy = 1
+	case specs.SchedRR:
+		policy = 2
+	case specs.SchedBatch:
+		policy = 3
+	case specs.SchedISO:
+		policy = 4
+	case specs.SchedIdle:
+		policy = 5
+	case specs.SchedDeadline:
+		policy = 6
+	default:
+		return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy)
+	}
+
+	var flags uint64
+	for _, flag := range scheduler.Flags {
+		switch flag {
+		case specs.SchedFlagResetOnFork:
+			flags |= 0x01
+		case specs.SchedFlagReclaim:
+			flags |= 0x02
+		case specs.SchedFlagDLOverrun:
+			flags |= 0x04
+		case specs.SchedFlagKeepPolicy:
+			flags |= 0x08
+		case specs.SchedFlagKeepParams:
+			flags |= 0x10
+		case specs.SchedFlagUtilClampMin:
+			flags |= 0x20
+		case specs.SchedFlagUtilClampMax:
+			flags |= 0x40
+		default:
+			return nil, fmt.Errorf("invalid scheduler flag: %s", flag)
+		}
+	}
+
+	return &unix.SchedAttr{
+		Size:     unix.SizeofSchedAttr,
+		Policy:   policy,
+		Flags:    flags,
+		Nice:     scheduler.Nice,
+		Priority: uint32(scheduler.Priority),
+		Runtime:  scheduler.Runtime,
+		Deadline: scheduler.Deadline,
+		Period:   scheduler.Period,
+	}, nil
+}
+
+var IOPrioClassMapping = map[specs.IOPriorityClass]int{
+	specs.IOPRIO_CLASS_RT:   1,
+	specs.IOPRIO_CLASS_BE:   2,
+	specs.IOPRIO_CLASS_IDLE: 3,
+}
+
+type IOPriority = specs.LinuxIOPriority
+
 type (
 	HookName string
 	HookList []Hook
@@ -277,6 +358,7 @@ type Capabilities struct {
 	Ambient []string
 }

+// Deprecated: use (Hooks).Run instead.
 func (hooks HookList) RunHooks(state *specs.State) error {
 	for i, h := range hooks {
 		if err := h.Run(state); err != nil {
@@ -333,6 +415,18 @@ func (hooks *Hooks) MarshalJSON() ([]byte, error) {
 	})
 }

+// Run executes all hooks for the given hook name.
+func (hooks Hooks) Run(name HookName, state *specs.State) error {
+	list := hooks[name]
+	for i, h := range list {
+		if err := h.Run(state); err != nil {
+			return fmt.Errorf("error running %s hook #%d: %w", name, i, err)
+		}
+	}
+
+	return nil
+}
+
 type Hook interface {
 	// Run executes the hook with the provided state.
 	Run(*specs.State) error
@@ -393,7 +487,7 @@ func (c Command) Run(s *specs.State) error {
 	go func() {
 		err := cmd.Wait()
 		if err != nil {
-			err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
+			err = fmt.Errorf("%w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
 		}
 		errC <- err
 	}()
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
@@ -7,22 +7,33 @@ import (
 )

 var (
-	errNoUIDMap   = errors.New("User namespaces enabled, but no uid mappings found.")
-	errNoUserMap  = errors.New("User namespaces enabled, but no user mapping found.")
-	errNoGIDMap   = errors.New("User namespaces enabled, but no gid mappings found.")
-	errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.")
+	errNoUIDMap = errors.New("user namespaces enabled, but no uid mappings found")
+	errNoGIDMap = errors.New("user namespaces enabled, but no gid mappings found")
 )

+// Please check https://man7.org/linux/man-pages/man2/personality.2.html for const details.
+// https://raw.githubusercontent.com/torvalds/linux/master/include/uapi/linux/personality.h
+const (
+	PerLinux   = 0x0000
+	PerLinux32 = 0x0008
+)
+
+type LinuxPersonality struct {
+	// Domain for the personality
+	// can only contain values "LINUX" and "LINUX32"
+	Domain int `json:"domain"`
+}
+
 // HostUID gets the translated uid for the process on host which could be
 // different when user namespaces are enabled.
 func (c Config) HostUID(containerId int) (int, error) {
 	if c.Namespaces.Contains(NEWUSER) {
-		if c.UidMappings == nil {
+		if len(c.UIDMappings) == 0 {
 			return -1, errNoUIDMap
 		}
-		id, found := c.hostIDFromMapping(int64(containerId), c.UidMappings)
+		id, found := c.hostIDFromMapping(int64(containerId), c.UIDMappings)
 		if !found {
-			return -1, errNoUserMap
+			return -1, fmt.Errorf("user namespaces enabled, but no mapping found for uid %d", containerId)
 		}
 		// If we are a 32-bit binary running on a 64-bit system, it's possible
 		// the mapped user is too large to store in an int, which means we
@@ -47,12 +58,12 @@ func (c Config) HostRootUID() (int, error) {
 // different when user namespaces are enabled.
 func (c Config) HostGID(containerId int) (int, error) {
 	if c.Namespaces.Contains(NEWUSER) {
-		if c.GidMappings == nil {
+		if len(c.GIDMappings) == 0 {
 			return -1, errNoGIDMap
 		}
-		id, found := c.hostIDFromMapping(int64(containerId), c.GidMappings)
+		id, found := c.hostIDFromMapping(int64(containerId), c.GIDMappings)
 		if !found {
-			return -1, errNoGroupMap
+			return -1, fmt.Errorf("user namespaces enabled, but no mapping found for gid %d", containerId)
 		}
 		// If we are a 32-bit binary running on a 64-bit system, it's possible
 		// the mapped user is too large to store in an int, which means we
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
@@ -1,48 +1,7 @@
 package configs

-import "golang.org/x/sys/unix"
-
 const (
 	// EXT_COPYUP is a directive to copy up the contents of a directory when
 	// a tmpfs is mounted over it.
-	EXT_COPYUP = 1 << iota //nolint:golint // ignore "don't use ALL_CAPS" warning
+	EXT_COPYUP = 1 << iota //nolint:golint,revive // ignore "don't use ALL_CAPS" warning
 )
-
-type Mount struct {
-	// Source path for the mount.
-	Source string `json:"source"`
-
-	// Destination path for the mount inside the container.
-	Destination string `json:"destination"`
-
-	// Device the mount is for.
-	Device string `json:"device"`
-
-	// Mount flags.
-	Flags int `json:"flags"`
-
-	// Propagation Flags
-	PropagationFlags []int `json:"propagation_flags"`
-
-	// Mount data applied to the mount.
-	Data string `json:"data"`
-
-	// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
-	Relabel string `json:"relabel"`
-
-	// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2).
-	RecAttr *unix.MountAttr `json:"rec_attr"`
-
-	// Extensions are additional flags that are specific to runc.
-	Extensions int `json:"extensions"`
-
-	// Optional Command to be run before Source is mounted.
-	PremountCmds []Command `json:"premount_cmds"`
-
-	// Optional Command to be run after Source is mounted.
-	PostmountCmds []Command `json:"postmount_cmds"`
-}
-
-func (m *Mount) IsBind() bool {
-	return m.Flags&unix.MS_BIND != 0
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_linux.go
@@ -0,0 +1,66 @@
+package configs
+
+import "golang.org/x/sys/unix"
+
+type MountIDMapping struct {
+	// Recursive indicates if the mapping needs to be recursive.
+	Recursive bool `json:"recursive"`
+
+	// UserNSPath is a path to a user namespace that indicates the necessary
+	// id-mappings for MOUNT_ATTR_IDMAP. If set to non-"", UIDMappings and
+	// GIDMappings must be set to nil.
+	UserNSPath string `json:"userns_path,omitempty"`
+
+	// UIDMappings is the uid mapping set for this mount, to be used with
+	// MOUNT_ATTR_IDMAP.
+	UIDMappings []IDMap `json:"uid_mappings,omitempty"`
+
+	// GIDMappings is the gid mapping set for this mount, to be used with
+	// MOUNT_ATTR_IDMAP.
+	GIDMappings []IDMap `json:"gid_mappings,omitempty"`
+}
+
+type Mount struct {
+	// Source path for the mount.
+	Source string `json:"source"`
+
+	// Destination path for the mount inside the container.
+	Destination string `json:"destination"`
+
+	// Device the mount is for.
+	Device string `json:"device"`
+
+	// Mount flags.
+	Flags int `json:"flags"`
+
+	// Mount flags that were explicitly cleared in the configuration (meaning
+	// the user explicitly requested that these flags *not* be set).
+	ClearedFlags int `json:"cleared_flags"`
+
+	// Propagation Flags
+	PropagationFlags []int `json:"propagation_flags"`
+
+	// Mount data applied to the mount.
+	Data string `json:"data"`
+
+	// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
+	Relabel string `json:"relabel"`
+
+	// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2).
+	RecAttr *unix.MountAttr `json:"rec_attr"`
+
+	// Extensions are additional flags that are specific to runc.
+	Extensions int `json:"extensions"`
+
+	// Mapping is the MOUNT_ATTR_IDMAP configuration for the mount. If non-nil,
+	// the mount is configured to use MOUNT_ATTR_IDMAP-style id mappings.
+	IDMapping *MountIDMapping `json:"id_mapping,omitempty"`
+}
+
+func (m *Mount) IsBind() bool {
+	return m.Flags&unix.MS_BIND != 0
+}
+
+func (m *Mount) IsIDMapped() bool {
+	return m.IDMapping != nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go
@@ -0,0 +1,10 @@
+//go:build !linux
+// +build !linux
+
+package configs
+
+type Mount struct{}
+
+func (m *Mount) IsBind() bool {
+	return false
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
@@ -14,6 +14,7 @@ const (
 	NEWIPC    NamespaceType = "NEWIPC"
 	NEWUSER   NamespaceType = "NEWUSER"
 	NEWCGROUP NamespaceType = "NEWCGROUP"
+	NEWTIME   NamespaceType = "NEWTIME"
 )

 var (
@@ -38,6 +39,8 @@ func NsName(ns NamespaceType) string {
 		return "uts"
 	case NEWCGROUP:
 		return "cgroup"
+	case NEWTIME:
+		return "time"
 	}
 	return ""
 }
@@ -56,6 +59,9 @@ func IsNamespaceSupported(ns NamespaceType) bool {
 	if nsFile == "" {
 		return false
 	}
+	// We don't need to use /proc/thread-self here because the list of
+	// namespace types is unrelated to the thread. This lets us avoid having to
+	// do runtime.LockOSThread.
 	_, err := os.Stat("/proc/self/ns/" + nsFile)
 	// a namespace is supported if it exists and we have permissions to read it
 	supported = err == nil
@@ -72,6 +78,7 @@ func NamespaceTypes() []NamespaceType {
 		NEWPID,
 		NEWNS,
 		NEWCGROUP,
+		NEWTIME,
 	}
 }

--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
@@ -17,6 +17,7 @@ var namespaceInfo = map[NamespaceType]int{
 	NEWUTS:    unix.CLONE_NEWUTS,
 	NEWPID:    unix.CLONE_NEWPID,
 	NEWCGROUP: unix.CLONE_NEWCGROUP,
+	NEWTIME:   unix.CLONE_NEWTIME,
 }

 // CloneFlags parses the container's Namespaces options to set the correct
@@ -31,3 +32,15 @@ func (n *Namespaces) CloneFlags() uintptr {
 	}
 	return uintptr(flag)
 }
+
+// IsPrivate tells whether the namespace of type t is configured as private
+// (i.e. it exists and is not shared).
+func (n Namespaces) IsPrivate(t NamespaceType) bool {
+	for _, v := range n {
+		if v.Type == t {
+			return v.Path == ""
+		}
+	}
+	// Not found, so implicitly sharing a parent namespace.
+	return false
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go
@ -1,157 +0,0 @@
-//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
-// +build darwin dragonfly freebsd linux netbsd openbsd solaris
-
-package user
-
-import (
-	"io"
-	"os"
-	"strconv"
-
-	"golang.org/x/sys/unix"
-)
-
-// Unix-specific path to the passwd and group formatted files.
-const (
-	unixPasswdPath = "/etc/passwd"
-	unixGroupPath  = "/etc/group"
-)
-
-// LookupUser looks up a user by their username in /etc/passwd. If the user
-// cannot be found (or there is no /etc/passwd file on the filesystem), then
-// LookupUser returns an error.
-func LookupUser(username string) (User, error) {
-	return lookupUserFunc(func(u User) bool {
-		return u.Name == username
-	})
-}
-
-// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
-// be found (or there is no /etc/passwd file on the filesystem), then LookupId
-// returns an error.
-func LookupUid(uid int) (User, error) {
-	return lookupUserFunc(func(u User) bool {
-		return u.Uid == uid
-	})
-}
-
-func lookupUserFunc(filter func(u User) bool) (User, error) {
-	// Get operating system-specific passwd reader-closer.
-	passwd, err := GetPasswd()
-	if err != nil {
-		return User{}, err
-	}
-	defer passwd.Close()
-
-	// Get the users.
-	users, err := ParsePasswdFilter(passwd, filter)
-	if err != nil {
-		return User{}, err
-	}
-
-	// No user entries found.
-	if len(users) == 0 {
-		return User{}, ErrNoPasswdEntries
-	}
-
-	// Assume the first entry is the "correct" one.
-	return users[0], nil
-}
-
-// LookupGroup looks up a group by its name in /etc/group. If the group cannot
-// be found (or there is no /etc/group file on the filesystem), then LookupGroup
-// returns an error.
-func LookupGroup(groupname string) (Group, error) {
-	return lookupGroupFunc(func(g Group) bool {
-		return g.Name == groupname
-	})
-}
-
-// LookupGid looks up a group by its group id in /etc/group. If the group cannot
-// be found (or there is no /etc/group file on the filesystem), then LookupGid
-// returns an error.
-func LookupGid(gid int) (Group, error) {
-	return lookupGroupFunc(func(g Group) bool {
-		return g.Gid == gid
-	})
-}
-
-func lookupGroupFunc(filter func(g Group) bool) (Group, error) {
-	// Get operating system-specific group reader-closer.
-	group, err := GetGroup()
-	if err != nil {
-		return Group{}, err
-	}
-	defer group.Close()
-
-	// Get the users.
-	groups, err := ParseGroupFilter(group, filter)
-	if err != nil {
-		return Group{}, err
-	}
-
-	// No user entries found.
-	if len(groups) == 0 {
-		return Group{}, ErrNoGroupEntries
-	}
-
-	// Assume the first entry is the "correct" one.
-	return groups[0], nil
-}
-
-func GetPasswdPath() (string, error) {
-	return unixPasswdPath, nil
-}
-
-func GetPasswd() (io.ReadCloser, error) {
-	return os.Open(unixPasswdPath)
-}
-
-func GetGroupPath() (string, error) {
-	return unixGroupPath, nil
-}
-
-func GetGroup() (io.ReadCloser, error) {
-	return os.Open(unixGroupPath)
-}
-
-// CurrentUser looks up the current user by their user id in /etc/passwd. If the
-// user cannot be found (or there is no /etc/passwd file on the filesystem),
-// then CurrentUser returns an error.
-func CurrentUser() (User, error) {
-	return LookupUid(unix.Getuid())
-}
-
-// CurrentGroup looks up the current user's group by their primary group id's
-// entry in /etc/passwd. If the group cannot be found (or there is no
-// /etc/group file on the filesystem), then CurrentGroup returns an error.
-func CurrentGroup() (Group, error) {
-	return LookupGid(unix.Getgid())
-}
-
-func currentUserSubIDs(fileName string) ([]SubID, error) {
-	u, err := CurrentUser()
-	if err != nil {
-		return nil, err
-	}
-	filter := func(entry SubID) bool {
-		return entry.Name == u.Name || entry.Name == strconv.Itoa(u.Uid)
-	}
-	return ParseSubIDFileFilter(fileName, filter)
-}
-
-func CurrentUserSubUIDs() ([]SubID, error) {
-	return currentUserSubIDs("/etc/subuid")
-}
-
-func CurrentUserSubGIDs() ([]SubID, error) {
-	return currentUserSubIDs("/etc/subgid")
-}
-
-func CurrentProcessUIDMap() ([]IDMap, error) {
-	return ParseIDMapFile("/proc/self/uid_map")
-}
-
-func CurrentProcessGIDMap() ([]IDMap, error) {
-	return ParseIDMapFile("/proc/self/gid_map")
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go
@ -1,605 +0,0 @@
-package user
-
-import (
-	"bufio"
-	"bytes"
-	"errors"
-	"fmt"
-	"io"
-	"os"
-	"strconv"
-	"strings"
-)
-
-const (
-	minID = 0
-	maxID = 1<<31 - 1 // for 32-bit systems compatibility
-)
-
-var (
-	// ErrNoPasswdEntries is returned if no matching entries were found in /etc/group.
-	ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
-	// ErrNoGroupEntries is returned if no matching entries were found in /etc/passwd.
-	ErrNoGroupEntries = errors.New("no matching entries in group file")
-	// ErrRange is returned if a UID or GID is outside of the valid range.
-	ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minID, maxID)
-)
-
-type User struct {
-	Name  string
-	Pass  string
-	Uid   int
-	Gid   int
-	Gecos string
-	Home  string
-	Shell string
-}
-
-type Group struct {
-	Name string
-	Pass string
-	Gid  int
-	List []string
-}
-
-// SubID represents an entry in /etc/sub{u,g}id
-type SubID struct {
-	Name  string
-	SubID int64
-	Count int64
-}
-
-// IDMap represents an entry in /proc/PID/{u,g}id_map
-type IDMap struct {
-	ID       int64
-	ParentID int64
-	Count    int64
-}
-
-func parseLine(line []byte, v ...interface{}) {
-	parseParts(bytes.Split(line, []byte(":")), v...)
-}
-
-func parseParts(parts [][]byte, v ...interface{}) {
-	if len(parts) == 0 {
-		return
-	}
-
-	for i, p := range parts {
-		// Ignore cases where we don't have enough fields to populate the arguments.
-		// Some configuration files like to misbehave.
-		if len(v) <= i {
-			break
-		}
-
-		// Use the type of the argument to figure out how to parse it, scanf() style.
-		// This is legit.
-		switch e := v[i].(type) {
-		case *string:
-			*e = string(p)
-		case *int:
-			// "numbers", with conversion errors ignored because of some misbehaving configuration files.
-			*e, _ = strconv.Atoi(string(p))
-		case *int64:
-			*e, _ = strconv.ParseInt(string(p), 10, 64)
-		case *[]string:
-			// Comma-separated lists.
-			if len(p) != 0 {
-				*e = strings.Split(string(p), ",")
-			} else {
-				*e = []string{}
-			}
-		default:
-			// Someone goof'd when writing code using this function. Scream so they can hear us.
-			panic(fmt.Sprintf("parseLine only accepts {*string, *int, *int64, *[]string} as arguments! %#v is not a pointer!", e))
-		}
-	}
-}
-
-func ParsePasswdFile(path string) ([]User, error) {
-	passwd, err := os.Open(path)
-	if err != nil {
-		return nil, err
-	}
-	defer passwd.Close()
-	return ParsePasswd(passwd)
-}
-
-func ParsePasswd(passwd io.Reader) ([]User, error) {
-	return ParsePasswdFilter(passwd, nil)
-}
-
-func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) {
-	passwd, err := os.Open(path)
-	if err != nil {
-		return nil, err
-	}
-	defer passwd.Close()
-	return ParsePasswdFilter(passwd, filter)
-}
-
-func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
-	if r == nil {
-		return nil, errors.New("nil source for passwd-formatted data")
-	}
-
-	var (
-		s   = bufio.NewScanner(r)
-		out = []User{}
-	)
-
-	for s.Scan() {
-		line := bytes.TrimSpace(s.Bytes())
-		if len(line) == 0 {
-			continue
-		}
-
-		// see: man 5 passwd
-		//  name:password:UID:GID:GECOS:directory:shell
-		// Name:Pass:Uid:Gid:Gecos:Home:Shell
-		//  root:x:0:0:root:/root:/bin/bash
-		//  adm:x:3:4:adm:/var/adm:/bin/false
-		p := User{}
-		parseLine(line, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell)
-
-		if filter == nil || filter(p) {
-			out = append(out, p)
-		}
-	}
-	if err := s.Err(); err != nil {
-		return nil, err
-	}
-
-	return out, nil
-}
-
-func ParseGroupFile(path string) ([]Group, error) {
-	group, err := os.Open(path)
-	if err != nil {
-		return nil, err
-	}
-
-	defer group.Close()
-	return ParseGroup(group)
-}
-
-func ParseGroup(group io.Reader) ([]Group, error) {
-	return ParseGroupFilter(group, nil)
-}
-
-func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) {
-	group, err := os.Open(path)
-	if err != nil {
-		return nil, err
-	}
-	defer group.Close()
-	return ParseGroupFilter(group, filter)
-}
-
-func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
-	if r == nil {
-		return nil, errors.New("nil source for group-formatted data")
-	}
-	rd := bufio.NewReader(r)
-	out := []Group{}
-
-	// Read the file line-by-line.
-	for {
-		var (
-			isPrefix  bool
-			wholeLine []byte
-			err       error
-		)
-
-		// Read the next line. We do so in chunks (as much as reader's
-		// buffer is able to keep), check if we read enough columns
-		// already on each step and store final result in wholeLine.
-		for {
-			var line []byte
-			line, isPrefix, err = rd.ReadLine()
-
-			if err != nil {
-				// We should return no error if EOF is reached
-				// without a match.
-				if err == io.EOF {
-					err = nil
-				}
-				return out, err
-			}
-
-			// Simple common case: line is short enough to fit in a
-			// single reader's buffer.
-			if !isPrefix && len(wholeLine) == 0 {
-				wholeLine = line
-				break
-			}
-
-			wholeLine = append(wholeLine, line...)
-
-			// Check if we read the whole line already.
-			if !isPrefix {
-				break
-			}
-		}
-
-		// There's no spec for /etc/passwd or /etc/group, but we try to follow
-		// the same rules as the glibc parser, which allows comments and blank
-		// space at the beginning of a line.
-		wholeLine = bytes.TrimSpace(wholeLine)
-		if len(wholeLine) == 0 || wholeLine[0] == '#' {
-			continue
-		}
-
-		// see: man 5 group
-		//  group_name:password:GID:user_list
-		// Name:Pass:Gid:List
-		//  root:x:0:root
-		//  adm:x:4:root,adm,daemon
-		p := Group{}
-		parseLine(wholeLine, &p.Name, &p.Pass, &p.Gid, &p.List)
-
-		if filter == nil || filter(p) {
-			out = append(out, p)
-		}
-	}
-}
-
-type ExecUser struct {
-	Uid   int
-	Gid   int
-	Sgids []int
-	Home  string
-}
-
-// GetExecUserPath is a wrapper for GetExecUser. It reads data from each of the
-// given file paths and uses that data as the arguments to GetExecUser. If the
-// files cannot be opened for any reason, the error is ignored and a nil
-// io.Reader is passed instead.
-func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
-	var passwd, group io.Reader
-
-	if passwdFile, err := os.Open(passwdPath); err == nil {
-		passwd = passwdFile
-		defer passwdFile.Close()
-	}
-
-	if groupFile, err := os.Open(groupPath); err == nil {
-		group = groupFile
-		defer groupFile.Close()
-	}
-
-	return GetExecUser(userSpec, defaults, passwd, group)
-}
-
-// GetExecUser parses a user specification string (using the passwd and group
-// readers as sources for /etc/passwd and /etc/group data, respectively). In
-// the case of blank fields or missing data from the sources, the values in
-// defaults is used.
-//
-// GetExecUser will return an error if a user or group literal could not be
-// found in any entry in passwd and group respectively.
-//
-// Examples of valid user specifications are:
-//   - ""
-//   - "user"
-//   - "uid"
-//   - "user:group"
-//   - "uid:gid
-//   - "user:gid"
-//   - "uid:group"
-//
-// It should be noted that if you specify a numeric user or group id, they will
-// not be evaluated as usernames (only the metadata will be filled). So attempting
-// to parse a user with user.Name = "1337" will produce the user with a UID of
-// 1337.
-func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) {
-	if defaults == nil {
-		defaults = new(ExecUser)
-	}
-
-	// Copy over defaults.
-	user := &ExecUser{
-		Uid:   defaults.Uid,
-		Gid:   defaults.Gid,
-		Sgids: defaults.Sgids,
-		Home:  defaults.Home,
-	}
-
-	// Sgids slice *cannot* be nil.
-	if user.Sgids == nil {
-		user.Sgids = []int{}
-	}
-
-	// Allow for userArg to have either "user" syntax, or optionally "user:group" syntax
-	var userArg, groupArg string
-	parseLine([]byte(userSpec), &userArg, &groupArg)
-
-	// Convert userArg and groupArg to be numeric, so we don't have to execute
-	// Atoi *twice* for each iteration over lines.
-	uidArg, uidErr := strconv.Atoi(userArg)
-	gidArg, gidErr := strconv.Atoi(groupArg)
-
-	// Find the matching user.
-	users, err := ParsePasswdFilter(passwd, func(u User) bool {
-		if userArg == "" {
-			// Default to current state of the user.
-			return u.Uid == user.Uid
-		}
-
-		if uidErr == nil {
-			// If the userArg is numeric, always treat it as a UID.
-			return uidArg == u.Uid
-		}
-
-		return u.Name == userArg
-	})
-
-	// If we can't find the user, we have to bail.
-	if err != nil && passwd != nil {
-		if userArg == "" {
-			userArg = strconv.Itoa(user.Uid)
-		}
-		return nil, fmt.Errorf("unable to find user %s: %w", userArg, err)
-	}
-
-	var matchedUserName string
-	if len(users) > 0 {
-		// First match wins, even if there's more than one matching entry.
-		matchedUserName = users[0].Name
-		user.Uid = users[0].Uid
-		user.Gid = users[0].Gid
-		user.Home = users[0].Home
-	} else if userArg != "" {
-		// If we can't find a user with the given username, the only other valid
-		// option is if it's a numeric username with no associated entry in passwd.
-
-		if uidErr != nil {
-			// Not numeric.
-			return nil, fmt.Errorf("unable to find user %s: %w", userArg, ErrNoPasswdEntries)
-		}
-		user.Uid = uidArg
-
-		// Must be inside valid uid range.
-		if user.Uid < minID || user.Uid > maxID {
-			return nil, ErrRange
-		}
-
-		// Okay, so it's numeric. We can just roll with this.
-	}
-
-	// On to the groups. If we matched a username, we need to do this because of
-	// the supplementary group IDs.
-	if groupArg != "" || matchedUserName != "" {
-		groups, err := ParseGroupFilter(group, func(g Group) bool {
-			// If the group argument isn't explicit, we'll just search for it.
-			if groupArg == "" {
-				// Check if user is a member of this group.
-				for _, u := range g.List {
-					if u == matchedUserName {
-						return true
-					}
-				}
-				return false
-			}
-
-			if gidErr == nil {
-				// If the groupArg is numeric, always treat it as a GID.
-				return gidArg == g.Gid
-			}
-
-			return g.Name == groupArg
-		})
-		if err != nil && group != nil {
-			return nil, fmt.Errorf("unable to find groups for spec %v: %w", matchedUserName, err)
-		}
-
-		// Only start modifying user.Gid if it is in explicit form.
-		if groupArg != "" {
-			if len(groups) > 0 {
-				// First match wins, even if there's more than one matching entry.
-				user.Gid = groups[0].Gid
-			} else {
-				// If we can't find a group with the given name, the only other valid
-				// option is if it's a numeric group name with no associated entry in group.
-
-				if gidErr != nil {
-					// Not numeric.
-					return nil, fmt.Errorf("unable to find group %s: %w", groupArg, ErrNoGroupEntries)
-				}
-				user.Gid = gidArg
-
-				// Must be inside valid gid range.
-				if user.Gid < minID || user.Gid > maxID {
-					return nil, ErrRange
-				}
-
-				// Okay, so it's numeric. We can just roll with this.
-			}
-		} else if len(groups) > 0 {
-			// Supplementary group ids only make sense if in the implicit form.
-			user.Sgids = make([]int, len(groups))
-			for i, group := range groups {
-				user.Sgids[i] = group.Gid
-			}
-		}
-	}
-
-	return user, nil
-}
-
-// GetAdditionalGroups looks up a list of groups by name or group id
-// against the given /etc/group formatted data. If a group name cannot
-// be found, an error will be returned. If a group id cannot be found,
-// or the given group data is nil, the id will be returned as-is
-// provided it is in the legal range.
-func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, error) {
-	groups := []Group{}
-	if group != nil {
-		var err error
-		groups, err = ParseGroupFilter(group, func(g Group) bool {
-			for _, ag := range additionalGroups {
-				if g.Name == ag || strconv.Itoa(g.Gid) == ag {
-					return true
-				}
-			}
-			return false
-		})
-		if err != nil {
-			return nil, fmt.Errorf("Unable to find additional groups %v: %w", additionalGroups, err)
-		}
-	}
-
-	gidMap := make(map[int]struct{})
-	for _, ag := range additionalGroups {
-		var found bool
-		for _, g := range groups {
-			// if we found a matched group either by name or gid, take the
-			// first matched as correct
-			if g.Name == ag || strconv.Itoa(g.Gid) == ag {
-				if _, ok := gidMap[g.Gid]; !ok {
-					gidMap[g.Gid] = struct{}{}
-					found = true
-					break
-				}
-			}
-		}
-		// we asked for a group but didn't find it. let's check to see
-		// if we wanted a numeric group
-		if !found {
-			gid, err := strconv.ParseInt(ag, 10, 64)
-			if err != nil {
-				// Not a numeric ID either.
-				return nil, fmt.Errorf("Unable to find group %s: %w", ag, ErrNoGroupEntries)
-			}
-			// Ensure gid is inside gid range.
-			if gid < minID || gid > maxID {
-				return nil, ErrRange
-			}
-			gidMap[int(gid)] = struct{}{}
-		}
-	}
-	gids := []int{}
-	for gid := range gidMap {
-		gids = append(gids, gid)
-	}
-	return gids, nil
-}
-
-// GetAdditionalGroupsPath is a wrapper around GetAdditionalGroups
-// that opens the groupPath given and gives it as an argument to
-// GetAdditionalGroups.
-func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
-	var group io.Reader
-
-	if groupFile, err := os.Open(groupPath); err == nil {
-		group = groupFile
-		defer groupFile.Close()
-	}
-	return GetAdditionalGroups(additionalGroups, group)
-}
-
-func ParseSubIDFile(path string) ([]SubID, error) {
-	subid, err := os.Open(path)
-	if err != nil {
-		return nil, err
-	}
-	defer subid.Close()
-	return ParseSubID(subid)
-}
-
-func ParseSubID(subid io.Reader) ([]SubID, error) {
-	return ParseSubIDFilter(subid, nil)
-}
-
-func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) {
-	subid, err := os.Open(path)
-	if err != nil {
-		return nil, err
-	}
-	defer subid.Close()
-	return ParseSubIDFilter(subid, filter)
-}
-
-func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
-	if r == nil {
-		return nil, errors.New("nil source for subid-formatted data")
-	}
-
-	var (
-		s   = bufio.NewScanner(r)
-		out = []SubID{}
-	)
-
-	for s.Scan() {
-		line := bytes.TrimSpace(s.Bytes())
-		if len(line) == 0 {
-			continue
-		}
-
-		// see: man 5 subuid
-		p := SubID{}
-		parseLine(line, &p.Name, &p.SubID, &p.Count)
-
-		if filter == nil || filter(p) {
-			out = append(out, p)
-		}
-	}
-	if err := s.Err(); err != nil {
-		return nil, err
-	}
-
-	return out, nil
-}
-
-func ParseIDMapFile(path string) ([]IDMap, error) {
-	r, err := os.Open(path)
-	if err != nil {
-		return nil, err
-	}
-	defer r.Close()
-	return ParseIDMap(r)
-}
-
-func ParseIDMap(r io.Reader) ([]IDMap, error) {
-	return ParseIDMapFilter(r, nil)
-}
-
-func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) {
-	r, err := os.Open(path)
-	if err != nil {
-		return nil, err
-	}
-	defer r.Close()
-	return ParseIDMapFilter(r, filter)
-}
-
-func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
-	if r == nil {
-		return nil, errors.New("nil source for idmap-formatted data")
-	}
-
-	var (
-		s   = bufio.NewScanner(r)
-		out = []IDMap{}
-	)
-
-	for s.Scan() {
-		line := bytes.TrimSpace(s.Bytes())
-		if len(line) == 0 {
-			continue
-		}
-
-		// see: man 7 user_namespaces
-		p := IDMap{}
-		parseParts(bytes.Fields(line), &p.ID, &p.ParentID, &p.Count)
-
-		if filter == nil || filter(p) {
-			out = append(out, p)
-		}
-	}
-	if err := s.Err(); err != nil {
-		return nil, err
-	}
-
-	return out, nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go
@ -1,43 +0,0 @@
-//go:build gofuzz
-// +build gofuzz
-
-package user
-
-import (
-	"io"
-	"strings"
-)
-
-func IsDivisbleBy(n int, divisibleby int) bool {
-	return (n % divisibleby) == 0
-}
-
-func FuzzUser(data []byte) int {
-	if len(data) == 0 {
-		return -1
-	}
-	if !IsDivisbleBy(len(data), 5) {
-		return -1
-	}
-
-	var divided [][]byte
-
-	chunkSize := len(data) / 5
-
-	for i := 0; i < len(data); i += chunkSize {
-		end := i + chunkSize
-
-		divided = append(divided, data[i:end])
-	}
-
-	_, _ = ParsePasswdFilter(strings.NewReader(string(divided[0])), nil)
-
-	var passwd, group io.Reader
-
-	group = strings.NewReader(string(divided[1]))
-	_, _ = GetAdditionalGroups([]string{string(divided[2])}, group)
-
-	passwd = strings.NewReader(string(divided[3]))
-	_, _ = GetExecUser(string(divided[4]), nil, passwd, group)
-	return 1
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go
@ -1,5 +1,4 @@
 package userns

 // RunningInUserNS detects whether we are currently running in a user namespace.
-// Originally copied from github.com/lxc/lxd/shared/util.go
 var RunningInUserNS = runningInUserNS
--- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go
@ -3,14 +3,7 @@

 package userns

-import (
-	"strings"
-
-	"github.com/opencontainers/runc/libcontainer/user"
-)
-
-func FuzzUIDMap(data []byte) int {
-	uidmap, _ := user.ParseIDMap(strings.NewReader(string(data)))
-	_ = uidMapInUserNS(uidmap)
+func FuzzUIDMap(uidmap []byte) int {
+	_ = uidMapInUserNS(string(uidmap))
 	return 1
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go
@ -1,9 +1,10 @@
 package userns

 import (
+	"bufio"
+	"fmt"
+	"os"
 	"sync"
-
-	"github.com/opencontainers/runc/libcontainer/user"
 )

 var (
@ -12,26 +13,43 @@ var (
 )

 // runningInUserNS detects whether we are currently running in a user namespace.
-// Originally copied from github.com/lxc/lxd/shared/util.go
+//
+// Originally copied from https://github.com/lxc/incus/blob/e45085dd42f826b3c8c3228e9733c0b6f998eafe/shared/util.go#L678-L700.
 func runningInUserNS() bool {
 	nsOnce.Do(func() {
-		uidmap, err := user.CurrentProcessUIDMap()
+		file, err := os.Open("/proc/self/uid_map")
 		if err != nil {
-			// This kernel-provided file only exists if user namespaces are supported
+			// This kernel-provided file only exists if user namespaces are supported.
 			return
 		}
-		inUserNS = uidMapInUserNS(uidmap)
+		defer file.Close()
+
+		buf := bufio.NewReader(file)
+		l, _, err := buf.ReadLine()
+		if err != nil {
+			return
+		}
+
+		inUserNS = uidMapInUserNS(string(l))
 	})
 	return inUserNS
 }

-func uidMapInUserNS(uidmap []user.IDMap) bool {
-	/*
-	 * We assume we are in the initial user namespace if we have a full
-	 * range - 4294967295 uids starting at uid 0.
-	 */
-	if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 {
+func uidMapInUserNS(uidMap string) bool {
+	if uidMap == "" {
+		// File exists but is empty (the initial state when a userns is created,
+		// see user_namespaces(7)).
+		return true
+	}
+
+	var a, b, c int64
+	if _, err := fmt.Sscanf(uidMap, "%d %d %d", &a, &b, &c); err != nil {
+		// Assume we are in a regular, non user namespace.
 		return false
 	}
-	return true
+
+	// As per user_namespaces(7), /proc/self/uid_map of
+	// the initial user namespace shows 0 0 4294967295.
+	initNS := a == 0 && b == 0 && c == 4294967295
+	return !initNS
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go
@ -3,8 +3,6 @@

 package userns

-import "github.com/opencontainers/runc/libcontainer/user"
-
 // runningInUserNS is a stub for non-Linux systems
 // Always returns false
 func runningInUserNS() bool {
@ -13,6 +11,6 @@ func runningInUserNS() bool {

 // uidMapInUserNS is a stub for non-Linux systems
 // Always returns false
-func uidMapInUserNS(uidmap []user.IDMap) bool {
+func uidMapInUserNS(uidMap string) bool {
 	return false
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go
@ -0,0 +1,156 @@
+package userns
+
+import (
+	"fmt"
+	"os"
+	"sort"
+	"strings"
+	"sync"
+	"syscall"
+
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type Mapping struct {
+	UIDMappings []configs.IDMap
+	GIDMappings []configs.IDMap
+}
+
+func (m Mapping) toSys() (uids, gids []syscall.SysProcIDMap) {
+	for _, uid := range m.UIDMappings {
+		uids = append(uids, syscall.SysProcIDMap{
+			ContainerID: int(uid.ContainerID),
+			HostID:      int(uid.HostID),
+			Size:        int(uid.Size),
+		})
+	}
+	for _, gid := range m.GIDMappings {
+		gids = append(gids, syscall.SysProcIDMap{
+			ContainerID: int(gid.ContainerID),
+			HostID:      int(gid.HostID),
+			Size:        int(gid.Size),
+		})
+	}
+	return
+}
+
+// id returns a unique identifier for this mapping, agnostic of the order of
+// the uid and gid mappings (because the order doesn't matter to the kernel).
+// The set of userns handles is indexed using this ID.
+func (m Mapping) id() string {
+	var uids, gids []string
+	for _, idmap := range m.UIDMappings {
+		uids = append(uids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
+	}
+	for _, idmap := range m.GIDMappings {
+		gids = append(gids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
+	}
+	// We don't care about the sort order -- just sort them.
+	sort.Strings(uids)
+	sort.Strings(gids)
+	return "uid=" + strings.Join(uids, ",") + ";gid=" + strings.Join(gids, ",")
+}
+
+type Handles struct {
+	m    sync.Mutex
+	maps map[string]*os.File
+}
+
+// Release all resources associated with these Handles. All existing files
+// returned from Get() will continue to work even after calling Release(). The
+// same Handles can be re-used after calling Release().
+func (hs *Handles) Release() {
+	hs.m.Lock()
+	defer hs.m.Unlock()
+
+	// Close the files for good measure, though GC will do that for us anyway.
+	for _, file := range hs.maps {
+		_ = file.Close()
+	}
+	hs.maps = nil
+}
+
+func spawnProc(req Mapping) (*os.Process, error) {
+	// We need to spawn a subprocess with the requested mappings, which is
+	// unfortunately quite expensive. The "safe" way of doing this is natively
+	// with Go (and then spawning something like "sleep infinity"), but
+	// execve() is a waste of cycles because we just need some process to have
+	// the right mapping, we don't care what it's executing. The "unsafe"
+	// option of doing a clone() behind the back of Go is probably okay in
+	// theory as long as we just do kill(getpid(), SIGSTOP). However, if we
+	// tell Go to put the new process into PTRACE_TRACEME mode, we can avoid
+	// the exec and not have to faff around with the mappings.
+	//
+	// Note that Go's stdlib does not support newuidmap, but in the case of
+	// id-mapped mounts, it seems incredibly unlikely that the user will be
+	// requesting us to do a remapping as an unprivileged user with mappings
+	// they have privileges over.
+	logrus.Debugf("spawning dummy process for id-mapping %s", req.id())
+	uidMappings, gidMappings := req.toSys()
+	// We don't need to use /proc/thread-self here because the exe mm of a
+	// thread-group is guaranteed to be the same for all threads by definition.
+	// This lets us avoid having to do runtime.LockOSThread.
+	return os.StartProcess("/proc/self/exe", []string{"runc", "--help"}, &os.ProcAttr{
+		Sys: &syscall.SysProcAttr{
+			Cloneflags:                 unix.CLONE_NEWUSER,
+			UidMappings:                uidMappings,
+			GidMappings:                gidMappings,
+			GidMappingsEnableSetgroups: false,
+			// Put the process into PTRACE_TRACEME mode to allow us to get the
+			// userns without having a proper execve() target.
+			Ptrace: true,
+		},
+	})
+}
+
+func dupFile(f *os.File) (*os.File, error) {
+	newFd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
+	if err != nil {
+		return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err)
+	}
+	return os.NewFile(uintptr(newFd), f.Name()), nil
+}
+
+// Get returns a handle to a /proc/$pid/ns/user nsfs file with the requested
+// mapping. The processes spawned to produce userns nsfds are cached, so if
+// equivalent user namespace mappings are requested, the same user namespace
+// will be returned. The caller is responsible for closing the returned file
+// descriptor.
+func (hs *Handles) Get(req Mapping) (file *os.File, err error) {
+	hs.m.Lock()
+	defer hs.m.Unlock()
+
+	if hs.maps == nil {
+		hs.maps = make(map[string]*os.File)
+	}
+
+	file, ok := hs.maps[req.id()]
+	if !ok {
+		proc, err := spawnProc(req)
+		if err != nil {
+			return nil, fmt.Errorf("failed to spawn dummy process for map %s: %w", req.id(), err)
+		}
+		// Make sure we kill the helper process. We ignore errors because
+		// there's not much we can do about them anyway.
+		defer func() {
+			_ = proc.Kill()
+			_, _ = proc.Wait()
+		}()
+
+		// Stash away a handle to the userns file. This is neater than keeping
+		// the process alive, because Go's GC can handle files much better than
+		// leaked processes, and having long-living useless processes seems
+		// less than ideal.
+		file, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid))
+		if err != nil {
+			return nil, err
+		}
+		hs.maps[req.id()] = file
+	}
+	// Duplicate the file, to make sure the lifecycle of each *os.File we
+	// return is independent.
+	return dupFile(file)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
@ -19,13 +19,14 @@ package utils
 import (
 	"fmt"
 	"os"
+	"runtime"

 	"golang.org/x/sys/unix"
 )

-// MaxSendfdLen is the maximum length of the name of a file descriptor being
-// sent using SendFd. The name of the file handle returned by RecvFd will never
-// be larger than this value.
+// MaxNameLen is the maximum length of the name of a file descriptor being sent
+// using SendFile. The name of the file handle returned by RecvFile will never be
+// larger than this value.
 const MaxNameLen = 4096

 // oobSpace is the size of the oob slice required to store a single FD. Note
@ -33,26 +34,21 @@ const MaxNameLen = 4096
 // so sizeof(fd) = 4.
 var oobSpace = unix.CmsgSpace(4)

-// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
+// RecvFile waits for a file descriptor to be sent over the given AF_UNIX
 // socket. The file name of the remote file descriptor will be recreated
 // locally (it is sent as non-auxiliary data in the same payload).
-func RecvFd(socket *os.File) (*os.File, error) {
-	// For some reason, unix.Recvmsg uses the length rather than the capacity
-	// when passing the msg_controllen and other attributes to recvmsg.  So we
-	// have to actually set the length.
+func RecvFile(socket *os.File) (_ *os.File, Err error) {
 	name := make([]byte, MaxNameLen)
 	oob := make([]byte, oobSpace)

 	sockfd := socket.Fd()
-	n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
+	n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, unix.MSG_CMSG_CLOEXEC)
 	if err != nil {
 		return nil, err
 	}
-
 	if n >= MaxNameLen || oobn != oobSpace {
-		return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
+		return nil, fmt.Errorf("recvfile: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
 	}
-
 	// Truncate.
 	name = name[:n]
 	oob = oob[:oobn]
@ -61,36 +57,63 @@ func RecvFd(socket *os.File) (*os.File, error) {
 	if err != nil {
 		return nil, err
 	}
+
+	// We cannot control how many SCM_RIGHTS we receive, and upon receiving
+	// them all of the descriptors are installed in our fd table, so we need to
+	// parse all of the SCM_RIGHTS we received in order to close all of the
+	// descriptors on error.
+	var fds []int
+	defer func() {
+		for i, fd := range fds {
+			if i == 0 && Err == nil {
+				// Only close the first one on error.
+				continue
+			}
+			// Always close extra ones.
+			_ = unix.Close(fd)
+		}
+	}()
+	var lastErr error
+	for _, scm := range scms {
+		if scm.Header.Type == unix.SCM_RIGHTS {
+			scmFds, err := unix.ParseUnixRights(&scm)
+			if err != nil {
+				lastErr = err
+			} else {
+				fds = append(fds, scmFds...)
+			}
+		}
+	}
+	if lastErr != nil {
+		return nil, lastErr
+	}
+
+	// We do this after collecting the fds to make sure we close them all when
+	// returning an error here.
 	if len(scms) != 1 {
 		return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
 	}
-	scm := scms[0]
-
-	fds, err := unix.ParseUnixRights(&scm)
-	if err != nil {
-		return nil, err
-	}
 	if len(fds) != 1 {
 		return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
 	}
-	fd := uintptr(fds[0])
-
-	return os.NewFile(fd, string(name)), nil
+	return os.NewFile(uintptr(fds[0]), string(name)), nil
 }

-// SendFd sends a file descriptor over the given AF_UNIX socket. In
-// addition, the file.Name() of the given file will also be sent as
-// non-auxiliary data in the same payload (allowing to send contextual
-// information for a file descriptor).
-func SendFd(socket *os.File, name string, fd uintptr) error {
+// SendFile sends a file over the given AF_UNIX socket. file.Name() is also
+// included so that if the other end uses RecvFile, the file will have the same
+// name information.
+func SendFile(socket *os.File, file *os.File) error {
+	name := file.Name()
 	if len(name) >= MaxNameLen {
 		return fmt.Errorf("sendfd: filename too long: %s", name)
 	}
-	return SendFds(socket, []byte(name), int(fd))
+	err := SendRawFd(socket, name, file.Fd())
+	runtime.KeepAlive(file)
+	return err
 }

-// SendFds sends a list of files descriptor and msg over the given AF_UNIX socket.
-func SendFds(socket *os.File, msg []byte, fds ...int) error {
-	oob := unix.UnixRights(fds...)
-	return unix.Sendmsg(int(socket.Fd()), msg, oob, nil, 0)
+// SendRawFd sends a specific file descriptor over the given AF_UNIX socket.
+func SendRawFd(socket *os.File, msg string, fd uintptr) error {
+	oob := unix.UnixRights(int(fd))
+	return unix.Sendmsg(int(socket.Fd()), []byte(msg), oob, nil, 0)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
@ -3,15 +3,12 @@ package utils
 import (
 	"encoding/binary"
 	"encoding/json"
-	"fmt"
 	"io"
 	"os"
 	"path/filepath"
-	"strconv"
 	"strings"
 	"unsafe"

-	securejoin "github.com/cyphar/filepath-securejoin"
 	"golang.org/x/sys/unix"
 )

@ -43,6 +40,9 @@ func ExitStatus(status unix.WaitStatus) int {
 }

 // WriteJSON writes the provided struct v to w using standard json marshaling
+// without a trailing newline. This is used instead of json.Encoder because
+// there might be a problem in json decoder in some cases, see:
+// https://github.com/docker/docker/issues/14203#issuecomment-174177790
 func WriteJSON(w io.Writer, v interface{}) error {
 	data, err := json.Marshal(v)
 	if err != nil {
@ -99,52 +99,16 @@ func stripRoot(root, path string) string {
 	return CleanPath("/" + path)
 }

-// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
-// corresponding to the unsafePath resolved within the root. Before passing the
-// fd, this path is verified to have been inside the root -- so operating on it
-// through the passed fdpath should be safe. Do not access this path through
-// the original path strings, and do not attempt to use the pathname outside of
-// the passed closure (the file handle will be freed once the closure returns).
-func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
-	// Remove the root then forcefully resolve inside the root.
-	unsafePath = stripRoot(root, unsafePath)
-	path, err := securejoin.SecureJoin(root, unsafePath)
-	if err != nil {
-		return fmt.Errorf("resolving path inside rootfs failed: %w", err)
-	}
-
-	// Open the target path.
-	fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
-	if err != nil {
-		return fmt.Errorf("open o_path procfd: %w", err)
-	}
-	defer fh.Close()
-
-	// Double-check the path is the one we expected.
-	procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
-	if realpath, err := os.Readlink(procfd); err != nil {
-		return fmt.Errorf("procfd verification failed: %w", err)
-	} else if realpath != path {
-		return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
-	}
-
-	// Run the closure.
-	return fn(procfd)
-}
-
-// SearchLabels searches a list of key-value pairs for the provided key and
-// returns the corresponding value. The pairs must be separated with '='.
-func SearchLabels(labels []string, query string) string {
-	for _, l := range labels {
-		parts := strings.SplitN(l, "=", 2)
-		if len(parts) < 2 {
-			continue
-		}
-		if parts[0] == query {
-			return parts[1]
+// SearchLabels searches through a list of key=value pairs for a given key,
+// returning its value and a boolean flag telling whether the key exists.
+func SearchLabels(labels []string, key string) (string, bool) {
+	key += "="
+	for _, s := range labels {
+		if strings.HasPrefix(s, key) {
+			return s[len(key):], true
 		}
 	}
-	return ""
+	return "", false
 }

 // Annotations returns the bundle path and user defined annotations from the
--- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
@ -5,10 +5,16 @@ package utils

 import (
 	"fmt"
+	"math"
 	"os"
+	"path/filepath"
+	"runtime"
 	"strconv"
+	"sync"
 	_ "unsafe" // for go:linkname

+	securejoin "github.com/cyphar/filepath-securejoin"
+	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )

@ -24,12 +30,39 @@ func EnsureProcHandle(fh *os.File) error {
 	return nil
 }

+var (
+	haveCloseRangeCloexecBool bool
+	haveCloseRangeCloexecOnce sync.Once
+)
+
+func haveCloseRangeCloexec() bool {
+	haveCloseRangeCloexecOnce.Do(func() {
+		// Make sure we're not closing a random file descriptor.
+		tmpFd, err := unix.FcntlInt(0, unix.F_DUPFD_CLOEXEC, 0)
+		if err != nil {
+			return
+		}
+		defer unix.Close(tmpFd)
+
+		err = unix.CloseRange(uint(tmpFd), uint(tmpFd), unix.CLOSE_RANGE_CLOEXEC)
+		// Any error means we cannot use close_range(CLOSE_RANGE_CLOEXEC).
+		// -ENOSYS and -EINVAL ultimately mean we don't have support, but any
+		// other potential error would imply that even the most basic close
+		// operation wouldn't work.
+		haveCloseRangeCloexecBool = err == nil
+	})
+	return haveCloseRangeCloexecBool
+}
+
 type fdFunc func(fd int)

 // fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
 // the current process.
 func fdRangeFrom(minFd int, fn fdFunc) error {
-	fdDir, err := os.Open("/proc/self/fd")
+	procSelfFd, closer := ProcThreadSelf("fd")
+	defer closer()
+
+	fdDir, err := os.Open(procSelfFd)
 	if err != nil {
 		return err
 	}
@ -67,6 +100,12 @@ func fdRangeFrom(minFd int, fn fdFunc) error {
 // CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
 // equal to minFd in the current process.
 func CloseExecFrom(minFd int) error {
+	// Use close_range(CLOSE_RANGE_CLOEXEC) if possible.
+	if haveCloseRangeCloexec() {
+		err := unix.CloseRange(uint(minFd), math.MaxUint, unix.CLOSE_RANGE_CLOEXEC)
+		return os.NewSyscallError("close_range", err)
+	}
+	// Otherwise, fall back to the standard loop.
 	return fdRangeFrom(minFd, unix.CloseOnExec)
 }

@ -89,7 +128,8 @@ func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
 // *os.File operations would apply to the wrong file). This function is only
 // intended to be called from the last stage of runc init.
 func UnsafeCloseFrom(minFd int) error {
-	// We must not close some file descriptors.
+	// We cannot use close_range(2) even if it is available, because we must
+	// not close some file descriptors.
 	return fdRangeFrom(minFd, func(fd int) {
 		if runtime_IsPollDescriptor(uintptr(fd)) {
 			// These are the Go runtimes internal netpoll file descriptors.
@ -107,11 +147,117 @@ func UnsafeCloseFrom(minFd int) error {
 	})
 }

-// NewSockPair returns a new unix socket pair
-func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
+// NewSockPair returns a new SOCK_STREAM unix socket pair.
+func NewSockPair(name string) (parent, child *os.File, err error) {
 	fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
 	if err != nil {
 		return nil, nil, err
 	}
 	return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
 }
+
+// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
+// corresponding to the unsafePath resolved within the root. Before passing the
+// fd, this path is verified to have been inside the root -- so operating on it
+// through the passed fdpath should be safe. Do not access this path through
+// the original path strings, and do not attempt to use the pathname outside of
+// the passed closure (the file handle will be freed once the closure returns).
+func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
+	// Remove the root then forcefully resolve inside the root.
+	unsafePath = stripRoot(root, unsafePath)
+	path, err := securejoin.SecureJoin(root, unsafePath)
+	if err != nil {
+		return fmt.Errorf("resolving path inside rootfs failed: %w", err)
+	}
+
+	procSelfFd, closer := ProcThreadSelf("fd/")
+	defer closer()
+
+	// Open the target path.
+	fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return fmt.Errorf("open o_path procfd: %w", err)
+	}
+	defer fh.Close()
+
+	procfd := filepath.Join(procSelfFd, strconv.Itoa(int(fh.Fd())))
+	// Double-check the path is the one we expected.
+	if realpath, err := os.Readlink(procfd); err != nil {
+		return fmt.Errorf("procfd verification failed: %w", err)
+	} else if realpath != path {
+		return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
+	}
+
+	return fn(procfd)
+}
+
+type ProcThreadSelfCloser func()
+
+var (
+	haveProcThreadSelf     bool
+	haveProcThreadSelfOnce sync.Once
+)
+
+// ProcThreadSelf returns a string that is equivalent to
+// /proc/thread-self/<subpath>, with a graceful fallback on older kernels where
+// /proc/thread-self doesn't exist. This method DOES NOT use SecureJoin,
+// meaning that the passed string needs to be trusted. The caller _must_ call
+// the returned ProcThreadSelfCloser function (which is runtime.UnlockOSThread)
+// *only once* after it has finished using the returned path string.
+func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) {
+	haveProcThreadSelfOnce.Do(func() {
+		if _, err := os.Stat("/proc/thread-self/"); err == nil {
+			haveProcThreadSelf = true
+		} else {
+			logrus.Debugf("cannot stat /proc/thread-self (%v), falling back to /proc/self/task/<tid>", err)
+		}
+	})
+
+	// We need to lock our thread until the caller is done with the path string
+	// because any non-atomic operation on the path (such as opening a file,
+	// then reading it) could be interrupted by the Go runtime where the
+	// underlying thread is swapped out and the original thread is killed,
+	// resulting in pull-your-hair-out-hard-to-debug issues in the caller. In
+	// addition, the pre-3.17 fallback makes everything non-atomic because the
+	// same thing could happen between unix.Gettid() and the path operations.
+	//
+	// In theory, we don't need to lock in the atomic user case when using
+	// /proc/thread-self/, but it's better to be safe than sorry (and there are
+	// only one or two truly atomic users of /proc/thread-self/).
+	runtime.LockOSThread()
+
+	threadSelf := "/proc/thread-self/"
+	if !haveProcThreadSelf {
+		// Pre-3.17 kernels did not have /proc/thread-self, so do it manually.
+		threadSelf = "/proc/self/task/" + strconv.Itoa(unix.Gettid()) + "/"
+		if _, err := os.Stat(threadSelf); err != nil {
+			// Unfortunately, this code is called from rootfs_linux.go where we
+			// are running inside the pid namespace of the container but /proc
+			// is the host's procfs. Unfortunately there is no real way to get
+			// the correct tid to use here (the kernel age means we cannot do
+			// things like set up a private fsopen("proc") -- even scanning
+			// NSpid in all of the tasks in /proc/self/task/*/status requires
+			// Linux 4.1).
+			//
+			// So, we just have to assume that /proc/self is acceptable in this
+			// one specific case.
+			if os.Getpid() == 1 {
+				logrus.Debugf("/proc/thread-self (tid=%d) cannot be emulated inside the initial container setup -- using /proc/self instead: %v", unix.Gettid(), err)
+			} else {
+				// This should never happen, but the fallback should work in most cases...
+				logrus.Warnf("/proc/thread-self could not be emulated for pid=%d (tid=%d) -- using more buggy /proc/self fallback instead: %v", os.Getpid(), unix.Gettid(), err)
+			}
+			threadSelf = "/proc/self/"
+		}
+	}
+	return threadSelf + subpath, runtime.UnlockOSThread
+}
+
+// ProcThreadSelfFd is a small wrapper around ProcThreadSelf to make it easier to
+// create a /proc/thread-self handle for a given file descriptor.
+//
+// It is basically equivalent to ProcThreadSelf(fmt.Sprintf("fd/%d", fd)), but
+// without using fmt.Sprintf to avoid unneeded overhead.
+func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) {
+	return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10))
+}
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -986,12 +986,11 @@ github.com/opencontainers/go-digest/digestset
 github.com/opencontainers/image-spec/identity
 github.com/opencontainers/image-spec/specs-go
 github.com/opencontainers/image-spec/specs-go/v1
-# github.com/opencontainers/runc v1.1.12
-## explicit; go 1.17
+# github.com/opencontainers/runc v1.2.0-rc.1
+## explicit; go 1.20
 github.com/opencontainers/runc/libcontainer/cgroups
 github.com/opencontainers/runc/libcontainer/configs
 github.com/opencontainers/runc/libcontainer/devices
-github.com/opencontainers/runc/libcontainer/user
 github.com/opencontainers/runc/libcontainer/userns
 github.com/opencontainers/runc/libcontainer/utils
 # github.com/opencontainers/runtime-spec v1.2.0