vendor: github.com/opencontainers/runc v1.1.12

- release notes: https://github.com/opencontainers/runc/releases/tag/v1.1.12
- full diff: https://github.com/opencontainers/runc/compare/v1.1.5...v1.1.12

Signed-off-by: Andrey Epifanov <aepifanov@mirantis.com>
This commit is contained in:
Andrey Epifanov 2024-01-31 13:22:09 -08:00
parent e23be1cc51
commit 76894512ab
No known key found for this signature in database
GPG key ID: 58BE16FE2B25829D
14 changed files with 710 additions and 46 deletions

View file

@ -65,7 +65,7 @@ require (
github.com/morikuni/aec v1.0.0
github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b
github.com/opencontainers/runc v1.1.5
github.com/opencontainers/runc v1.1.12
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
github.com/opencontainers/selinux v1.10.2
github.com/pelletier/go-toml v1.9.5

View file

@ -172,7 +172,6 @@ github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cb
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/checkpoint-restore/go-criu/v4 v4.1.0/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw=
github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M=
github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
@ -333,7 +332,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw=
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4=
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg=
github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ=
@ -746,7 +744,6 @@ github.com/moby/sys/mount v0.3.3 h1:fX1SVkXFJ47XWDoeFW4Sq7PdQJnV2QIDZAqjNqgEjUs=
github.com/moby/sys/mount v0.3.3/go.mod h1:PBaEorSNTLG5t/+4EgukEQVlAvVEc6ZjTySwKdqp5K0=
github.com/moby/sys/mountinfo v0.4.0/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A=
github.com/moby/sys/mountinfo v0.4.1/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A=
github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78=
github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc=
@ -812,8 +809,8 @@ github.com/opencontainers/runc v1.0.0-rc8.0.20190926000215-3e425f80a8c9/go.mod h
github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
github.com/opencontainers/runc v1.0.0-rc93/go.mod h1:3NOsor4w32B2tC0Zbl8Knk4Wg84SM2ImC1fxBuqJ/H0=
github.com/opencontainers/runc v1.0.2/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0=
github.com/opencontainers/runc v1.1.5 h1:L44KXEpKmfWDcS02aeGm8QNTFXTo2D+8MYGDIJ/GDEs=
github.com/opencontainers/runc v1.1.5/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg=
github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss=
github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8=
github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.2-0.20190207185410-29686dbc5559/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
@ -825,7 +822,6 @@ github.com/opencontainers/runtime-tools v0.0.0-20181011054405-1d69bd0f9c39/go.mo
github.com/opencontainers/selinux v1.6.0/go.mod h1:VVGKuOLlE7v4PJyT6h7mNWvq1rzqiriPsEqVhc+svHE=
github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo=
github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8=
github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.10.2 h1:NFy2xCsjn7+WspbfZkUd5zyVeisV7VFbPSP96+8/ha4=
github.com/opencontainers/selinux v1.10.2/go.mod h1:cARutUbaUrlRClyvxOICCgKixCs6L05aUsohzA3EkHQ=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
@ -911,7 +907,6 @@ github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.0.4-0.20170822132746-89742aefa4b2/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
github.com/sirupsen/logrus v1.0.6/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
@ -1338,7 +1333,6 @@ golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210908233432-aa78b53d3365/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211210111614-af8b64212486/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

View file

@ -10,6 +10,7 @@ import (
"strings"
"sync"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@ -76,16 +77,16 @@ var (
// TestMode is set to true by unit tests that need "fake" cgroupfs.
TestMode bool
cgroupFd int = -1
prepOnce sync.Once
prepErr error
resolveFlags uint64
cgroupRootHandle *os.File
prepOnce sync.Once
prepErr error
resolveFlags uint64
)
func prepareOpenat2() error {
prepOnce.Do(func() {
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
Flags: unix.O_DIRECTORY | unix.O_PATH,
Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC,
})
if err != nil {
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
@ -96,15 +97,16 @@ func prepareOpenat2() error {
}
return
}
file := os.NewFile(uintptr(fd), cgroupfsDir)
var st unix.Statfs_t
if err = unix.Fstatfs(fd, &st); err != nil {
if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
logrus.Warnf("falling back to securejoin: %s", prepErr)
return
}
cgroupFd = fd
cgroupRootHandle = file
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
if st.Type == unix.CGROUP2_SUPER_MAGIC {
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
@ -122,7 +124,7 @@ func openFile(dir, file string, flags int) (*os.File, error) {
flags |= os.O_TRUNC | os.O_CREATE
mode = 0o600
}
path := path.Join(dir, file)
path := path.Join(dir, utils.CleanPath(file))
if prepareOpenat2() != nil {
return openFallback(path, flags, mode)
}
@ -131,7 +133,7 @@ func openFile(dir, file string, flags int) (*os.File, error) {
return openFallback(path, flags, mode)
}
fd, err := unix.Openat2(cgroupFd, relPath,
fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath,
&unix.OpenHow{
Resolve: resolveFlags,
Flags: uint64(flags) | unix.O_CLOEXEC,
@ -139,20 +141,20 @@ func openFile(dir, file string, flags int) (*os.File, error) {
})
if err != nil {
err = &os.PathError{Op: "openat2", Path: path, Err: err}
// Check if cgroupFd is still opened to cgroupfsDir
// Check if cgroupRootHandle is still opened to cgroupfsDir
// (happens when this package is incorrectly used
// across the chroot/pivot_root/mntns boundary, or
// when /sys/fs/cgroup is remounted).
//
// TODO: if such usage will ever be common, amend this
// to reopen cgroupFd and retry openat2.
fdStr := strconv.Itoa(cgroupFd)
// to reopen cgroupRootHandle and retry openat2.
fdStr := strconv.Itoa(int(cgroupRootHandle.Fd()))
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
if fdDest != cgroupfsDir {
// Wrap the error so it is clear that cgroupFd
// Wrap the error so it is clear that cgroupRootHandle
// is opened to an unexpected/wrong directory.
err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
fdStr, fdDest, cgroupfsDir, err)
err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err)
}
return nil, err
}

View file

@ -78,6 +78,8 @@ type MemoryStats struct {
Usage MemoryData `json:"usage,omitempty"`
// usage of memory + swap
SwapUsage MemoryData `json:"swap_usage,omitempty"`
// usage of swap only
SwapOnlyUsage MemoryData `json:"swap_only_usage,omitempty"`
// usage of kernel memory
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
// usage of kernel TCP memory

View file

@ -162,8 +162,10 @@ func readProcsFile(dir string) ([]int, error) {
// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
// "cpu": "/user.slice/user-1000.slice"
// "pids": "/user.slice/user-1000.slice"
//
// "cpu": "/user.slice/user-1000.slice"
// "pids": "/user.slice/user-1000.slice"
//
// etc.
//
// Note that for cgroup v2 unified hierarchy, there are no per-controller

View file

@ -21,9 +21,9 @@ type Rlimit struct {
// IDMap represents UID/GID Mappings for User Namespaces.
type IDMap struct {
ContainerID int `json:"container_id"`
HostID int `json:"host_id"`
Size int `json:"size"`
ContainerID int64 `json:"container_id"`
HostID int64 `json:"host_id"`
Size int64 `json:"size"`
}
// Seccomp represents syscall restrictions

View file

@ -1,6 +1,10 @@
package configs
import "errors"
import (
"errors"
"fmt"
"math"
)
var (
errNoUIDMap = errors.New("User namespaces enabled, but no uid mappings found.")
@ -16,11 +20,18 @@ func (c Config) HostUID(containerId int) (int, error) {
if c.UidMappings == nil {
return -1, errNoUIDMap
}
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
id, found := c.hostIDFromMapping(int64(containerId), c.UidMappings)
if !found {
return -1, errNoUserMap
}
return id, nil
// If we are a 32-bit binary running on a 64-bit system, it's possible
// the mapped user is too large to store in an int, which means we
// cannot do the mapping. We can't just return an int64, because
// os.Setuid() takes an int.
if id > math.MaxInt {
return -1, fmt.Errorf("mapping for uid %d (host id %d) is larger than native integer size (%d)", containerId, id, math.MaxInt)
}
return int(id), nil
}
// Return unchanged id.
return containerId, nil
@ -39,11 +50,18 @@ func (c Config) HostGID(containerId int) (int, error) {
if c.GidMappings == nil {
return -1, errNoGIDMap
}
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
id, found := c.hostIDFromMapping(int64(containerId), c.GidMappings)
if !found {
return -1, errNoGroupMap
}
return id, nil
// If we are a 32-bit binary running on a 64-bit system, it's possible
// the mapped user is too large to store in an int, which means we
// cannot do the mapping. We can't just return an int64, because
// os.Setgid() takes an int.
if id > math.MaxInt {
return -1, fmt.Errorf("mapping for gid %d (host id %d) is larger than native integer size (%d)", containerId, id, math.MaxInt)
}
return int(id), nil
}
// Return unchanged id.
return containerId, nil
@ -57,7 +75,7 @@ func (c Config) HostRootGID() (int, error) {
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
func (c Config) hostIDFromMapping(containerID int64, uMap []IDMap) (int64, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)

View file

@ -201,7 +201,7 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
if err != nil {
// We should return no error if EOF is reached
// without a match.
if err == io.EOF { //nolint:errorlint // comparison with io.EOF is legit, https://github.com/polyfloyd/go-errorlint/pull/12
if err == io.EOF {
err = nil
}
return out, err
@ -280,13 +280,13 @@ func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath
// found in any entry in passwd and group respectively.
//
// Examples of valid user specifications are:
// * ""
// * "user"
// * "uid"
// * "user:group"
// * "uid:gid
// * "user:gid"
// * "uid:group"
// - ""
// - "user"
// - "uid"
// - "user:group"
// - "uid:gid
// - "user:gid"
// - "uid:group"
//
// It should be noted that if you specify a numeric user or group id, they will
// not be evaluated as usernames (only the metadata will be filled). So attempting

View file

@ -0,0 +1,79 @@
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <stdarg.h>
#include <stdlib.h>
/*
* All of the code here is run inside an aync-signal-safe context, so we need
* to be careful to not call any functions that could cause issues. In theory,
* since we are a Go program, there are fewer restrictions in practice, it's
* better to be safe than sorry.
*
* The only exception is exit, which we need to call to make sure we don't
* return into runc.
*/
void bail(int pipefd, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
vdprintf(pipefd, fmt, args);
va_end(args);
exit(1);
}
int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd)
{
char buffer[4096] = { 0 };
pid_t child = fork();
if (child != 0)
return child;
/* in child */
/* Join the target userns. */
int nsfd = open(userns_path, O_RDONLY);
if (nsfd < 0)
bail(errfd, "open userns path %s failed: %m", userns_path);
int err = setns(nsfd, CLONE_NEWUSER);
if (err < 0)
bail(errfd, "setns %s failed: %m", userns_path);
close(nsfd);
/* Pipe the requested file contents. */
int fd = open(path, O_RDONLY);
if (fd < 0)
bail(errfd, "open %s in userns %s failed: %m", path, userns_path);
int nread, ntotal = 0;
while ((nread = read(fd, buffer, sizeof(buffer))) != 0) {
if (nread < 0)
bail(errfd, "read bytes from %s failed (after %d total bytes read): %m", path, ntotal);
ntotal += nread;
int nwritten = 0;
while (nwritten < nread) {
int n = write(outfd, buffer, nread - nwritten);
if (n < 0)
bail(errfd, "write %d bytes from %s failed (after %d bytes written): %m",
nread - nwritten, path, nwritten);
nwritten += n;
}
if (nread != nwritten)
bail(errfd, "mismatch for bytes read and written: %d read != %d written", nread, nwritten);
}
close(fd);
close(outfd);
close(errfd);
/* We must exit here, otherwise we would return into a forked runc. */
exit(0);
}

View file

@ -0,0 +1,186 @@
//go:build linux
package userns
import (
"bufio"
"bytes"
"fmt"
"io"
"os"
"unsafe"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus"
)
/*
#include <stdlib.h>
extern int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd);
*/
import "C"
func parseIdmapData(data []byte) (ms []configs.IDMap, err error) {
scanner := bufio.NewScanner(bytes.NewReader(data))
for scanner.Scan() {
var m configs.IDMap
line := scanner.Text()
if _, err := fmt.Sscanf(line, "%d %d %d", &m.ContainerID, &m.HostID, &m.Size); err != nil {
return nil, fmt.Errorf("parsing id map failed: invalid format in line %q: %w", line, err)
}
ms = append(ms, m)
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("parsing id map failed: %w", err)
}
return ms, nil
}
// Do something equivalent to nsenter --user=<nsPath> cat <path>, but more
// efficiently. Returns the contents of the requested file from within the user
// namespace.
func spawnUserNamespaceCat(nsPath string, path string) ([]byte, error) {
rdr, wtr, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("create pipe for userns spawn failed: %w", err)
}
defer rdr.Close()
defer wtr.Close()
errRdr, errWtr, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("create error pipe for userns spawn failed: %w", err)
}
defer errRdr.Close()
defer errWtr.Close()
cNsPath := C.CString(nsPath)
defer C.free(unsafe.Pointer(cNsPath))
cPath := C.CString(path)
defer C.free(unsafe.Pointer(cPath))
childPid := C.spawn_userns_cat(cNsPath, cPath, C.int(wtr.Fd()), C.int(errWtr.Fd()))
if childPid < 0 {
return nil, fmt.Errorf("failed to spawn fork for userns")
} else if childPid == 0 {
// this should never happen
panic("runc executing inside fork child -- unsafe state!")
}
// We are in the parent -- close the write end of the pipe before reading.
wtr.Close()
output, err := io.ReadAll(rdr)
rdr.Close()
if err != nil {
return nil, fmt.Errorf("reading from userns spawn failed: %w", err)
}
// Ditto for the error pipe.
errWtr.Close()
errOutput, err := io.ReadAll(errRdr)
errRdr.Close()
if err != nil {
return nil, fmt.Errorf("reading from userns spawn error pipe failed: %w", err)
}
errOutput = bytes.TrimSpace(errOutput)
// Clean up the child.
child, err := os.FindProcess(int(childPid))
if err != nil {
return nil, fmt.Errorf("could not find userns spawn process: %w", err)
}
state, err := child.Wait()
if err != nil {
return nil, fmt.Errorf("failed to wait for userns spawn process: %w", err)
}
if !state.Success() {
errStr := string(errOutput)
if errStr == "" {
errStr = fmt.Sprintf("unknown error (status code %d)", state.ExitCode())
}
return nil, fmt.Errorf("userns spawn: %s", errStr)
} else if len(errOutput) > 0 {
// We can just ignore weird output in the error pipe if the process
// didn't bail(), but for completeness output for debugging.
logrus.Debugf("userns spawn succeeded but unexpected error message found: %s", string(errOutput))
}
// The subprocess succeeded, return whatever it wrote to the pipe.
return output, nil
}
func GetUserNamespaceMappings(nsPath string) (uidMap, gidMap []configs.IDMap, err error) {
var (
pid int
extra rune
tryFastPath bool
)
// nsPath is usually of the form /proc/<pid>/ns/user, which means that we
// already have a pid that is part of the user namespace and thus we can
// just use the pid to read from /proc/<pid>/*id_map.
//
// Note that Sscanf doesn't consume the whole input, so we check for any
// trailing data with %c. That way, we can be sure the pattern matched
// /proc/$pid/ns/user _exactly_ iff n === 1.
if n, _ := fmt.Sscanf(nsPath, "/proc/%d/ns/user%c", &pid, &extra); n == 1 {
tryFastPath = pid > 0
}
for _, mapType := range []struct {
name string
idMap *[]configs.IDMap
}{
{"uid_map", &uidMap},
{"gid_map", &gidMap},
} {
var mapData []byte
if tryFastPath {
path := fmt.Sprintf("/proc/%d/%s", pid, mapType.name)
data, err := os.ReadFile(path)
if err != nil {
// Do not error out here -- we need to try the slow path if the
// fast path failed.
logrus.Debugf("failed to use fast path to read %s from userns %s (error: %s), falling back to slow userns-join path", mapType.name, nsPath, err)
} else {
mapData = data
}
} else {
logrus.Debugf("cannot use fast path to read %s from userns %s, falling back to slow userns-join path", mapType.name, nsPath)
}
if mapData == nil {
// We have to actually join the namespace if we cannot take the
// fast path. The path is resolved with respect to the child
// process, so just use /proc/self.
data, err := spawnUserNamespaceCat(nsPath, "/proc/self/"+mapType.name)
if err != nil {
return nil, nil, err
}
mapData = data
}
idMap, err := parseIdmapData(mapData)
if err != nil {
return nil, nil, fmt.Errorf("failed to parse %s of userns %s: %w", mapType.name, nsPath, err)
}
*mapType.idMap = idMap
}
return uidMap, gidMap, nil
}
// IsSameMapping returns whether or not the two id mappings are the same. Note
// that if the order of the mappings is different, or a mapping has been split,
// the mappings will be considered different.
func IsSameMapping(a, b []configs.IDMap) bool {
if len(a) != len(b) {
return false
}
for idx := range a {
if a[idx] != b[idx] {
return false
}
}
return true
}

View file

@ -0,0 +1,96 @@
package utils
/*
* Copyright 2016, 2017 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import (
"fmt"
"os"
"golang.org/x/sys/unix"
)
// MaxSendfdLen is the maximum length of the name of a file descriptor being
// sent using SendFd. The name of the file handle returned by RecvFd will never
// be larger than this value.
const MaxNameLen = 4096
// oobSpace is the size of the oob slice required to store a single FD. Note
// that unix.UnixRights appears to make the assumption that fd is always int32,
// so sizeof(fd) = 4.
var oobSpace = unix.CmsgSpace(4)
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
// socket. The file name of the remote file descriptor will be recreated
// locally (it is sent as non-auxiliary data in the same payload).
func RecvFd(socket *os.File) (*os.File, error) {
// For some reason, unix.Recvmsg uses the length rather than the capacity
// when passing the msg_controllen and other attributes to recvmsg. So we
// have to actually set the length.
name := make([]byte, MaxNameLen)
oob := make([]byte, oobSpace)
sockfd := socket.Fd()
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
if err != nil {
return nil, err
}
if n >= MaxNameLen || oobn != oobSpace {
return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
}
// Truncate.
name = name[:n]
oob = oob[:oobn]
scms, err := unix.ParseSocketControlMessage(oob)
if err != nil {
return nil, err
}
if len(scms) != 1 {
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
}
scm := scms[0]
fds, err := unix.ParseUnixRights(&scm)
if err != nil {
return nil, err
}
if len(fds) != 1 {
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
}
fd := uintptr(fds[0])
return os.NewFile(fd, string(name)), nil
}
// SendFd sends a file descriptor over the given AF_UNIX socket. In
// addition, the file.Name() of the given file will also be sent as
// non-auxiliary data in the same payload (allowing to send contextual
// information for a file descriptor).
func SendFd(socket *os.File, name string, fd uintptr) error {
if len(name) >= MaxNameLen {
return fmt.Errorf("sendfd: filename too long: %s", name)
}
return SendFds(socket, []byte(name), int(fd))
}
// SendFds sends a list of files descriptor and msg over the given AF_UNIX socket.
func SendFds(socket *os.File, msg []byte, fds ...int) error {
oob := unix.UnixRights(fds...)
return unix.Sendmsg(int(socket.Fd()), msg, oob, nil, 0)
}

View file

@ -0,0 +1,167 @@
package utils
import (
"encoding/binary"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"unsafe"
securejoin "github.com/cyphar/filepath-securejoin"
"golang.org/x/sys/unix"
)
const (
exitSignalOffset = 128
)
// NativeEndian is the native byte order of the host system.
var NativeEndian binary.ByteOrder
func init() {
// Copied from <golang.org/x/net/internal/socket/sys.go>.
i := uint32(1)
b := (*[4]byte)(unsafe.Pointer(&i))
if b[0] == 1 {
NativeEndian = binary.LittleEndian
} else {
NativeEndian = binary.BigEndian
}
}
// ExitStatus returns the correct exit status for a process based on if it
// was signaled or exited cleanly
func ExitStatus(status unix.WaitStatus) int {
if status.Signaled() {
return exitSignalOffset + int(status.Signal())
}
return status.ExitStatus()
}
// WriteJSON writes the provided struct v to w using standard json marshaling
func WriteJSON(w io.Writer, v interface{}) error {
data, err := json.Marshal(v)
if err != nil {
return err
}
_, err = w.Write(data)
return err
}
// CleanPath makes a path safe for use with filepath.Join. This is done by not
// only cleaning the path, but also (if the path is relative) adding a leading
// '/' and cleaning it (then removing the leading '/'). This ensures that a
// path resulting from prepending another path will always resolve to lexically
// be a subdirectory of the prefixed path. This is all done lexically, so paths
// that include symlinks won't be safe as a result of using CleanPath.
func CleanPath(path string) string {
// Deal with empty strings nicely.
if path == "" {
return ""
}
// Ensure that all paths are cleaned (especially problematic ones like
// "/../../../../../" which can cause lots of issues).
path = filepath.Clean(path)
// If the path isn't absolute, we need to do more processing to fix paths
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
// paths to relative ones.
if !filepath.IsAbs(path) {
path = filepath.Clean(string(os.PathSeparator) + path)
// This can't fail, as (by definition) all paths are relative to root.
path, _ = filepath.Rel(string(os.PathSeparator), path)
}
// Clean the path again for good measure.
return filepath.Clean(path)
}
// stripRoot returns the passed path, stripping the root path if it was
// (lexicially) inside it. Note that both passed paths will always be treated
// as absolute, and the returned path will also always be absolute. In
// addition, the paths are cleaned before stripping the root.
func stripRoot(root, path string) string {
// Make the paths clean and absolute.
root, path = CleanPath("/"+root), CleanPath("/"+path)
switch {
case path == root:
path = "/"
case root == "/":
// do nothing
case strings.HasPrefix(path, root+"/"):
path = strings.TrimPrefix(path, root+"/")
}
return CleanPath("/" + path)
}
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
// corresponding to the unsafePath resolved within the root. Before passing the
// fd, this path is verified to have been inside the root -- so operating on it
// through the passed fdpath should be safe. Do not access this path through
// the original path strings, and do not attempt to use the pathname outside of
// the passed closure (the file handle will be freed once the closure returns).
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
// Remove the root then forcefully resolve inside the root.
unsafePath = stripRoot(root, unsafePath)
path, err := securejoin.SecureJoin(root, unsafePath)
if err != nil {
return fmt.Errorf("resolving path inside rootfs failed: %w", err)
}
// Open the target path.
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("open o_path procfd: %w", err)
}
defer fh.Close()
// Double-check the path is the one we expected.
procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
if realpath, err := os.Readlink(procfd); err != nil {
return fmt.Errorf("procfd verification failed: %w", err)
} else if realpath != path {
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
}
// Run the closure.
return fn(procfd)
}
// SearchLabels searches a list of key-value pairs for the provided key and
// returns the corresponding value. The pairs must be separated with '='.
func SearchLabels(labels []string, query string) string {
for _, l := range labels {
parts := strings.SplitN(l, "=", 2)
if len(parts) < 2 {
continue
}
if parts[0] == query {
return parts[1]
}
}
return ""
}
// Annotations returns the bundle path and user defined annotations from the
// libcontainer state. We need to remove the bundle because that is a label
// added by libcontainer.
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
userAnnotations = make(map[string]string)
for _, l := range labels {
parts := strings.SplitN(l, "=", 2)
if len(parts) < 2 {
continue
}
if parts[0] == "bundle" {
bundle = parts[1]
} else {
userAnnotations[parts[0]] = parts[1]
}
}
return
}

View file

@ -0,0 +1,117 @@
//go:build !windows
// +build !windows
package utils
import (
"fmt"
"os"
"strconv"
_ "unsafe" // for go:linkname
"golang.org/x/sys/unix"
)
// EnsureProcHandle returns whether or not the given file handle is on procfs.
func EnsureProcHandle(fh *os.File) error {
var buf unix.Statfs_t
if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
}
if buf.Type != unix.PROC_SUPER_MAGIC {
return fmt.Errorf("%s is not on procfs", fh.Name())
}
return nil
}
type fdFunc func(fd int)
// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
// the current process.
func fdRangeFrom(minFd int, fn fdFunc) error {
fdDir, err := os.Open("/proc/self/fd")
if err != nil {
return err
}
defer fdDir.Close()
if err := EnsureProcHandle(fdDir); err != nil {
return err
}
fdList, err := fdDir.Readdirnames(-1)
if err != nil {
return err
}
for _, fdStr := range fdList {
fd, err := strconv.Atoi(fdStr)
// Ignore non-numeric file names.
if err != nil {
continue
}
// Ignore descriptors lower than our specified minimum.
if fd < minFd {
continue
}
// Ignore the file descriptor we used for readdir, as it will be closed
// when we return.
if uintptr(fd) == fdDir.Fd() {
continue
}
// Run the closure.
fn(fd)
}
return nil
}
// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
// equal to minFd in the current process.
func CloseExecFrom(minFd int) error {
return fdRangeFrom(minFd, unix.CloseOnExec)
}
//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor
// In order to make sure we do not close the internal epoll descriptors the Go
// runtime uses, we need to ensure that we skip descriptors that match
// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing,
// unfortunately there's no other way to be sure we're only keeping the file
// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
// current process, except for those critical to Go's runtime (such as the
// netpoll management descriptors).
//
// NOTE: That this function is incredibly dangerous to use in most Go code, as
// closing file descriptors from underneath *os.File handles can lead to very
// bad behaviour (the closed file descriptor can be re-used and then any
// *os.File operations would apply to the wrong file). This function is only
// intended to be called from the last stage of runc init.
func UnsafeCloseFrom(minFd int) error {
// We must not close some file descriptors.
return fdRangeFrom(minFd, func(fd int) {
if runtime_IsPollDescriptor(uintptr(fd)) {
// These are the Go runtimes internal netpoll file descriptors.
// These file descriptors are operated on deep in the Go scheduler,
// and closing those files from underneath Go can result in panics.
// There is no issue with keeping them because they are not
// executable and are not useful to an attacker anyway. Also we
// don't have any choice.
return
}
// There's nothing we can do about errors from close(2), and the
// only likely error to be seen is EBADF which indicates the fd was
// already closed (in which case, we got what we wanted).
_ = unix.Close(fd)
})
}
// NewSockPair returns a new unix socket pair
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}

5
vendor/modules.txt vendored
View file

@ -713,13 +713,14 @@ github.com/opencontainers/go-digest/digestset
github.com/opencontainers/image-spec/identity
github.com/opencontainers/image-spec/specs-go
github.com/opencontainers/image-spec/specs-go/v1
# github.com/opencontainers/runc v1.1.5
## explicit; go 1.16
# github.com/opencontainers/runc v1.1.12
## explicit; go 1.17
github.com/opencontainers/runc/libcontainer/cgroups
github.com/opencontainers/runc/libcontainer/configs
github.com/opencontainers/runc/libcontainer/devices
github.com/opencontainers/runc/libcontainer/user
github.com/opencontainers/runc/libcontainer/userns
github.com/opencontainers/runc/libcontainer/utils
# github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
## explicit
github.com/opencontainers/runtime-spec/specs-go