vendor runc 67169a9d43456ff0d5ae12b967acb8e366e2f181

v1.0.0-rc91-48-g67169a9d

Signed-off-by: Jintao Zhang <zhangjintao9020@gmail.com>
This commit is contained in:
Jintao Zhang 2020-07-30 06:34:33 +00:00
parent 9424eccfcd
commit 9ad35b7e69
37 changed files with 807 additions and 602 deletions

View file

@ -30,7 +30,7 @@ func deviceCgroup(d *configs.Device) specs.LinuxDeviceCgroup {
Type: string(d.Type),
Major: &d.Major,
Minor: &d.Minor,
Access: d.Permissions,
Access: string(d.Permissions),
}
}

View file

@ -13,7 +13,7 @@ github.com/konsorten/go-windows-terminal-sequences edb144dfd453055e1e49a3d8b410
github.com/sirupsen/logrus 60c74ad9be0d874af0ab0daef6ab07c5c5911f0d # v1.6.0
github.com/tchap/go-patricia a7f0089c6f496e8e70402f61733606daa326cac5 # v2.3.0
golang.org/x/net 0de0cce0169b09b364e001f108dc0399ea8630b3
golang.org/x/sys 85ca7c5b95cdf1e557abb38a283d1e61a5959c31
golang.org/x/sys 9dae0f8f577553e0f21298e18926efc9644c281d
github.com/docker/go-units 519db1ee28dcc9fd2474ae59fca29a810482bfb1 # v0.4.0
github.com/docker/go-connections 7395e3f8aa162843a74ed6d48e79627d9792ac55 # v0.4.0
github.com/moby/sys 6154f11e6840c0d6b0dbb23f4125a6134b3013c9 # mountinfo/v0.1.3
@ -83,8 +83,8 @@ google.golang.org/grpc f495f5b15ae7ccda3b38c53a1bfc
# the containerd project first, and update both after that is merged.
# This commit does not need to match RUNC_COMMIT as it is used for helper
# packages but should be newer or equal.
github.com/opencontainers/runc dc9208a3303feef5b3839f4323d9beb36df0a9dd # v1.0.0-rc10
github.com/opencontainers/runtime-spec c4ee7d12c742ffe806cd9350b6af3b4b19faed6f # v1.0.2
github.com/opencontainers/runc 67169a9d43456ff0d5ae12b967acb8e366e2f181 # v1.0.0-rc91-48-g67169a9d
github.com/opencontainers/runtime-spec 237cc4f519e2e8f9b235bacccfa8ef5a84df2875 # v1.0.3-0.20200520003142-237cc4f519e2
github.com/opencontainers/image-spec d60099175f88c47cd379c4738d158884749ed235 # v1.0.1
github.com/seccomp/libseccomp-golang 689e3c1541a84461afc49c1c87352a6cedf72e9c # v0.9.1

View file

@ -3,6 +3,7 @@
[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc)
[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/588/badge)](https://bestpractices.coreinfrastructure.org/projects/588)
## Introduction
@ -18,22 +19,23 @@ You can find official releases of `runc` on the [release](https://github.com/ope
Currently, the following features are not considered to be production-ready:
* Support for cgroup v2
* [Support for cgroup v2](./docs/cgroup-v2.md)
## Security
The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
### Security Audit
A third party security audit was performed by Cure53, you can see the full report [here](https://github.com/opencontainers/runc/blob/master/docs/Security-Audit.pdf).
## Building
`runc` currently supports the Linux platform with various architecture support.
It must be built with Go version 1.6 or higher in order for some features to function properly.
It must be built with Go version 1.13 or higher.
In order to enable seccomp support you will need to install `libseccomp` on your platform.
> e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu
Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make.
```bash
# create a 'github.com/opencontainers' in your GOPATH/src
cd github.com/opencontainers
@ -58,20 +60,22 @@ sudo make install
#### Build Tags
`runc` supports optional build tags for compiling support of various features.
To add build tags to the make option the `BUILDTAGS` variable must be set.
`runc` supports optional build tags for compiling support of various features,
with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`).
To change build tags from the default, set the `BUILDTAGS` variable for make,
e.g.
```bash
make BUILDTAGS='seccomp apparmor'
```
| Build Tag | Feature | Dependency |
|-----------|------------------------------------|-------------|
| seccomp | Syscall filtering | libseccomp |
| selinux | selinux process and mount labeling | <none> |
| apparmor | apparmor profile support | <none> |
| ambient | ambient capability support | kernel 4.3 |
| nokmem | disable kernel memory account | <none> |
| Build Tag | Feature | Enabled by default | Dependency |
|-----------|------------------------------------|--------------------|------------|
| seccomp | Syscall filtering | yes | libseccomp |
| selinux | selinux process and mount labeling | yes | <none> |
| apparmor | apparmor profile support | yes | <none> |
| nokmem | disable kernel memory accounting | no | <none> |
### Running the test suite
@ -97,17 +101,30 @@ You can run a specific integration test by setting the `TESTPATH` variable.
# make test TESTPATH="/checkpoint.bats"
```
You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables.
You can run a specific rootless integration test by setting the `ROOTLESS_TESTPATH` variable.
```bash
# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/"
# make test ROOTLESS_TESTPATH="/checkpoint.bats"
```
You can run a test using your container engine's flags by setting `CONTAINER_ENGINE_BUILD_FLAGS` and `CONTAINER_ENGINE_RUN_FLAGS` variables.
```bash
# make test CONTAINER_ENGINE_BUILD_FLAGS="--build-arg http_proxy=http://yourproxy/" CONTAINER_ENGINE_RUN_FLAGS="-e http_proxy=http://yourproxy/"
```
### Dependencies Management
`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management.
Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update
new dependencies.
`runc` uses [Go Modules](https://github.com/golang/go/wiki/Modules) for dependencies management.
Please refer to [Go Modules](https://github.com/golang/go/wiki/Modules) for how to add or update
new dependencies. When updating dependencies, be sure that you are running Go `1.14` or newer.
```
# Update vendored dependencies
make vendor
# Verify all dependencies
make verify-dependencies
```
## Using runc
@ -275,6 +292,9 @@ PIDFile=/run/mycontainerid.pid
WantedBy=multi-user.target
```
#### cgroup v2
See [`./docs/cgroup-v2.md`](./docs/cgroup-v2.md).
## License
The code and docs are released under the [Apache 2.0 license](LICENSE).

26
vendor/github.com/opencontainers/runc/go.mod generated vendored Normal file
View file

@ -0,0 +1,26 @@
module github.com/opencontainers/runc
go 1.14
require (
github.com/checkpoint-restore/go-criu/v4 v4.0.2
github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775
github.com/containerd/console v1.0.0
github.com/coreos/go-systemd/v22 v22.0.0
github.com/cyphar/filepath-securejoin v0.2.2
github.com/docker/go-units v0.4.0
github.com/godbus/dbus/v5 v5.0.3
github.com/golang/protobuf v1.3.5
github.com/moby/sys/mountinfo v0.1.3
github.com/mrunalp/fileutils v0.0.0-20171103030105-7d4729fb3618
github.com/opencontainers/runtime-spec v1.0.3-0.20200520003142-237cc4f519e2
github.com/opencontainers/selinux v1.5.1
github.com/pkg/errors v0.9.1
github.com/seccomp/libseccomp-golang v0.9.1
github.com/sirupsen/logrus v1.6.0
github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2
// NOTE: urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092
github.com/urfave/cli v1.22.1
github.com/vishvananda/netlink v1.1.0
golang.org/x/sys v0.0.0-20200327173247-9dae0f8f5775
)

View file

@ -155,8 +155,7 @@ config := &configs.Config{
Parent: "system",
Resources: &configs.Resources{
MemorySwappiness: nil,
AllowAllDevices: nil,
AllowedDevices: configs.DefaultAllowedDevices,
Devices: specconv.AllowedDevices,
},
},
MaskPaths: []string{
@ -166,7 +165,7 @@ config := &configs.Config{
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Devices: configs.DefaultAutoCreatedDevices,
Devices: specconv.AllowedDevices,
Hostname: "testing",
Mounts: []*configs.Mount{
{

View file

@ -3,8 +3,6 @@
package cgroups
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/configs"
)
@ -27,48 +25,27 @@ type Manager interface {
// Destroys the cgroup set
Destroy() error
// The option func SystemdCgroups() and Cgroupfs() require following attributes:
// Paths map[string]string
// Cgroups *configs.Cgroup
// Paths maps cgroup subsystem to path at which it is mounted.
// Cgroups specifies specific cgroup settings for the various subsystems
// Returns cgroup paths to save in a state file and to be able to
// restore the object later.
GetPaths() map[string]string
// GetUnifiedPath returns the unified path when running in unified mode.
// The value corresponds to the all values of GetPaths() map.
//
// GetUnifiedPath returns error when running in hybrid mode as well as
// in legacy mode.
GetUnifiedPath() (string, error)
// Path returns a cgroup path to the specified controller/subsystem.
// For cgroupv2, the argument is unused and can be empty.
Path(string) string
// Sets the cgroup as configured.
Set(container *configs.Config) error
// Gets the cgroup as configured.
// GetPaths returns cgroup path(s) to save in a state file in order to restore later.
//
// For cgroup v1, a key is cgroup subsystem name, and the value is the path
// to the cgroup for this subsystem.
//
// For cgroup v2 unified hierarchy, a key is "", and the value is the unified path.
GetPaths() map[string]string
// GetCgroups returns the cgroup data as configured.
GetCgroups() (*configs.Cgroup, error)
}
type NotFoundError struct {
Subsystem string
}
// GetFreezerState retrieves the current FreezerState of the cgroup.
GetFreezerState() (configs.FreezerState, error)
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
}
func NewNotFoundError(sub string) error {
return &NotFoundError{
Subsystem: sub,
}
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
// Whether the cgroup path exists or not
Exists() bool
}

View file

@ -20,6 +20,12 @@ type CpuUsage struct {
// Total CPU time consumed per core.
// Units: nanoseconds.
PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
// CPU time consumed per core in kernel mode
// Units: nanoseconds.
PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"`
// CPU time consumed per core in user mode
// Units: nanoseconds.
PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"`
// Time spent by tasks of the cgroup in kernel mode.
// Units: nanoseconds.
UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
@ -51,12 +57,33 @@ type MemoryStats struct {
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
// usage of kernel TCP memory
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
// usage of memory pages by NUMA node
// see chapter 5.6 of memory controller documentation
PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"`
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
UseHierarchy bool `json:"use_hierarchy"`
Stats map[string]uint64 `json:"stats,omitempty"`
}
type PageUsageByNUMA struct {
// Embedding is used as types can't be recursive.
PageUsageByNUMAInner
Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"`
}
type PageUsageByNUMAInner struct {
Total PageStats `json:"total,omitempty"`
File PageStats `json:"file,omitempty"`
Anon PageStats `json:"anon,omitempty"`
Unevictable PageStats `json:"unevictable,omitempty"`
}
type PageStats struct {
Total uint64 `json:"total,omitempty"`
Nodes map[uint8]uint64 `json:"nodes,omitempty"`
}
type PidsStats struct {
// number of pids in the cgroup
Current uint64 `json:"current,omitempty"`

View file

@ -4,6 +4,7 @@ package cgroups
import (
"bufio"
"errors"
"fmt"
"io"
"io/ioutil"
@ -12,7 +13,6 @@ import (
"strconv"
"strings"
"sync"
"syscall"
"time"
units "github.com/docker/go-units"
@ -20,7 +20,6 @@ import (
)
const (
CgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
unifiedMountpoint = "/sys/fs/cgroup"
)
@ -40,8 +39,8 @@ var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
func IsCgroup2UnifiedMode() bool {
isUnifiedOnce.Do(func() {
var st syscall.Statfs_t
if err := syscall.Statfs(unifiedMountpoint, &st); err != nil {
var st unix.Statfs_t
if err := unix.Statfs(unifiedMountpoint, &st); err != nil {
panic("cannot statfs cgroup root")
}
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
@ -49,191 +48,19 @@ func IsCgroup2UnifiedMode() bool {
return isUnified
}
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return unifiedMountpoint, nil
}
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
}
defer f.Close()
if IsCgroup2UnifiedMode() {
subsystem = ""
}
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
}
func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Fields(txt)
if len(fields) < 9 {
continue
}
if strings.HasPrefix(fields[4], cgroupPath) {
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if (subsystem == "" && fields[9] == "cgroup2") || opt == subsystem {
return fields[4], fields[3], nil
}
}
}
}
if err := scanner.Err(); err != nil {
return "", "", err
}
return "", "", NewNotFoundError(subsystem)
}
func isSubsystemAvailable(subsystem string) bool {
if IsCgroup2UnifiedMode() {
controllers, err := GetAllSubsystems()
if err != nil {
return false
}
for _, c := range controllers {
if c == subsystem {
return true
}
}
return false
}
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return false
}
_, avail := cgroups[subsystem]
return avail
}
func GetClosestMountpointAncestor(dir, mountinfo string) string {
deepestMountPoint := ""
for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
mountInfoParts := strings.Fields(mountInfoEntry)
if len(mountInfoParts) < 5 {
continue
}
mountPoint := mountInfoParts[4]
if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
deepestMountPoint = mountPoint
}
}
return deepestMountPoint
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
text := scanner.Text()
fields := strings.Split(text, " ")
// Safe as mountinfo encodes mountpoints with spaces as \040.
index := strings.Index(text, " - ")
postSeparatorFields := strings.Fields(text[index+3:])
numPostFields := len(postSeparatorFields)
// This is an error as we can't detect if the mount is for "cgroup"
if numPostFields == 0 {
return "", fmt.Errorf("Found no fields post '-' in %q", text)
}
if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" {
// Check that the mount is properly formatted.
if numPostFields < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
return filepath.Dir(fields[4]), nil
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", NewNotFoundError("cgroup")
}
type Mount struct {
Mountpoint string
Root string
Subsystems []string
}
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
return getControllerPath(m.Subsystems[0], cgroups)
}
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
scanner := bufio.NewScanner(mi)
numFound := 0
for scanner.Scan() && numFound < len(ss) {
txt := scanner.Text()
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
return nil, fmt.Errorf("invalid mountinfo format")
}
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
continue
}
fields := strings.Split(txt, " ")
m := Mount{
Mountpoint: fields[4],
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
seen, known := ss[opt]
if !known || (!all && seen) {
continue
}
ss[opt] = true
if strings.HasPrefix(opt, CgroupNamePrefix) {
opt = opt[len(CgroupNamePrefix):]
}
m.Subsystems = append(m.Subsystems, opt)
numFound++
}
if len(m.Subsystems) > 0 || all {
res = append(res, m)
}
}
if err := scanner.Err(); err != nil {
return nil, err
}
return res, nil
}
// GetCgroupMounts returns the mounts for the cgroup subsystems.
// all indicates whether to return just the first instance or all the mounts.
// This function should not be used from cgroupv2 code, as in this case
// all the controllers are available under the constant unifiedMountpoint.
func GetCgroupMounts(all bool) ([]Mount, error) {
if IsCgroup2UnifiedMode() {
// TODO: remove cgroupv2 case once all external users are converted
availableControllers, err := GetAllSubsystems()
if err != nil {
return nil, err
@ -246,22 +73,7 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
return []Mount{m}, nil
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for s := range allSubsystems {
allMap[s] = false
}
return getCgroupMountsHelper(allMap, f, all)
return getCgroupMountsV1(all)
}
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
@ -305,61 +117,8 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil
}
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetOwnCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see paths from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}
func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
func readProcsFile(file string) ([]int, error) {
f, err := os.Open(file)
if err != nil {
return nil, err
}
@ -379,11 +138,18 @@ func readProcsFile(dir string) ([]int, error) {
out = append(out, pid)
}
}
return out, nil
return out, s.Err()
}
// ParseCgroupFile parses the given cgroup file, typically from
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
// "cpu": "/user.slice/user-1000.slice"
// "pids": "/user.slice/user-1000.slice"
// etc.
//
// Note that for cgroup v2 unified hierarchy, there are no per-controller
// cgroup paths, so the resulting map will have a single element where the key
// is empty string ("") and the value is the cgroup path the <pid> is in.
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
@ -423,22 +189,6 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
return cgroups, nil
}
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
if IsCgroup2UnifiedMode() {
return "/", nil
}
if p, ok := cgroups[subsystem]; ok {
return p, nil
}
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
return p, nil
}
return "", NewNotFoundError(subsystem)
}
func PathExists(path string) bool {
if _, err := os.Stat(path); err != nil {
return false
@ -514,8 +264,8 @@ func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
}
// GetPids returns all pids, that were added to cgroup at path.
func GetPids(path string) ([]int, error) {
return readProcsFile(path)
func GetPids(dir string) ([]int, error) {
return readProcsFile(filepath.Join(dir, CgroupProcesses))
}
// GetAllPids returns all pids, that were added to cgroup at path and to all its
@ -524,14 +274,13 @@ func GetAllPids(path string) ([]int, error) {
var pids []int
// collect pids from all sub-cgroups
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
dir, file := filepath.Split(p)
if file != CgroupProcesses {
return nil
}
if iErr != nil {
return iErr
}
cPids, err := readProcsFile(dir)
if info.IsDir() || info.Name() != CgroupProcesses {
return nil
}
cPids, err := readProcsFile(p)
if err != nil {
return err
}
@ -568,7 +317,7 @@ func WriteCgroupProc(dir string, pid int) error {
// EINVAL might mean that the task being added to cgroup.procs is in state
// TASK_NEW. We should attempt to do so again.
if isEINVAL(err) {
if errors.Is(err, unix.EINVAL) {
time.Sleep(30 * time.Millisecond)
continue
}
@ -578,11 +327,53 @@ func WriteCgroupProc(dir string, pid int) error {
return err
}
func isEINVAL(err error) bool {
switch err := err.(type) {
case *os.PathError:
return err.Err == unix.EINVAL
default:
return false
// Since the OCI spec is designed for cgroup v1, in some cases
// there is need to convert from the cgroup v1 configuration to cgroup v2
// the formula for BlkIOWeight is y = (1 + (x - 10) * 9999 / 990)
// convert linearly from [10-1000] to [1-10000]
func ConvertBlkIOToCgroupV2Value(blkIoWeight uint16) uint64 {
if blkIoWeight == 0 {
return 0
}
return uint64(1 + (uint64(blkIoWeight)-10)*9999/990)
}
// Since the OCI spec is designed for cgroup v1, in some cases
// there is need to convert from the cgroup v1 configuration to cgroup v2
// the formula for cpuShares is y = (1 + ((x - 2) * 9999) / 262142)
// convert from [2-262144] to [1-10000]
// 262144 comes from Linux kernel definition "#define MAX_SHARES (1UL << 18)"
func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 {
if cpuShares == 0 {
return 0
}
return (1 + ((cpuShares-2)*9999)/262142)
}
// ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec
// for use by cgroup v2 drivers. A conversion is needed since Resources.MemorySwap
// is defined as memory+swap combined, while in cgroup v2 swap is a separate value.
func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
// for compatibility with cgroup1 controller, set swap to unlimited in
// case the memory is set to unlimited, and swap is not explicitly set,
// treating the request as "set both memory and swap to unlimited".
if memory == -1 && memorySwap == 0 {
return -1, nil
}
if memorySwap == -1 || memorySwap == 0 {
// -1 is "max", 0 is "unset", so treat as is
return memorySwap, nil
}
// sanity checks
if memory == 0 || memory == -1 {
return 0, errors.New("unable to set swap limit without memory limit")
}
if memory < 0 {
return 0, fmt.Errorf("invalid memory value: %d", memory)
}
if memorySwap < memory {
return 0, errors.New("memory+swap limit should be >= memory limit")
}
return memorySwap - memory, nil
}

View file

@ -0,0 +1,250 @@
package cgroups
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
// Code in this source file are specific to cgroup v1,
// and must not be used from any cgroup v2 code.
const (
CgroupNamePrefix = "name="
)
var (
errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
)
type NotFoundError struct {
Subsystem string
}
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
}
func NewNotFoundError(sub string) error {
return &NotFoundError{
Subsystem: sub,
}
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
}
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
if IsCgroup2UnifiedMode() {
return "", "", errUnified
}
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
}
defer f.Close()
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
}
func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Fields(txt)
if len(fields) < 9 {
continue
}
if strings.HasPrefix(fields[4], cgroupPath) {
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], fields[3], nil
}
}
}
}
if err := scanner.Err(); err != nil {
return "", "", err
}
return "", "", NewNotFoundError(subsystem)
}
func isSubsystemAvailable(subsystem string) bool {
if IsCgroup2UnifiedMode() {
panic("don't call isSubsystemAvailable from cgroupv2 code")
}
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return false
}
_, avail := cgroups[subsystem]
return avail
}
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
return getControllerPath(m.Subsystems[0], cgroups)
}
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
scanner := bufio.NewScanner(mi)
numFound := 0
for scanner.Scan() && numFound < len(ss) {
txt := scanner.Text()
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
return nil, fmt.Errorf("invalid mountinfo format")
}
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
continue
}
fields := strings.Split(txt, " ")
m := Mount{
Mountpoint: fields[4],
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
seen, known := ss[opt]
if !known || (!all && seen) {
continue
}
ss[opt] = true
opt = strings.TrimPrefix(opt, CgroupNamePrefix)
m.Subsystems = append(m.Subsystems, opt)
numFound++
}
if len(m.Subsystems) > 0 || all {
res = append(res, m)
}
}
if err := scanner.Err(); err != nil {
return nil, err
}
return res, nil
}
func getCgroupMountsV1(all bool) ([]Mount, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for s := range allSubsystems {
allMap[s] = false
}
return getCgroupMountsHelper(allMap, f, all)
}
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetOwnCgroup(subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
}
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see paths from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
if IsCgroup2UnifiedMode() {
return "", errUnified
}
if p, ok := cgroups[subsystem]; ok {
return p, nil
}
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
return p, nil
}
return "", NewNotFoundError(subsystem)
}

View file

@ -1,5 +1,9 @@
package configs
import (
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
)
type FreezerState string
const (
@ -29,18 +33,16 @@ type Cgroup struct {
// Resources contains various cgroups settings to apply
*Resources
// SystemdProps are any additional properties for systemd,
// derived from org.systemd.property.xxx annotations.
// Ignored unless systemd is used for managing cgroups.
SystemdProps []systemdDbus.Property `json:"-"`
}
type Resources struct {
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
// Deprecated
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
// Deprecated
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
// Deprecated
DeniedDevices []*Device `json:"denied_devices,omitempty"`
Devices []*Device `json:"devices"`
// Devices is the set of access rules for devices in the container.
Devices []*DeviceRule `json:"devices"`
// Memory limit (in bytes)
Memory int64 `json:"memory"`
@ -125,6 +127,10 @@ type Resources struct {
// CpuWeight sets a proportional bandwidth limit.
CpuWeight uint64 `json:"cpu_weight"`
// CpuMax sets she maximum bandwidth limit (format: max period).
CpuMax string `json:"cpu_max"`
// SkipDevices allows to skip configuring device permissions.
// Used by e.g. kubelet while creating a parent cgroup (kubepods)
// common for many containers.
//
// NOTE it is impossible to start a container which has this flag set.
SkipDevices bool `json:"skip_devices"`
}

View file

@ -8,7 +8,7 @@ import (
"time"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@ -70,9 +70,10 @@ type Arg struct {
// Syscall is a rule to match a syscall in Seccomp
type Syscall struct {
Name string `json:"name"`
Action Action `json:"action"`
Args []*Arg `json:"args"`
Name string `json:"name"`
Action Action `json:"action"`
ErrnoRet *uint `json:"errnoRet"`
Args []*Arg `json:"args"`
}
// TODO Windows. Many of these fields should be factored out into those parts
@ -175,7 +176,7 @@ type Config struct {
// Hooks are a collection of actions to perform at various container lifecycle events.
// CommandHooks are serialized to JSON, but other hooks are not.
Hooks *Hooks
Hooks Hooks
// Version is the version of opencontainer specification that is supported.
Version string `json:"version"`
@ -202,17 +203,50 @@ type Config struct {
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
}
type Hooks struct {
type HookName string
type HookList []Hook
type Hooks map[HookName]HookList
const (
// Prestart commands are executed after the container namespaces are created,
// but before the user supplied command is executed from init.
Prestart []Hook
// Note: This hook is now deprecated
// Prestart commands are called in the Runtime namespace.
Prestart HookName = "prestart"
// CreateRuntime commands MUST be called as part of the create operation after
// the runtime environment has been created but before the pivot_root has been executed.
// CreateRuntime is called immediately after the deprecated Prestart hook.
// CreateRuntime commands are called in the Runtime Namespace.
CreateRuntime = "createRuntime"
// CreateContainer commands MUST be called as part of the create operation after
// the runtime environment has been created but before the pivot_root has been executed.
// CreateContainer commands are called in the Container namespace.
CreateContainer = "createContainer"
// StartContainer commands MUST be called as part of the start operation and before
// the container process is started.
// StartContainer commands are called in the Container namespace.
StartContainer = "startContainer"
// Poststart commands are executed after the container init process starts.
Poststart []Hook
// Poststart commands are called in the Runtime Namespace.
Poststart = "poststart"
// Poststop commands are executed after the container init process exits.
Poststop []Hook
}
// Poststop commands are called in the Runtime Namespace.
Poststop = "poststop"
)
// TODO move this to runtime-spec
// See: https://github.com/opencontainers/runtime-spec/pull/1046
const (
Creating = "creating"
Created = "created"
Running = "running"
Stopped = "stopped"
)
type Capabilities struct {
// Bounding is the set of capabilities checked by the kernel.
@ -227,32 +261,39 @@ type Capabilities struct {
Ambient []string
}
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
var state struct {
Prestart []CommandHook
Poststart []CommandHook
Poststop []CommandHook
func (hooks HookList) RunHooks(state *specs.State) error {
for i, h := range hooks {
if err := h.Run(state); err != nil {
return errors.Wrapf(err, "Running hook #%d:", i)
}
}
return nil
}
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
var state map[HookName][]CommandHook
if err := json.Unmarshal(b, &state); err != nil {
return err
}
deserialize := func(shooks []CommandHook) (hooks []Hook) {
for _, shook := range shooks {
hooks = append(hooks, shook)
*hooks = Hooks{}
for n, commandHooks := range state {
if len(commandHooks) == 0 {
continue
}
return hooks
(*hooks)[n] = HookList{}
for _, h := range commandHooks {
(*hooks)[n] = append((*hooks)[n], h)
}
}
hooks.Prestart = deserialize(state.Prestart)
hooks.Poststart = deserialize(state.Poststart)
hooks.Poststop = deserialize(state.Poststop)
return nil
}
func (hooks Hooks) MarshalJSON() ([]byte, error) {
func (hooks *Hooks) MarshalJSON() ([]byte, error) {
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
for _, hook := range hooks {
switch chook := hook.(type) {
@ -267,9 +308,12 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
}
return json.Marshal(map[string]interface{}{
"prestart": serialize(hooks.Prestart),
"poststart": serialize(hooks.Poststart),
"poststop": serialize(hooks.Poststop),
"prestart": serialize((*hooks)[Prestart]),
"createRuntime": serialize((*hooks)[CreateRuntime]),
"createContainer": serialize((*hooks)[CreateContainer]),
"startContainer": serialize((*hooks)[StartContainer]),
"poststart": serialize((*hooks)[Poststart]),
"poststop": serialize((*hooks)[Poststop]),
})
}

View file

@ -3,30 +3,19 @@ package configs
import (
"fmt"
"os"
"strconv"
)
const (
Wildcard = -1
)
// TODO Windows: This can be factored out in the future
type Device struct {
// Device type, block, char, etc.
Type rune `json:"type"`
DeviceRule
// Path to the device.
Path string `json:"path"`
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
// Cgroup permissions format, rwm.
Permissions string `json:"permissions"`
// FileMode permission bits for the device.
FileMode os.FileMode `json:"file_mode"`
@ -35,23 +24,147 @@ type Device struct {
// Gid of the device.
Gid uint32 `json:"gid"`
}
// Write the file to the allowed list
// DevicePermissions is a cgroupv1-style string to represent device access. It
// has to be a string for backward compatibility reasons, hence why it has
// methods to do set operations.
type DevicePermissions string
const (
deviceRead uint = (1 << iota)
deviceWrite
deviceMknod
)
func (p DevicePermissions) toSet() uint {
var set uint
for _, perm := range p {
switch perm {
case 'r':
set |= deviceRead
case 'w':
set |= deviceWrite
case 'm':
set |= deviceMknod
}
}
return set
}
func fromSet(set uint) DevicePermissions {
var perm string
if set&deviceRead == deviceRead {
perm += "r"
}
if set&deviceWrite == deviceWrite {
perm += "w"
}
if set&deviceMknod == deviceMknod {
perm += "m"
}
return DevicePermissions(perm)
}
// Union returns the union of the two sets of DevicePermissions.
func (p DevicePermissions) Union(o DevicePermissions) DevicePermissions {
lhs := p.toSet()
rhs := o.toSet()
return fromSet(lhs | rhs)
}
// Difference returns the set difference of the two sets of DevicePermissions.
// In set notation, A.Difference(B) gives you A\B.
func (p DevicePermissions) Difference(o DevicePermissions) DevicePermissions {
lhs := p.toSet()
rhs := o.toSet()
return fromSet(lhs &^ rhs)
}
// Intersection computes the intersection of the two sets of DevicePermissions.
func (p DevicePermissions) Intersection(o DevicePermissions) DevicePermissions {
lhs := p.toSet()
rhs := o.toSet()
return fromSet(lhs & rhs)
}
// IsEmpty returns whether the set of permissions in a DevicePermissions is
// empty.
func (p DevicePermissions) IsEmpty() bool {
return p == DevicePermissions("")
}
// IsValid returns whether the set of permissions is a subset of valid
// permissions (namely, {r,w,m}).
func (p DevicePermissions) IsValid() bool {
return p == fromSet(p.toSet())
}
type DeviceType rune
const (
WildcardDevice DeviceType = 'a'
BlockDevice DeviceType = 'b'
CharDevice DeviceType = 'c' // or 'u'
FifoDevice DeviceType = 'p'
)
func (t DeviceType) IsValid() bool {
switch t {
case WildcardDevice, BlockDevice, CharDevice, FifoDevice:
return true
default:
return false
}
}
func (t DeviceType) CanMknod() bool {
switch t {
case BlockDevice, CharDevice, FifoDevice:
return true
default:
return false
}
}
func (t DeviceType) CanCgroup() bool {
switch t {
case WildcardDevice, BlockDevice, CharDevice:
return true
default:
return false
}
}
type DeviceRule struct {
// Type of device ('c' for char, 'b' for block). If set to 'a', this rule
// acts as a wildcard and all fields other than Allow are ignored.
Type DeviceType `json:"type"`
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
// Permissions is the set of permissions that this rule applies to (in the
// cgroupv1 format -- any combination of "rwm").
Permissions DevicePermissions `json:"permissions"`
// Allow specifies whether this rule is allowed.
Allow bool `json:"allow"`
}
func (d *Device) CgroupString() string {
return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
}
func (d *Device) Mkdev() int {
return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12))
}
// deviceNumberString converts the device number to a string return result.
func deviceNumberString(number int64) string {
if number == Wildcard {
return "*"
func (d *DeviceRule) CgroupString() string {
var (
major = strconv.FormatInt(d.Major, 10)
minor = strconv.FormatInt(d.Minor, 10)
)
if d.Major == Wildcard {
major = "*"
}
return fmt.Sprint(number)
if d.Minor == Wildcard {
minor = "*"
}
return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
}

View file

@ -1,111 +0,0 @@
// +build linux
package configs
var (
// DefaultSimpleDevices are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{
Path: "/dev/null",
Type: 'c',
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/zero",
Type: 'c',
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/full",
Type: 'c',
Major: 1,
Minor: 7,
Permissions: "rwm",
FileMode: 0666,
},
// consoles and ttys
{
Path: "/dev/tty",
Type: 'c',
Major: 5,
Minor: 0,
Permissions: "rwm",
FileMode: 0666,
},
// /dev/urandom,/dev/random
{
Path: "/dev/urandom",
Type: 'c',
Major: 1,
Minor: 9,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/random",
Type: 'c',
Major: 1,
Minor: 8,
Permissions: "rwm",
FileMode: 0666,
},
}
DefaultAllowedDevices = append([]*Device{
// allow mknod for any device
{
Type: 'c',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Type: 'b',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Path: "/dev/console",
Type: 'c',
Major: 5,
Minor: 1,
Permissions: "rwm",
},
// /dev/pts/ - pts namespaces are "coming soon"
{
Path: "",
Type: 'c',
Major: 136,
Minor: Wildcard,
Permissions: "rwm",
},
{
Path: "",
Type: 'c',
Major: 5,
Minor: 2,
Permissions: "rwm",
},
// tuntap
{
Path: "",
Type: 'c',
Major: 10,
Minor: 200,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
)

View file

@ -0,0 +1,16 @@
// +build !windows
package configs
import (
"errors"
"golang.org/x/sys/unix"
)
func (d *DeviceRule) Mkdev() (uint64, error) {
if d.Major == Wildcard || d.Minor == Wildcard {
return 0, errors.New("cannot mkdev() device with wildcards")
}
return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil
}

View file

@ -0,0 +1,5 @@
package configs
func (d *DeviceRule) Mkdev() (uint64, error) {
return 0, nil
}

View file

@ -31,33 +31,33 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
}
var (
devType configs.DeviceType
mode = stat.Mode
devNumber = uint64(stat.Rdev)
major = unix.Major(devNumber)
minor = unix.Minor(devNumber)
)
if major == 0 {
switch mode & unix.S_IFMT {
case unix.S_IFBLK:
devType = configs.BlockDevice
case unix.S_IFCHR:
devType = configs.CharDevice
case unix.S_IFIFO:
devType = configs.FifoDevice
default:
return nil, ErrNotADevice
}
var (
devType rune
mode = stat.Mode
)
switch {
case mode&unix.S_IFBLK == unix.S_IFBLK:
devType = 'b'
case mode&unix.S_IFCHR == unix.S_IFCHR:
devType = 'c'
}
return &configs.Device{
Type: devType,
Path: path,
Major: int64(major),
Minor: int64(minor),
Permissions: permissions,
FileMode: os.FileMode(mode),
Uid: stat.Uid,
Gid: stat.Gid,
DeviceRule: configs.DeviceRule{
Type: devType,
Major: int64(major),
Minor: int64(minor),
Permissions: configs.DevicePermissions(permissions),
},
Path: path,
FileMode: os.FileMode(mode),
Uid: stat.Uid,
Gid: stat.Gid,
}, nil
}
@ -104,6 +104,9 @@ func GetDevices(path string) ([]*configs.Device, error) {
}
return nil, err
}
if device.Type == configs.FifoDevice {
continue
}
out = append(out, device)
}
return out, nil

View file

@ -1,7 +1,14 @@
// SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
/*
* Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com>
* Copyright (C) 2019 SUSE LLC
*
* This work is dual licensed under the following licenses. You may use,
* redistribute, and/or modify the work under the conditions of either (or
* both) licenses.
*
* === Apache-2.0 ===
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@ -13,6 +20,23 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* === LGPL-2.1-or-later ===
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <https://www.gnu.org/licenses/>.
*
*/
#define _GNU_SOURCE
@ -95,8 +119,10 @@ static int is_self_cloned(void)
struct statfs fsbuf = {};
fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
if (fd < 0)
if (fd < 0) {
fprintf(stderr, "you have no read access to runc binary file\n");
return -ENOTRECOVERABLE;
}
/*
* Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for

View file

@ -714,12 +714,12 @@ void nsexec(void)
* ready, so we can receive all possible error codes
* generated by children.
*/
syncfd = sync_child_pipe[1];
close(sync_child_pipe[0]);
while (!ready) {
enum sync_t s;
syncfd = sync_child_pipe[1];
close(sync_child_pipe[0]);
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with child: next state");
@ -789,13 +789,13 @@ void nsexec(void)
/* Now sync with grandchild. */
syncfd = sync_grandchild_pipe[1];
close(sync_grandchild_pipe[0]);
ready = false;
while (!ready) {
enum sync_t s;
syncfd = sync_grandchild_pipe[1];
close(sync_grandchild_pipe[0]);
s = SYNC_GRANDCHILD;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
kill(child, SIGKILL);

View file

@ -60,7 +60,7 @@ type Group struct {
// groupFromOS converts an os/user.(*Group) to local Group
//
// (This does not include Pass, Shell or Gecos)
// (This does not include Pass or List)
func groupFromOS(g *user.Group) (Group, error) {
newGroup := Group{
Name: g.Name,
@ -162,10 +162,6 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
line := strings.TrimSpace(s.Text())
if line == "" {
continue
@ -183,6 +179,9 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
@ -221,10 +220,6 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
if text == "" {
continue
@ -242,6 +237,9 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
@ -532,10 +530,6 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
line := strings.TrimSpace(s.Text())
if line == "" {
continue
@ -549,6 +543,9 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
@ -586,10 +583,6 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
line := strings.TrimSpace(s.Text())
if line == "" {
continue
@ -603,6 +596,9 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}

View file

@ -1,31 +0,0 @@
# OCI runtime-spec. When updating this, make sure you use a version tag rather
# than a commit ID so it's much more obvious what version of the spec we are
# using.
github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4 # v1.0.1-59-g29686db
# Core libcontainer functionality.
github.com/checkpoint-restore/go-criu 17b0214f6c48980c45dc47ecb0cfd6d9e02df723 # v3.11
github.com/mrunalp/fileutils 7d4729fb36185a7c1719923406c9d40e54fb93c7
github.com/opencontainers/selinux 5215b1806f52b1fcc2070a8826c542c9d33cd3cf # v1.3.0 (+ CVE-2019-16884)
github.com/seccomp/libseccomp-golang 689e3c1541a84461afc49c1c87352a6cedf72e9c # v0.9.1
github.com/sirupsen/logrus 8bdbc7bcc01dcbb8ec23dc8a28e332258d25251f # v1.4.1
github.com/syndtr/gocapability d98352740cb2c55f81556b63d4a1ec64c5a319c2
github.com/vishvananda/netlink 1e2e08e8a2dcdacaae3f14ac44c5cfa31361f270
# systemd integration.
github.com/coreos/go-systemd 95778dfbb74eb7e4dbaf43bf7d71809650ef8076 # v19
github.com/godbus/dbus 2ff6f7ffd60f0f2410b3105864bdd12c7894f844 # v5.0.1
github.com/golang/protobuf 925541529c1fa6821df4e44ce2723319eb2be768 # v1.0.0
# Command-line interface.
github.com/cyphar/filepath-securejoin a261ee33d7a517f054effbf451841abaafe3e0fd # v0.2.2
github.com/docker/go-units 47565b4f722fb6ceae66b95f853feed578a4a51c # v0.3.3
github.com/urfave/cli cfb38830724cc34fedffe9a2a29fb54fa9169cd1 # v1.20.0
golang.org/x/sys 9eafafc0a87e0fd0aeeba439a4573537970c44c7 https://github.com/golang/sys
# console dependencies
github.com/containerd/console 0650fd9eeb50bab4fc99dceb9f2e14cf58f36e7f
github.com/pkg/errors ba968bfe8b2f7e042a574c888954fccecfa385b4 # v0.8.1
# ebpf dependencies
github.com/cilium/ebpf 95b36a581eed7b0f127306ed1d16cc0ddc06cf67

View file

@ -667,9 +667,10 @@ type LinuxSeccompArg struct {
// LinuxSyscall is used to match a syscall in Seccomp
type LinuxSyscall struct {
Names []string `json:"names"`
Action LinuxSeccompAction `json:"action"`
Args []LinuxSeccompArg `json:"args,omitempty"`
Names []string `json:"names"`
Action LinuxSeccompAction `json:"action"`
ErrnoRet *uint `json:"errnoRet,omitempty"`
Args []LinuxSeccompArg `json:"args,omitempty"`
}
// LinuxIntelRdt has container runtime resource constraints for Intel RDT

View file

@ -11,7 +11,7 @@ const (
VersionPatch = 2
// VersionDev indicates development branch. Releases will be empty string.
VersionDev = ""
VersionDev = "-dev"
)
// Version is the specification version that the package types support.

View file

@ -671,6 +671,7 @@ const (
FS_IOC_ADD_ENCRYPTION_KEY = 0xc0506617
FS_IOC_GET_ENCRYPTION_KEY_STATUS = 0xc080661a
FS_IOC_GET_ENCRYPTION_POLICY_EX = 0xc0096616
FS_IOC_MEASURE_VERITY = 0xc0046686
FS_IOC_REMOVE_ENCRYPTION_KEY = 0xc0406618
FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS = 0xc0406619
FS_KEY_DESCRIPTOR_SIZE = 0x8
@ -683,6 +684,9 @@ const (
FS_POLICY_FLAGS_PAD_8 = 0x1
FS_POLICY_FLAGS_PAD_MASK = 0x3
FS_POLICY_FLAGS_VALID = 0xf
FS_VERITY_FL = 0x100000
FS_VERITY_HASH_ALG_SHA256 = 0x1
FS_VERITY_HASH_ALG_SHA512 = 0x2
FUTEXFS_SUPER_MAGIC = 0xbad1dea
F_ADD_SEALS = 0x409
F_DUPFD = 0x0

View file

@ -73,6 +73,8 @@ const (
FFDLY = 0x8000
FLUSHO = 0x1000
FP_XSTATE_MAGIC2 = 0x46505845
FS_IOC_ENABLE_VERITY = 0x40806685
FS_IOC_GETFLAGS = 0x80046601
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613

View file

@ -73,6 +73,8 @@ const (
FFDLY = 0x8000
FLUSHO = 0x1000
FP_XSTATE_MAGIC2 = 0x46505845
FS_IOC_ENABLE_VERITY = 0x40806685
FS_IOC_GETFLAGS = 0x80086601
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613

View file

@ -72,6 +72,8 @@ const (
FF1 = 0x8000
FFDLY = 0x8000
FLUSHO = 0x1000
FS_IOC_ENABLE_VERITY = 0x40806685
FS_IOC_GETFLAGS = 0x80046601
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613

View file

@ -75,6 +75,8 @@ const (
FFDLY = 0x8000
FLUSHO = 0x1000
FPSIMD_MAGIC = 0x46508001
FS_IOC_ENABLE_VERITY = 0x40806685
FS_IOC_GETFLAGS = 0x80086601
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613

View file

@ -72,6 +72,8 @@ const (
FF1 = 0x8000
FFDLY = 0x8000
FLUSHO = 0x2000
FS_IOC_ENABLE_VERITY = 0x80806685
FS_IOC_GETFLAGS = 0x40046601
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613

View file

@ -72,6 +72,8 @@ const (
FF1 = 0x8000
FFDLY = 0x8000
FLUSHO = 0x2000
FS_IOC_ENABLE_VERITY = 0x80806685
FS_IOC_GETFLAGS = 0x40086601
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613

View file

@ -72,6 +72,8 @@ const (
FF1 = 0x8000
FFDLY = 0x8000
FLUSHO = 0x2000
FS_IOC_ENABLE_VERITY = 0x80806685
FS_IOC_GETFLAGS = 0x40086601
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613

View file

@ -72,6 +72,8 @@ const (
FF1 = 0x8000
FFDLY = 0x8000
FLUSHO = 0x2000
FS_IOC_ENABLE_VERITY = 0x80806685
FS_IOC_GETFLAGS = 0x40046601
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613

View file

@ -72,6 +72,8 @@ const (
FF1 = 0x4000
FFDLY = 0x4000
FLUSHO = 0x800000
FS_IOC_ENABLE_VERITY = 0x80806685
FS_IOC_GETFLAGS = 0x40086601
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613

View file

@ -72,6 +72,8 @@ const (
FF1 = 0x4000
FFDLY = 0x4000
FLUSHO = 0x800000
FS_IOC_ENABLE_VERITY = 0x80806685
FS_IOC_GETFLAGS = 0x40086601
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613

View file

@ -72,6 +72,8 @@ const (
FF1 = 0x8000
FFDLY = 0x8000
FLUSHO = 0x1000
FS_IOC_ENABLE_VERITY = 0x40806685
FS_IOC_GETFLAGS = 0x80086601
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613

View file

@ -72,6 +72,8 @@ const (
FF1 = 0x8000
FFDLY = 0x8000
FLUSHO = 0x1000
FS_IOC_ENABLE_VERITY = 0x40806685
FS_IOC_GETFLAGS = 0x80086601
FS_IOC_GET_ENCRYPTION_POLICY = 0x400c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x40106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x800c6613

View file

@ -76,6 +76,8 @@ const (
FF1 = 0x8000
FFDLY = 0x8000
FLUSHO = 0x1000
FS_IOC_ENABLE_VERITY = 0x80806685
FS_IOC_GETFLAGS = 0x40086601
FS_IOC_GET_ENCRYPTION_POLICY = 0x800c6615
FS_IOC_GET_ENCRYPTION_PWSALT = 0x80106614
FS_IOC_SET_ENCRYPTION_POLICY = 0x400c6613

View file

@ -2291,3 +2291,20 @@ const (
DEVLINK_DPIPE_HEADER_IPV4 = 0x1
DEVLINK_DPIPE_HEADER_IPV6 = 0x2
)
type FsverityDigest struct {
Algorithm uint16
Size uint16
}
type FsverityEnableArg struct {
Version uint32
Hash_algorithm uint32
Block_size uint32
Salt_size uint32
Salt_ptr uint64
Sig_size uint32
_ uint32
Sig_ptr uint64
_ [11]uint64
}