update runc library to v1.0.0-rc10 (CVE-2019-19921)

Notable changes:
* Fix CVE-2019-19921 (Volume mount race condition with shared mounts): https://github.com/opencontainers/runc/pull/2207
* Fix exec FIFO race: https://github.com/opencontainers/runc/pull/2185
* Basic support for cgroup v2.  Almost feature-complete, but still missing support for systemd mode in rootless.
  See also https://github.com/opencontainers/runc/issues/2209 for the known issues.

Full changes: https://github.com/opencontainers/runc/compare/v1.0.0-rc9...v1.0.0-rc10

Also updates go-selinux: 3a1f366feb...5215b1806f
(See https://github.com/containerd/cri/pull/1383#issuecomment-578227009)

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
(cherry picked from commit 6d68080907)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
Akihiro Suda 2020-01-25 04:02:21 +09:00 committed by Sebastiaan van Stijn
parent 3bd1759f80
commit d3dab1f618
No known key found for this signature in database
GPG key ID: 76698F39D527CE8C
13 changed files with 392 additions and 19 deletions

View file

@ -80,7 +80,7 @@ google.golang.org/grpc 6eaf6f47437a6b4e2153a190160e
# the containerd project first, and update both after that is merged.
# This commit does not need to match RUNC_COMMIT as it is used for helper
# packages but should be newer or equal.
github.com/opencontainers/runc 3e425f80a8c931f88e6d94a8c831b9d5aa481657 # v1.0.0-rc8-92-g84373aaa
github.com/opencontainers/runc dc9208a3303feef5b3839f4323d9beb36df0a9dd # v1.0.0-rc10
github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4 # v1.0.1-59-g29686db
github.com/opencontainers/image-spec d60099175f88c47cd379c4738d158884749ed235 # v1.0.1
github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0
@ -161,6 +161,6 @@ github.com/morikuni/aec 39771216ff4c63d11f5e604076f9
# metrics
github.com/docker/go-metrics d466d4f6fd960e01820085bd7e1a24426ee7ef18
github.com/opencontainers/selinux 3a1f366feb7aecbf7a0e71ac4cea88b31597de9e # v1.2.2
github.com/opencontainers/selinux 5215b1806f52b1fcc2070a8826c542c9d33cd3cf
# DO NOT EDIT BELOW THIS LINE -------- reserved for downstream projects --------

View file

@ -16,9 +16,13 @@ This means that `runc` 1.0.0 should implement the 1.0 version of the specificati
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
Currently, the following features are not considered to be production-ready:
* Support for cgroup v2
## Security
Reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/)
The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
## Building
@ -229,7 +233,14 @@ runc delete mycontainerid
This allows higher level systems to augment the containers creation logic with setup of various settings after the container is created and/or before it is deleted. For example, the container's network stack is commonly set up after `create` but before `start`.
#### Rootless containers
`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version. Run the following commands as an ordinary user:
`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version.
**Note:** In order to use this feature, "User Namespaces" must be compiled and enabled in your kernel. There are various ways to do this depending on your distribution:
- Confirm `CONFIG_USER_NS=y` is set in your kernel configuration (normally found in `/proc/config.gz`)
- Arch/Debian: `echo 1 > /proc/sys/kernel/unprivileged_userns_clone`
- RHEL/CentOS 7: `echo 28633 > /proc/sys/user/max_user_namespaces`
Run the following commands as an ordinary user:
```bash
# Same as the first example
mkdir ~/mycontainer

View file

@ -6,6 +6,8 @@ import (
"fmt"
"io/ioutil"
"os"
"github.com/opencontainers/runc/libcontainer/utils"
)
// IsEnabled returns true if apparmor is enabled for the host.
@ -19,7 +21,7 @@ func IsEnabled() bool {
return false
}
func setprocattr(attr, value string) error {
func setProcAttr(attr, value string) error {
// Under AppArmor you can only change your own attr, so use /proc/self/
// instead of /proc/<tid>/ like libapparmor does
path := fmt.Sprintf("/proc/self/attr/%s", attr)
@ -30,6 +32,10 @@ func setprocattr(attr, value string) error {
}
defer f.Close()
if err := utils.EnsureProcHandle(f); err != nil {
return err
}
_, err = fmt.Fprintf(f, "%s", value)
return err
}
@ -37,7 +43,7 @@ func setprocattr(attr, value string) error {
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
func changeOnExec(name string) error {
value := "exec " + name
if err := setprocattr("exec", value); err != nil {
if err := setProcAttr("exec", value); err != nil {
return fmt.Errorf("apparmor failed to apply profile: %s", err)
}
return nil

View file

@ -37,8 +37,18 @@ type Manager interface {
// restore the object later.
GetPaths() map[string]string
// GetUnifiedPath returns the unified path when running in unified mode.
// The value corresponds to the all values of GetPaths() map.
//
// GetUnifiedPath returns error when running in hybrid mode as well as
// in legacy mode.
GetUnifiedPath() (string, error)
// Sets the cgroup as configured.
Set(container *configs.Config) error
// Gets the cgroup as configured.
GetCgroups() (*configs.Cgroup, error)
}
type NotFoundError struct {

View file

@ -20,8 +20,9 @@ import (
)
const (
CgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
CgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
unifiedMountpoint = "/sys/fs/cgroup"
)
var (
@ -40,7 +41,7 @@ var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
func IsCgroup2UnifiedMode() bool {
isUnifiedOnce.Do(func() {
var st syscall.Statfs_t
if err := syscall.Statfs("/sys/fs/cgroup", &st); err != nil {
if err := syscall.Statfs(unifiedMountpoint, &st); err != nil {
panic("cannot statfs cgroup root")
}
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
@ -50,6 +51,9 @@ func IsCgroup2UnifiedMode() bool {
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
if IsCgroup2UnifiedMode() {
return unifiedMountpoint, nil
}
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
return mnt, err
}
@ -235,8 +239,8 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
return nil, err
}
m := Mount{
Mountpoint: "/sys/fs/cgroup",
Root: "/sys/fs/cgroup",
Mountpoint: unifiedMountpoint,
Root: unifiedMountpoint,
Subsystems: availableControllers,
}
return []Mount{m}, nil
@ -262,6 +266,21 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
func GetAllSubsystems() ([]string, error) {
// /proc/cgroups is meaningless for v2
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features
if IsCgroup2UnifiedMode() {
// "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers.
// - devices: implemented in kernel 4.15
// - freezer: implemented in kernel 5.2
// We assume these are always available, as it is hard to detect availability.
pseudo := []string{"devices", "freezer"}
data, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
if err != nil {
return nil, err
}
subsystems := append(pseudo, strings.Fields(string(data))...)
return subsystems, nil
}
f, err := os.Open("/proc/cgroups")
if err != nil {
return nil, err

View file

@ -44,6 +44,7 @@ const (
Trap
Allow
Trace
Log
)
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp

View file

@ -0,0 +1,93 @@
// +build linux
package utils
/*
* Copyright 2016, 2017 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import (
"fmt"
"os"
"golang.org/x/sys/unix"
)
// MaxSendfdLen is the maximum length of the name of a file descriptor being
// sent using SendFd. The name of the file handle returned by RecvFd will never
// be larger than this value.
const MaxNameLen = 4096
// oobSpace is the size of the oob slice required to store a single FD. Note
// that unix.UnixRights appears to make the assumption that fd is always int32,
// so sizeof(fd) = 4.
var oobSpace = unix.CmsgSpace(4)
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
// socket. The file name of the remote file descriptor will be recreated
// locally (it is sent as non-auxiliary data in the same payload).
func RecvFd(socket *os.File) (*os.File, error) {
// For some reason, unix.Recvmsg uses the length rather than the capacity
// when passing the msg_controllen and other attributes to recvmsg. So we
// have to actually set the length.
name := make([]byte, MaxNameLen)
oob := make([]byte, oobSpace)
sockfd := socket.Fd()
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
if err != nil {
return nil, err
}
if n >= MaxNameLen || oobn != oobSpace {
return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
}
// Truncate.
name = name[:n]
oob = oob[:oobn]
scms, err := unix.ParseSocketControlMessage(oob)
if err != nil {
return nil, err
}
if len(scms) != 1 {
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
}
scm := scms[0]
fds, err := unix.ParseUnixRights(&scm)
if err != nil {
return nil, err
}
if len(fds) != 1 {
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
}
fd := uintptr(fds[0])
return os.NewFile(fd, string(name)), nil
}
// SendFd sends a file descriptor over the given AF_UNIX socket. In
// addition, the file.Name() of the given file will also be sent as
// non-auxiliary data in the same payload (allowing to send contextual
// information for a file descriptor).
func SendFd(socket *os.File, name string, fd uintptr) error {
if len(name) >= MaxNameLen {
return fmt.Errorf("sendfd: filename too long: %s", name)
}
oob := unix.UnixRights(int(fd))
return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0)
}

View file

@ -0,0 +1,112 @@
package utils
import (
"encoding/json"
"io"
"os"
"path/filepath"
"strings"
"unsafe"
"golang.org/x/sys/unix"
)
const (
exitSignalOffset = 128
)
// ResolveRootfs ensures that the current working directory is
// not a symlink and returns the absolute path to the rootfs
func ResolveRootfs(uncleanRootfs string) (string, error) {
rootfs, err := filepath.Abs(uncleanRootfs)
if err != nil {
return "", err
}
return filepath.EvalSymlinks(rootfs)
}
// ExitStatus returns the correct exit status for a process based on if it
// was signaled or exited cleanly
func ExitStatus(status unix.WaitStatus) int {
if status.Signaled() {
return exitSignalOffset + int(status.Signal())
}
return status.ExitStatus()
}
// WriteJSON writes the provided struct v to w using standard json marshaling
func WriteJSON(w io.Writer, v interface{}) error {
data, err := json.Marshal(v)
if err != nil {
return err
}
_, err = w.Write(data)
return err
}
// CleanPath makes a path safe for use with filepath.Join. This is done by not
// only cleaning the path, but also (if the path is relative) adding a leading
// '/' and cleaning it (then removing the leading '/'). This ensures that a
// path resulting from prepending another path will always resolve to lexically
// be a subdirectory of the prefixed path. This is all done lexically, so paths
// that include symlinks won't be safe as a result of using CleanPath.
func CleanPath(path string) string {
// Deal with empty strings nicely.
if path == "" {
return ""
}
// Ensure that all paths are cleaned (especially problematic ones like
// "/../../../../../" which can cause lots of issues).
path = filepath.Clean(path)
// If the path isn't absolute, we need to do more processing to fix paths
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
// paths to relative ones.
if !filepath.IsAbs(path) {
path = filepath.Clean(string(os.PathSeparator) + path)
// This can't fail, as (by definition) all paths are relative to root.
path, _ = filepath.Rel(string(os.PathSeparator), path)
}
// Clean the path again for good measure.
return filepath.Clean(path)
}
// SearchLabels searches a list of key-value pairs for the provided key and
// returns the corresponding value. The pairs must be separated with '='.
func SearchLabels(labels []string, query string) string {
for _, l := range labels {
parts := strings.SplitN(l, "=", 2)
if len(parts) < 2 {
continue
}
if parts[0] == query {
return parts[1]
}
}
return ""
}
// Annotations returns the bundle path and user defined annotations from the
// libcontainer state. We need to remove the bundle because that is a label
// added by libcontainer.
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
userAnnotations = make(map[string]string)
for _, l := range labels {
parts := strings.SplitN(l, "=", 2)
if len(parts) < 2 {
continue
}
if parts[0] == "bundle" {
bundle = parts[1]
} else {
userAnnotations[parts[0]] = parts[1]
}
}
return
}
func GetIntSize() int {
return int(unsafe.Sizeof(1))
}

View file

@ -0,0 +1,68 @@
// +build !windows
package utils
import (
"fmt"
"os"
"strconv"
"golang.org/x/sys/unix"
)
// EnsureProcHandle returns whether or not the given file handle is on procfs.
func EnsureProcHandle(fh *os.File) error {
var buf unix.Statfs_t
if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
return fmt.Errorf("ensure %s is on procfs: %v", fh.Name(), err)
}
if buf.Type != unix.PROC_SUPER_MAGIC {
return fmt.Errorf("%s is not on procfs", fh.Name())
}
return nil
}
// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
// the process (except for those below the given fd value).
func CloseExecFrom(minFd int) error {
fdDir, err := os.Open("/proc/self/fd")
if err != nil {
return err
}
defer fdDir.Close()
if err := EnsureProcHandle(fdDir); err != nil {
return err
}
fdList, err := fdDir.Readdirnames(-1)
if err != nil {
return err
}
for _, fdStr := range fdList {
fd, err := strconv.Atoi(fdStr)
// Ignore non-numeric file names.
if err != nil {
continue
}
// Ignore descriptors lower than our specified minimum.
if fd < minFd {
continue
}
// Intentionally ignore errors from unix.CloseOnExec -- the cases where
// this might fail are basically file descriptors that have already
// been closed (including and especially the one that was created when
// ioutil.ReadDir did the "opendir" syscall).
unix.CloseOnExec(fd)
}
return nil
}
// NewSockPair returns a new unix socket pair
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}

View file

@ -6,7 +6,7 @@ github.com/opencontainers/runtime-spec 29686dbc5559d93fb1ef402eeda3e35c38d75af4
# Core libcontainer functionality.
github.com/checkpoint-restore/go-criu 17b0214f6c48980c45dc47ecb0cfd6d9e02df723 # v3.11
github.com/mrunalp/fileutils 7d4729fb36185a7c1719923406c9d40e54fb93c7
github.com/opencontainers/selinux 3a1f366feb7aecbf7a0e71ac4cea88b31597de9e # v1.2.2
github.com/opencontainers/selinux 5215b1806f52b1fcc2070a8826c542c9d33cd3cf # v1.3.0 (+ CVE-2019-16884)
github.com/seccomp/libseccomp-golang 689e3c1541a84461afc49c1c87352a6cedf72e9c # v0.9.1
github.com/sirupsen/logrus 8bdbc7bcc01dcbb8ec23dc8a28e332258d25251f # v1.4.1
github.com/syndtr/gocapability d98352740cb2c55f81556b63d4a1ec64c5a319c2
@ -26,3 +26,6 @@ golang.org/x/sys 9eafafc0a87e0fd0aeeba439a4573537970c44c7
# console dependencies
github.com/containerd/console 0650fd9eeb50bab4fc99dceb9f2e14cf58f36e7f
github.com/pkg/errors ba968bfe8b2f7e042a574c888954fccecfa385b4 # v0.8.1
# ebpf dependencies
github.com/cilium/ebpf 95b36a581eed7b0f127306ed1d16cc0ddc06cf67

View file

@ -13,11 +13,12 @@ import (
// Valid Label Options
var validOptions = map[string]bool{
"disable": true,
"type": true,
"user": true,
"role": true,
"level": true,
"disable": true,
"type": true,
"filetype": true,
"user": true,
"role": true,
"level": true,
}
var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together")
@ -51,13 +52,16 @@ func InitLabels(options []string) (plabel string, mlabel string, Err error) {
return "", mountLabel, nil
}
if i := strings.Index(opt, ":"); i == -1 {
return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type' followed by ':' and a value", opt)
return "", "", fmt.Errorf("Bad label option %q, valid options 'disable' or \n'user, role, level, type, filetype' followed by ':' and a value", opt)
}
con := strings.SplitN(opt, ":", 2)
if !validOptions[con[0]] {
return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type'", con[0])
return "", "", fmt.Errorf("Bad label option %q, valid options 'disable, user, role, level, type, filetype'", con[0])
}
if con[0] == "filetype" {
mcon["type"] = con[1]
}
pcon[con[0]] = con[1]
if con[0] == "level" || con[0] == "user" {
mcon[con[0]] = con[1]

View file

@ -18,6 +18,8 @@ import (
"strings"
"sync"
"syscall"
"golang.org/x/sys/unix"
)
const (
@ -252,6 +254,12 @@ func getSELinuxPolicyRoot() string {
return filepath.Join(selinuxDir, readConfig(selinuxTypeTag))
}
func isProcHandle(fh *os.File) (bool, error) {
var buf unix.Statfs_t
err := unix.Fstatfs(int(fh.Fd()), &buf)
return buf.Type == unix.PROC_SUPER_MAGIC, err
}
func readCon(fpath string) (string, error) {
if fpath == "" {
return "", ErrEmptyPath
@ -263,6 +271,12 @@ func readCon(fpath string) (string, error) {
}
defer in.Close()
if ok, err := isProcHandle(in); err != nil {
return "", err
} else if !ok {
return "", fmt.Errorf("%s not on procfs", fpath)
}
var retval string
if _, err := fmt.Fscanf(in, "%s", &retval); err != nil {
return "", err
@ -345,6 +359,12 @@ func writeCon(fpath string, val string) error {
}
defer out.Close()
if ok, err := isProcHandle(out); err != nil {
return err
} else if !ok {
return fmt.Errorf("%s not on procfs", fpath)
}
if val != "" {
_, err = out.Write([]byte(val))
} else {
@ -392,6 +412,14 @@ func SetExecLabel(label string) error {
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), label)
}
/*
SetTaskLabel sets the SELinux label for the current thread, or an error.
This requires the dyntransition permission.
*/
func SetTaskLabel(label string) error {
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid()), label)
}
// SetSocketLabel takes a process label and tells the kernel to assign the
// label to the next socket that gets created
func SetSocketLabel(label string) error {
@ -403,6 +431,11 @@ func SocketLabel() (string, error) {
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/sockcreate", syscall.Gettid()))
}
// PeerLabel retrieves the label of the client on the other side of a socket
func PeerLabel(fd uintptr) (string, error) {
return unix.GetsockoptString(int(fd), syscall.SOL_SOCKET, syscall.SO_PEERSEC)
}
// SetKeyLabel takes a process label and tells the kernel to assign the
// label to the next kernel keyring that gets created
func SetKeyLabel(label string) error {

View file

@ -96,6 +96,14 @@ func SetExecLabel(label string) error {
return nil
}
/*
SetTaskLabel sets the SELinux label for the current thread, or an error.
This requires the dyntransition permission.
*/
func SetTaskLabel(label string) error {
return nil
}
/*
SetSocketLabel sets the SELinux label that the kernel will use for any programs
that are executed by the current process thread, or an error.
@ -109,6 +117,11 @@ func SocketLabel() (string, error) {
return "", nil
}
// PeerLabel retrieves the label of the client on the other side of a socket
func PeerLabel(fd uintptr) (string, error) {
return "", nil
}
// SetKeyLabel takes a process label and tells the kernel to assign the
// label to the next kernel keyring that gets created
func SetKeyLabel(label string) error {