diff --git a/vendor.conf b/vendor.conf index 831d0223b4..445a128a0c 100644 --- a/vendor.conf +++ b/vendor.conf @@ -75,8 +75,8 @@ github.com/pborman/uuid v1.0 google.golang.org/grpc v1.12.0 -# When updating, also update RUNC_COMMIT in hack/dockerfile/install/runc accordingly -github.com/opencontainers/runc 69663f0bd4b60df09991c08812a60108003fa340 +# This does not need to match RUNC_COMMIT as it is used for helper packages but should be newer or equal +github.com/opencontainers/runc 0e561642f81e84ebd0b3afd6ec510c75a2ccb71b github.com/opencontainers/runtime-spec v1.0.1 github.com/opencontainers/image-spec v1.0.1 github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0 diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go index 3cae4fd8d9..b1c4762fe2 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -141,9 +141,10 @@ type Config struct { // OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores // for a process. Valid values are between the range [-1000, '1000'], where processes with - // higher scores are preferred for being killed. + // higher scores are preferred for being killed. If it is unset then we don't touch the current + // value. 
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/ - OomScoreAdj int `json:"oom_score_adj"` + OomScoreAdj *int `json:"oom_score_adj,omitempty"` // UidMappings is an array of User ID mappings for User Namespaces UidMappings []IDMap `json:"uid_mappings"` diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index 2c69cee5d6..a4cd1399d9 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -505,7 +505,8 @@ void join_namespaces(char *nslist) ns->fd = fd; ns->ns = nsflag(namespace); - strncpy(ns->path, path, PATH_MAX); + strncpy(ns->path, path, PATH_MAX - 1); + ns->path[PATH_MAX - 1] = '\0'; } while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL); /* @@ -678,17 +679,15 @@ void nsexec(void) /* * Enable setgroups(2) if we've been asked to. But we also * have to explicitly disable setgroups(2) if we're - * creating a rootless container (this is required since - * Linux 3.19). + * creating a rootless container for single-entry mapping. + * i.e. config.is_setgroup == false. + * (this is required since Linux 3.19). + * + * For rootless multi-entry mapping, config.is_setgroup shall be true and + * newuidmap/newgidmap shall be used. */ - if (config.is_rootless && config.is_setgroup) { - kill(child, SIGKILL); - bail("cannot allow setgroup in an unprivileged user namespace setup"); - } - if (config.is_setgroup) - update_setgroups(child, SETGROUPS_ALLOW); - if (config.is_rootless) + if (config.is_rootless && !config.is_setgroup) update_setgroups(child, SETGROUPS_DENY); /* Set up mappings. */ @@ -809,25 +808,30 @@ void nsexec(void) if (config.namespaces) join_namespaces(config.namespaces); - /* - * Unshare all of the namespaces. 
Now, it should be noted that this - * ordering might break in the future (especially with rootless - * containers). But for now, it's not possible to split this into - * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues. - * - * Note that we don't merge this with clone() because there were - * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) - * was broken, so we'll just do it the long way anyway. - */ - if (unshare(config.cloneflags) < 0) - bail("failed to unshare namespaces"); - /* * Deal with user namespaces first. They are quite special, as they * affect our ability to unshare other namespaces and are used as * context for privilege checks. + * + * We don't unshare all namespaces in one go. The reason for this + * is that, while the kernel documentation may claim otherwise, + * there are certain cases where unsharing all namespaces at once + * will result in namespace objects being owned incorrectly. + * Ideally we should just fix these kernel bugs, but it's better to + * be safe than sorry, and fix them separately. + * + * A specific case of this is that the SELinux label of the + * internal kern-mount that mqueue uses will be incorrect if the + * UTS namespace is cloned before the USER namespace is mapped. + * I've also heard of similar problems with the network namespace + * in some scenarios. This also mirrors how LXC deals with this + * problem. */ if (config.cloneflags & CLONE_NEWUSER) { + if (unshare(CLONE_NEWUSER) < 0) + bail("failed to unshare user namespace"); + config.cloneflags &= ~CLONE_NEWUSER; + /* * We don't have the privileges to do any mapping here (see the * clone_parent rant). So signal our parent to hook us up. @@ -853,8 +857,21 @@ void nsexec(void) if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) bail("failed to set process as dumpable"); } + + /* Become root in the namespace proper. */ + if (setresuid(0, 0, 0) < 0) + bail("failed to become root in user namespace"); } + /* + * Unshare all of the namespaces. 
Note that we don't merge this + * with clone() because there were some old kernel versions where + * clone(CLONE_PARENT | CLONE_NEWPID) was broken, so we'll just do + * it the long way. + */ + if (unshare(config.cloneflags) < 0) + bail("failed to unshare namespaces"); + /* * TODO: What about non-namespace clone flags that we're dropping here? * diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go index 5f124cd8bb..8d353d984b 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go @@ -3,13 +3,12 @@ package system import ( - "bufio" - "fmt" "os" "os/exec" "syscall" // only for exec "unsafe" + "github.com/opencontainers/runc/libcontainer/user" "golang.org/x/sys/unix" ) @@ -102,34 +101,43 @@ func Setctty() error { } // RunningInUserNS detects whether we are currently running in a user namespace. -// Copied from github.com/lxc/lxd/shared/util.go +// Originally copied from github.com/lxc/lxd/shared/util.go func RunningInUserNS() bool { - file, err := os.Open("/proc/self/uid_map") + uidmap, err := user.CurrentProcessUIDMap() if err != nil { // This kernel-provided file only exists if user namespaces are supported return false } - defer file.Close() + return UIDMapInUserNS(uidmap) +} - buf := bufio.NewReader(file) - l, _, err := buf.ReadLine() - if err != nil { - return false - } - - line := string(l) - var a, b, c int64 - fmt.Sscanf(line, "%d %d %d", &a, &b, &c) +func UIDMapInUserNS(uidmap []user.IDMap) bool { /* * We assume we are in the initial user namespace if we have a full * range - 4294967295 uids starting at uid 0. 
*/ - if a == 0 && b == 0 && c == 4294967295 { + if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 { return false } return true } +// GetParentNSeuid returns the euid within the parent user namespace +func GetParentNSeuid() int { + euid := os.Geteuid() + uidmap, err := user.CurrentProcessUIDMap() + if err != nil { + // This kernel-provided file only exists if user namespaces are supported + return euid + } + for _, um := range uidmap { + if um.ID <= euid && euid <= um.ID+um.Count-1 { + return um.ParentID + euid - um.ID + } + } + return euid +} + // SetSubreaper sets the value i as the subreaper setting for the calling process func SetSubreaper(i int) error { return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go index e7cfd62b29..b94be74a66 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/unsupported.go @@ -2,8 +2,26 @@ package system +import ( + "os" + + "github.com/opencontainers/runc/libcontainer/user" +) + // RunningInUserNS is a stub for non-Linux systems // Always returns false func RunningInUserNS() bool { return false } + +// UIDMapInUserNS is a stub for non-Linux systems +// Always returns false +func UIDMapInUserNS(uidmap []user.IDMap) bool { + return false +} + +// GetParentNSeuid returns the euid within the parent user namespace +// Always returns os.Geteuid on non-linux +func GetParentNSeuid() int { + return os.Geteuid() +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go index c45e300411..c1e634c949 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go +++ 
b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go @@ -114,3 +114,29 @@ func CurrentUser() (User, error) { func CurrentGroup() (Group, error) { return LookupGid(unix.Getgid()) } + +func CurrentUserSubUIDs() ([]SubID, error) { + u, err := CurrentUser() + if err != nil { + return nil, err + } + return ParseSubIDFileFilter("/etc/subuid", + func(entry SubID) bool { return entry.Name == u.Name }) +} + +func CurrentGroupSubGIDs() ([]SubID, error) { + g, err := CurrentGroup() + if err != nil { + return nil, err + } + return ParseSubIDFileFilter("/etc/subgid", + func(entry SubID) bool { return entry.Name == g.Name }) +} + +func CurrentProcessUIDMap() ([]IDMap, error) { + return ParseIDMapFile("/proc/self/uid_map") +} + +func CurrentProcessGIDMap() ([]IDMap, error) { + return ParseIDMapFile("/proc/self/gid_map") +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go index 93414516ca..37993da833 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go @@ -75,12 +75,29 @@ func groupFromOS(g *user.Group) (Group, error) { return newGroup, nil } +// SubID represents an entry in /etc/sub{u,g}id +type SubID struct { + Name string + SubID int + Count int +} + +// IDMap represents an entry in /proc/PID/{u,g}id_map +type IDMap struct { + ID int + ParentID int + Count int +} + func parseLine(line string, v ...interface{}) { - if line == "" { + parseParts(strings.Split(line, ":"), v...) +} + +func parseParts(parts []string, v ...interface{}) { + if len(parts) == 0 { return } - parts := strings.Split(line, ":") for i, p := range parts { // Ignore cases where we don't have enough fields to populate the arguments. // Some configuration files like to misbehave. 
@@ -479,3 +496,131 @@ func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int
 	}
 	return GetAdditionalGroups(additionalGroups, group)
 }
+
+// ParseSubIDFile opens the file at path and parses it as subid-formatted
+// data (see: man 5 subuid), returning every entry found.
+func ParseSubIDFile(path string) ([]SubID, error) {
+	subid, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer subid.Close()
+	return ParseSubID(subid)
+}
+
+// ParseSubID parses subid-formatted data from subid and returns every entry.
+func ParseSubID(subid io.Reader) ([]SubID, error) {
+	return ParseSubIDFilter(subid, nil)
+}
+
+// ParseSubIDFileFilter opens the file at path and parses it as
+// subid-formatted data, returning only the entries accepted by filter.
+func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error) {
+	subid, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer subid.Close()
+	return ParseSubIDFilter(subid, filter)
+}
+
+// ParseSubIDFilter reads subid-formatted data from r line by line and returns
+// the entries for which filter returns true; a nil filter accepts every entry.
+// Blank lines are skipped. It returns an error, and no entries, when r is nil
+// or when the scanner reports a failure while reading.
+func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
+	if r == nil {
+		return nil, fmt.Errorf("nil source for subid-formatted data")
+	}
+
+	var (
+		s   = bufio.NewScanner(r)
+		out = []SubID{}
+	)
+
+	for s.Scan() {
+		line := strings.TrimSpace(s.Text())
+		if line == "" {
+			continue
+		}
+
+		// see: man 5 subuid
+		p := SubID{}
+		parseLine(line, &p.Name, &p.SubID, &p.Count)
+
+		if filter == nil || filter(p) {
+			out = append(out, p)
+		}
+	}
+	// Scan returns false on both EOF and failure, so a read error can only
+	// be observed once the loop has ended.
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+
+	return out, nil
+}
+
+// ParseIDMapFile opens the file at path and parses it as idmap-formatted
+// data (see: man 7 user_namespaces), returning every entry found.
+func ParseIDMapFile(path string) ([]IDMap, error) {
+	r, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer r.Close()
+	return ParseIDMap(r)
+}
+
+// ParseIDMap parses idmap-formatted data from r and returns every entry.
+func ParseIDMap(r io.Reader) ([]IDMap, error) {
+	return ParseIDMapFilter(r, nil)
+}
+
+// ParseIDMapFileFilter opens the file at path and parses it as
+// idmap-formatted data, returning only the entries accepted by filter.
+func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error) {
+	r, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer r.Close()
+	return ParseIDMapFilter(r, filter)
+}
+
+// ParseIDMapFilter reads idmap-formatted data from r line by line and returns
+// the entries for which filter returns true; a nil filter accepts every entry.
+// Blank lines are skipped. It returns an error, and no entries, when r is nil
+// or when the scanner reports a failure while reading.
+func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
+	if r == nil {
+		return nil, fmt.Errorf("nil source for idmap-formatted data")
+	}
+
+	var (
+		s   = bufio.NewScanner(r)
+		out = []IDMap{}
+	)
+
+	for s.Scan() {
+		line := strings.TrimSpace(s.Text())
+		if line == "" {
+			continue
+		}
+
+		// see: man 7 user_namespaces
+		p := IDMap{}
+		parseParts(strings.Fields(line), &p.ID, &p.ParentID, &p.Count)
+
+		if filter == nil || filter(p) {
+			out = append(out, p)
+		}
+	}
+	// Scan returns false on both EOF and failure, so a read error can only
+	// be observed once the loop has ended.
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+
+	return out, nil
+}