oci/caps: limit available capabilities to current environment

In situations where docker runs in an environment where capabilities are limited,
sucn as docker-in-docker in a container created by older versions of docker, or
in a container where some capabilities have been disabled, starting a privileged
container may fail, because even though the _kernel_ supports a capability, the
capability is not available.

This patch attempts to address this problem by limiting the list of "known" capa-
bilities on the set of effective capabilties for the current process. This code
is based on the code in containerd's "caps" package.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
Sebastiaan van Stijn 2021-10-13 14:27:41 +02:00
parent a80c450fb3
commit 485cf38d48
No known key found for this signature in database
GPG key ID: 76698F39D527CE8C
11 changed files with 67 additions and 1452 deletions

View file

@ -5,49 +5,38 @@ import (
"strings"
"github.com/docker/docker/errdefs"
"github.com/syndtr/gocapability/capability"
)
var (
allCaps []string
// capabilityList maps linux capability name to its value of capability.Cap
// type. This list contains nil entries for capabilities that are known, but
// not supported by the current kernel.
// Capabilities is one of the security systems in Linux Security Module (LSM)
// knownCapabilities is a map of all known capabilities, using capability
// name as index. Nil values indicate that the capability is known, but either
// not supported by the Kernel, or not available in the current environment,
// for example, when running Docker-in-Docker with restricted capabilities.
//
// Capabilities are one of the security systems in Linux Security Module (LSM)
// framework provided by the kernel.
// For more details on capabilities, see http://man7.org/linux/man-pages/man7/capabilities.7.html
capabilityList map[string]*capability.Cap
knownCaps map[string]*struct{}
)
func init() {
last := capability.CAP_LAST_CAP
rawCaps := capability.List()
allCaps = make([]string, min(int(last+1), len(rawCaps)))
capabilityList = make(map[string]*capability.Cap, len(rawCaps))
for i, c := range rawCaps {
capName := "CAP_" + strings.ToUpper(c.String())
if c > last {
capabilityList[capName] = nil
continue
}
allCaps[i] = capName
capabilityList[capName] = &c
}
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
// GetAllCapabilities returns all of the capabilities
// GetAllCapabilities returns all capabilities that are availeble in the current
// environment.
func GetAllCapabilities() []string {
initCaps()
return allCaps
}
// knownCapabilities returns a map of all known capabilities, using capability
// name as index. Nil values indicate that the capability is known, but either
// not supported by the Kernel, or not available in the current environment, for
// example, when running Docker-in-Docker with restricted capabilities.
func knownCapabilities() map[string]*struct{} {
initCaps()
return knownCaps
}
// inSlice tests whether a string is contained in a slice of strings or not.
func inSlice(slice []string, s string) bool {
for _, ss := range slice {
@ -65,7 +54,10 @@ const allCapabilities = "ALL"
//
// This function also accepts the "ALL" magic-value, that's used by CapAdd/CapDrop.
func NormalizeLegacyCapabilities(caps []string) ([]string, error) {
var normalized []string
var (
normalized []string
capabilityList = knownCapabilities()
)
for _, c := range caps {
c = strings.ToUpper(c)
@ -79,7 +71,7 @@ func NormalizeLegacyCapabilities(caps []string) ([]string, error) {
if v, ok := capabilityList[c]; !ok {
return nil, errdefs.InvalidParameter(fmt.Errorf("unknown capability: %q", c))
} else if v == nil {
return nil, errdefs.InvalidParameter(fmt.Errorf("capability not supported by your kernel: %q", c))
return nil, errdefs.InvalidParameter(fmt.Errorf("capability not supported by your kernel or not available in the current environment: %q", c))
}
normalized = append(normalized, c)
}

35
oci/caps/utils_linux.go Normal file
View file

@ -0,0 +1,35 @@
package caps // import "github.com/docker/docker/oci/caps"
import (
"sync"
ccaps "github.com/containerd/containerd/pkg/cap"
"github.com/sirupsen/logrus"
)
var initCapsOnce sync.Once
func initCaps() {
initCapsOnce.Do(func() {
rawCaps := ccaps.Known()
curCaps, err := ccaps.Current()
if err != nil {
logrus.WithError(err).Error("failed to get capabilities from current environment")
allCaps = rawCaps
} else {
allCaps = curCaps
}
knownCaps = make(map[string]*struct{}, len(rawCaps))
for _, capName := range rawCaps {
// For now, we assume the capability is available if we failed to
// get the capabilities from the current environment. This keeps the
// old (pre-detection) behavior, and prevents creating containers with
// no capabilities. The OCI runtime or kernel may still refuse capa-
// bilities that are not available, and produce an error in that case.
if len(curCaps) > 0 && !inSlice(curCaps, capName) {
knownCaps[capName] = nil
continue
}
knownCaps[capName] = &struct{}{}
}
})
}

8
oci/caps/utils_other.go Normal file
View file

@ -0,0 +1,8 @@
//go:build !linux
// +build !linux
package caps // import "github.com/docker/docker/oci/caps"
func initCaps() {
// no capabilities on Windows
}

View file

@ -25,7 +25,6 @@ github.com/docker/go-connections 7395e3f8aa162843a74ed6d48e79
golang.org/x/text 23ae387dee1f90d29a23c0e87ee0b46038fbed0e # v0.3.3
gotest.tools/v3 568bc57cc5c19a2ef85e5749870b49a4cc2ab54d # v3.0.3
github.com/google/go-cmp 3af367b6b30c263d47e8895973edcca9a49cf029 # v0.2.0
github.com/syndtr/gocapability 42c35b4376354fd554efc7ad35e0b7f94e3a0ffb
github.com/RackSec/srslog a4725f04ec91af1a91b380da679d6e0c2f061e59
github.com/imdario/mergo 1afb36080aec31e0d1528973ebe6721b191b0369 # v0.3.8

View file

@ -1,24 +0,0 @@
Copyright 2013 Suryandaru Triandana <syndtr@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,133 +0,0 @@
// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package capability provides utilities for manipulating POSIX capabilities.
package capability
type Capabilities interface {
// Get check whether a capability present in the given
// capabilities set. The 'which' value should be one of EFFECTIVE,
// PERMITTED, INHERITABLE, BOUNDING or AMBIENT.
Get(which CapType, what Cap) bool
// Empty check whether all capability bits of the given capabilities
// set are zero. The 'which' value should be one of EFFECTIVE,
// PERMITTED, INHERITABLE, BOUNDING or AMBIENT.
Empty(which CapType) bool
// Full check whether all capability bits of the given capabilities
// set are one. The 'which' value should be one of EFFECTIVE,
// PERMITTED, INHERITABLE, BOUNDING or AMBIENT.
Full(which CapType) bool
// Set sets capabilities of the given capabilities sets. The
// 'which' value should be one or combination (OR'ed) of EFFECTIVE,
// PERMITTED, INHERITABLE, BOUNDING or AMBIENT.
Set(which CapType, caps ...Cap)
// Unset unsets capabilities of the given capabilities sets. The
// 'which' value should be one or combination (OR'ed) of EFFECTIVE,
// PERMITTED, INHERITABLE, BOUNDING or AMBIENT.
Unset(which CapType, caps ...Cap)
// Fill sets all bits of the given capabilities kind to one. The
// 'kind' value should be one or combination (OR'ed) of CAPS,
// BOUNDS or AMBS.
Fill(kind CapType)
// Clear sets all bits of the given capabilities kind to zero. The
// 'kind' value should be one or combination (OR'ed) of CAPS,
// BOUNDS or AMBS.
Clear(kind CapType)
// String return current capabilities state of the given capabilities
// set as string. The 'which' value should be one of EFFECTIVE,
// PERMITTED, INHERITABLE BOUNDING or AMBIENT
StringCap(which CapType) string
// String return current capabilities state as string.
String() string
// Load load actual capabilities value. This will overwrite all
// outstanding changes.
Load() error
// Apply apply the capabilities settings, so all changes will take
// effect.
Apply(kind CapType) error
}
// NewPid initializes a new Capabilities object for given pid when
// it is nonzero, or for the current process if pid is 0.
//
// Deprecated: Replace with NewPid2. For example, replace:
//
// c, err := NewPid(0)
// if err != nil {
// return err
// }
//
// with:
//
// c, err := NewPid2(0)
// if err != nil {
// return err
// }
// err = c.Load()
// if err != nil {
// return err
// }
func NewPid(pid int) (Capabilities, error) {
c, err := newPid(pid)
if err != nil {
return c, err
}
err = c.Load()
return c, err
}
// NewPid2 initializes a new Capabilities object for given pid when
// it is nonzero, or for the current process if pid is 0. This
// does not load the process's current capabilities; to do that you
// must call Load explicitly.
func NewPid2(pid int) (Capabilities, error) {
return newPid(pid)
}
// NewFile initializes a new Capabilities object for given file path.
//
// Deprecated: Replace with NewFile2. For example, replace:
//
// c, err := NewFile(path)
// if err != nil {
// return err
// }
//
// with:
//
// c, err := NewFile2(path)
// if err != nil {
// return err
// }
// err = c.Load()
// if err != nil {
// return err
// }
func NewFile(path string) (Capabilities, error) {
c, err := newFile(path)
if err != nil {
return c, err
}
err = c.Load()
return c, err
}
// NewFile2 creates a new initialized Capabilities object for given
// file path. This does not load the process's current capabilities;
// to do that you must call Load explicitly.
func NewFile2(path string) (Capabilities, error) {
return newFile(path)
}

View file

@ -1,642 +0,0 @@
// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package capability
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"strings"
"syscall"
)
var errUnknownVers = errors.New("unknown capability version")
const (
linuxCapVer1 = 0x19980330
linuxCapVer2 = 0x20071026
linuxCapVer3 = 0x20080522
)
var (
capVers uint32
capLastCap Cap
)
func init() {
var hdr capHeader
capget(&hdr, nil)
capVers = hdr.version
if initLastCap() == nil {
CAP_LAST_CAP = capLastCap
if capLastCap > 31 {
capUpperMask = (uint32(1) << (uint(capLastCap) - 31)) - 1
} else {
capUpperMask = 0
}
}
}
func initLastCap() error {
if capLastCap != 0 {
return nil
}
f, err := os.Open("/proc/sys/kernel/cap_last_cap")
if err != nil {
return err
}
defer f.Close()
var b []byte = make([]byte, 11)
_, err = f.Read(b)
if err != nil {
return err
}
fmt.Sscanf(string(b), "%d", &capLastCap)
return nil
}
func mkStringCap(c Capabilities, which CapType) (ret string) {
for i, first := Cap(0), true; i <= CAP_LAST_CAP; i++ {
if !c.Get(which, i) {
continue
}
if first {
first = false
} else {
ret += ", "
}
ret += i.String()
}
return
}
func mkString(c Capabilities, max CapType) (ret string) {
ret = "{"
for i := CapType(1); i <= max; i <<= 1 {
ret += " " + i.String() + "=\""
if c.Empty(i) {
ret += "empty"
} else if c.Full(i) {
ret += "full"
} else {
ret += c.StringCap(i)
}
ret += "\""
}
ret += " }"
return
}
func newPid(pid int) (c Capabilities, err error) {
switch capVers {
case linuxCapVer1:
p := new(capsV1)
p.hdr.version = capVers
p.hdr.pid = int32(pid)
c = p
case linuxCapVer2, linuxCapVer3:
p := new(capsV3)
p.hdr.version = capVers
p.hdr.pid = int32(pid)
c = p
default:
err = errUnknownVers
return
}
return
}
type capsV1 struct {
hdr capHeader
data capData
}
func (c *capsV1) Get(which CapType, what Cap) bool {
if what > 32 {
return false
}
switch which {
case EFFECTIVE:
return (1<<uint(what))&c.data.effective != 0
case PERMITTED:
return (1<<uint(what))&c.data.permitted != 0
case INHERITABLE:
return (1<<uint(what))&c.data.inheritable != 0
}
return false
}
func (c *capsV1) getData(which CapType) (ret uint32) {
switch which {
case EFFECTIVE:
ret = c.data.effective
case PERMITTED:
ret = c.data.permitted
case INHERITABLE:
ret = c.data.inheritable
}
return
}
func (c *capsV1) Empty(which CapType) bool {
return c.getData(which) == 0
}
func (c *capsV1) Full(which CapType) bool {
return (c.getData(which) & 0x7fffffff) == 0x7fffffff
}
func (c *capsV1) Set(which CapType, caps ...Cap) {
for _, what := range caps {
if what > 32 {
continue
}
if which&EFFECTIVE != 0 {
c.data.effective |= 1 << uint(what)
}
if which&PERMITTED != 0 {
c.data.permitted |= 1 << uint(what)
}
if which&INHERITABLE != 0 {
c.data.inheritable |= 1 << uint(what)
}
}
}
func (c *capsV1) Unset(which CapType, caps ...Cap) {
for _, what := range caps {
if what > 32 {
continue
}
if which&EFFECTIVE != 0 {
c.data.effective &= ^(1 << uint(what))
}
if which&PERMITTED != 0 {
c.data.permitted &= ^(1 << uint(what))
}
if which&INHERITABLE != 0 {
c.data.inheritable &= ^(1 << uint(what))
}
}
}
func (c *capsV1) Fill(kind CapType) {
if kind&CAPS == CAPS {
c.data.effective = 0x7fffffff
c.data.permitted = 0x7fffffff
c.data.inheritable = 0
}
}
func (c *capsV1) Clear(kind CapType) {
if kind&CAPS == CAPS {
c.data.effective = 0
c.data.permitted = 0
c.data.inheritable = 0
}
}
func (c *capsV1) StringCap(which CapType) (ret string) {
return mkStringCap(c, which)
}
func (c *capsV1) String() (ret string) {
return mkString(c, BOUNDING)
}
func (c *capsV1) Load() (err error) {
return capget(&c.hdr, &c.data)
}
func (c *capsV1) Apply(kind CapType) error {
if kind&CAPS == CAPS {
return capset(&c.hdr, &c.data)
}
return nil
}
type capsV3 struct {
hdr capHeader
data [2]capData
bounds [2]uint32
ambient [2]uint32
}
func (c *capsV3) Get(which CapType, what Cap) bool {
var i uint
if what > 31 {
i = uint(what) >> 5
what %= 32
}
switch which {
case EFFECTIVE:
return (1<<uint(what))&c.data[i].effective != 0
case PERMITTED:
return (1<<uint(what))&c.data[i].permitted != 0
case INHERITABLE:
return (1<<uint(what))&c.data[i].inheritable != 0
case BOUNDING:
return (1<<uint(what))&c.bounds[i] != 0
case AMBIENT:
return (1<<uint(what))&c.ambient[i] != 0
}
return false
}
func (c *capsV3) getData(which CapType, dest []uint32) {
switch which {
case EFFECTIVE:
dest[0] = c.data[0].effective
dest[1] = c.data[1].effective
case PERMITTED:
dest[0] = c.data[0].permitted
dest[1] = c.data[1].permitted
case INHERITABLE:
dest[0] = c.data[0].inheritable
dest[1] = c.data[1].inheritable
case BOUNDING:
dest[0] = c.bounds[0]
dest[1] = c.bounds[1]
case AMBIENT:
dest[0] = c.ambient[0]
dest[1] = c.ambient[1]
}
}
func (c *capsV3) Empty(which CapType) bool {
var data [2]uint32
c.getData(which, data[:])
return data[0] == 0 && data[1] == 0
}
func (c *capsV3) Full(which CapType) bool {
var data [2]uint32
c.getData(which, data[:])
if (data[0] & 0xffffffff) != 0xffffffff {
return false
}
return (data[1] & capUpperMask) == capUpperMask
}
func (c *capsV3) Set(which CapType, caps ...Cap) {
for _, what := range caps {
var i uint
if what > 31 {
i = uint(what) >> 5
what %= 32
}
if which&EFFECTIVE != 0 {
c.data[i].effective |= 1 << uint(what)
}
if which&PERMITTED != 0 {
c.data[i].permitted |= 1 << uint(what)
}
if which&INHERITABLE != 0 {
c.data[i].inheritable |= 1 << uint(what)
}
if which&BOUNDING != 0 {
c.bounds[i] |= 1 << uint(what)
}
if which&AMBIENT != 0 {
c.ambient[i] |= 1 << uint(what)
}
}
}
func (c *capsV3) Unset(which CapType, caps ...Cap) {
for _, what := range caps {
var i uint
if what > 31 {
i = uint(what) >> 5
what %= 32
}
if which&EFFECTIVE != 0 {
c.data[i].effective &= ^(1 << uint(what))
}
if which&PERMITTED != 0 {
c.data[i].permitted &= ^(1 << uint(what))
}
if which&INHERITABLE != 0 {
c.data[i].inheritable &= ^(1 << uint(what))
}
if which&BOUNDING != 0 {
c.bounds[i] &= ^(1 << uint(what))
}
if which&AMBIENT != 0 {
c.ambient[i] &= ^(1 << uint(what))
}
}
}
func (c *capsV3) Fill(kind CapType) {
if kind&CAPS == CAPS {
c.data[0].effective = 0xffffffff
c.data[0].permitted = 0xffffffff
c.data[0].inheritable = 0
c.data[1].effective = 0xffffffff
c.data[1].permitted = 0xffffffff
c.data[1].inheritable = 0
}
if kind&BOUNDS == BOUNDS {
c.bounds[0] = 0xffffffff
c.bounds[1] = 0xffffffff
}
if kind&AMBS == AMBS {
c.ambient[0] = 0xffffffff
c.ambient[1] = 0xffffffff
}
}
func (c *capsV3) Clear(kind CapType) {
if kind&CAPS == CAPS {
c.data[0].effective = 0
c.data[0].permitted = 0
c.data[0].inheritable = 0
c.data[1].effective = 0
c.data[1].permitted = 0
c.data[1].inheritable = 0
}
if kind&BOUNDS == BOUNDS {
c.bounds[0] = 0
c.bounds[1] = 0
}
if kind&AMBS == AMBS {
c.ambient[0] = 0
c.ambient[1] = 0
}
}
func (c *capsV3) StringCap(which CapType) (ret string) {
return mkStringCap(c, which)
}
func (c *capsV3) String() (ret string) {
return mkString(c, BOUNDING)
}
func (c *capsV3) Load() (err error) {
err = capget(&c.hdr, &c.data[0])
if err != nil {
return
}
var status_path string
if c.hdr.pid == 0 {
status_path = fmt.Sprintf("/proc/self/status")
} else {
status_path = fmt.Sprintf("/proc/%d/status", c.hdr.pid)
}
f, err := os.Open(status_path)
if err != nil {
return
}
b := bufio.NewReader(f)
for {
line, e := b.ReadString('\n')
if e != nil {
if e != io.EOF {
err = e
}
break
}
if strings.HasPrefix(line, "CapB") {
fmt.Sscanf(line[4:], "nd: %08x%08x", &c.bounds[1], &c.bounds[0])
continue
}
if strings.HasPrefix(line, "CapA") {
fmt.Sscanf(line[4:], "mb: %08x%08x", &c.ambient[1], &c.ambient[0])
continue
}
}
f.Close()
return
}
func (c *capsV3) Apply(kind CapType) (err error) {
if kind&BOUNDS == BOUNDS {
var data [2]capData
err = capget(&c.hdr, &data[0])
if err != nil {
return
}
if (1<<uint(CAP_SETPCAP))&data[0].effective != 0 {
for i := Cap(0); i <= CAP_LAST_CAP; i++ {
if c.Get(BOUNDING, i) {
continue
}
err = prctl(syscall.PR_CAPBSET_DROP, uintptr(i), 0, 0, 0)
if err != nil {
// Ignore EINVAL since the capability may not be supported in this system.
if errno, ok := err.(syscall.Errno); ok && errno == syscall.EINVAL {
err = nil
continue
}
return
}
}
}
}
if kind&CAPS == CAPS {
err = capset(&c.hdr, &c.data[0])
if err != nil {
return
}
}
if kind&AMBS == AMBS {
for i := Cap(0); i <= CAP_LAST_CAP; i++ {
action := pr_CAP_AMBIENT_LOWER
if c.Get(AMBIENT, i) {
action = pr_CAP_AMBIENT_RAISE
}
err := prctl(pr_CAP_AMBIENT, action, uintptr(i), 0, 0)
// Ignore EINVAL as not supported on kernels before 4.3
if errno, ok := err.(syscall.Errno); ok && errno == syscall.EINVAL {
err = nil
continue
}
}
}
return
}
func newFile(path string) (c Capabilities, err error) {
c = &capsFile{path: path}
return
}
type capsFile struct {
path string
data vfscapData
}
func (c *capsFile) Get(which CapType, what Cap) bool {
var i uint
if what > 31 {
if c.data.version == 1 {
return false
}
i = uint(what) >> 5
what %= 32
}
switch which {
case EFFECTIVE:
return (1<<uint(what))&c.data.effective[i] != 0
case PERMITTED:
return (1<<uint(what))&c.data.data[i].permitted != 0
case INHERITABLE:
return (1<<uint(what))&c.data.data[i].inheritable != 0
}
return false
}
func (c *capsFile) getData(which CapType, dest []uint32) {
switch which {
case EFFECTIVE:
dest[0] = c.data.effective[0]
dest[1] = c.data.effective[1]
case PERMITTED:
dest[0] = c.data.data[0].permitted
dest[1] = c.data.data[1].permitted
case INHERITABLE:
dest[0] = c.data.data[0].inheritable
dest[1] = c.data.data[1].inheritable
}
}
func (c *capsFile) Empty(which CapType) bool {
var data [2]uint32
c.getData(which, data[:])
return data[0] == 0 && data[1] == 0
}
func (c *capsFile) Full(which CapType) bool {
var data [2]uint32
c.getData(which, data[:])
if c.data.version == 0 {
return (data[0] & 0x7fffffff) == 0x7fffffff
}
if (data[0] & 0xffffffff) != 0xffffffff {
return false
}
return (data[1] & capUpperMask) == capUpperMask
}
func (c *capsFile) Set(which CapType, caps ...Cap) {
for _, what := range caps {
var i uint
if what > 31 {
if c.data.version == 1 {
continue
}
i = uint(what) >> 5
what %= 32
}
if which&EFFECTIVE != 0 {
c.data.effective[i] |= 1 << uint(what)
}
if which&PERMITTED != 0 {
c.data.data[i].permitted |= 1 << uint(what)
}
if which&INHERITABLE != 0 {
c.data.data[i].inheritable |= 1 << uint(what)
}
}
}
func (c *capsFile) Unset(which CapType, caps ...Cap) {
for _, what := range caps {
var i uint
if what > 31 {
if c.data.version == 1 {
continue
}
i = uint(what) >> 5
what %= 32
}
if which&EFFECTIVE != 0 {
c.data.effective[i] &= ^(1 << uint(what))
}
if which&PERMITTED != 0 {
c.data.data[i].permitted &= ^(1 << uint(what))
}
if which&INHERITABLE != 0 {
c.data.data[i].inheritable &= ^(1 << uint(what))
}
}
}
func (c *capsFile) Fill(kind CapType) {
if kind&CAPS == CAPS {
c.data.effective[0] = 0xffffffff
c.data.data[0].permitted = 0xffffffff
c.data.data[0].inheritable = 0
if c.data.version == 2 {
c.data.effective[1] = 0xffffffff
c.data.data[1].permitted = 0xffffffff
c.data.data[1].inheritable = 0
}
}
}
func (c *capsFile) Clear(kind CapType) {
if kind&CAPS == CAPS {
c.data.effective[0] = 0
c.data.data[0].permitted = 0
c.data.data[0].inheritable = 0
if c.data.version == 2 {
c.data.effective[1] = 0
c.data.data[1].permitted = 0
c.data.data[1].inheritable = 0
}
}
}
func (c *capsFile) StringCap(which CapType) (ret string) {
return mkStringCap(c, which)
}
func (c *capsFile) String() (ret string) {
return mkString(c, INHERITABLE)
}
func (c *capsFile) Load() (err error) {
return getVfsCap(c.path, &c.data)
}
func (c *capsFile) Apply(kind CapType) (err error) {
if kind&CAPS == CAPS {
return setVfsCap(c.path, &c.data)
}
return
}

View file

@ -1,19 +0,0 @@
// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// +build !linux
package capability
import "errors"
func newPid(pid int) (Capabilities, error) {
return nil, errors.New("not supported")
}
func newFile(path string) (Capabilities, error) {
return nil, errors.New("not supported")
}

View file

@ -1,309 +0,0 @@
// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package capability
type CapType uint
func (c CapType) String() string {
switch c {
case EFFECTIVE:
return "effective"
case PERMITTED:
return "permitted"
case INHERITABLE:
return "inheritable"
case BOUNDING:
return "bounding"
case CAPS:
return "caps"
case AMBIENT:
return "ambient"
}
return "unknown"
}
const (
EFFECTIVE CapType = 1 << iota
PERMITTED
INHERITABLE
BOUNDING
AMBIENT
CAPS = EFFECTIVE | PERMITTED | INHERITABLE
BOUNDS = BOUNDING
AMBS = AMBIENT
)
//go:generate go run enumgen/gen.go
type Cap int
// POSIX-draft defined capabilities and Linux extensions.
//
// Defined in https://github.com/torvalds/linux/blob/master/include/uapi/linux/capability.h
const (
// In a system with the [_POSIX_CHOWN_RESTRICTED] option defined, this
// overrides the restriction of changing file ownership and group
// ownership.
CAP_CHOWN = Cap(0)
// Override all DAC access, including ACL execute access if
// [_POSIX_ACL] is defined. Excluding DAC access covered by
// CAP_LINUX_IMMUTABLE.
CAP_DAC_OVERRIDE = Cap(1)
// Overrides all DAC restrictions regarding read and search on files
// and directories, including ACL restrictions if [_POSIX_ACL] is
// defined. Excluding DAC access covered by CAP_LINUX_IMMUTABLE.
CAP_DAC_READ_SEARCH = Cap(2)
// Overrides all restrictions about allowed operations on files, where
// file owner ID must be equal to the user ID, except where CAP_FSETID
// is applicable. It doesn't override MAC and DAC restrictions.
CAP_FOWNER = Cap(3)
// Overrides the following restrictions that the effective user ID
// shall match the file owner ID when setting the S_ISUID and S_ISGID
// bits on that file; that the effective group ID (or one of the
// supplementary group IDs) shall match the file owner ID when setting
// the S_ISGID bit on that file; that the S_ISUID and S_ISGID bits are
// cleared on successful return from chown(2) (not implemented).
CAP_FSETID = Cap(4)
// Overrides the restriction that the real or effective user ID of a
// process sending a signal must match the real or effective user ID
// of the process receiving the signal.
CAP_KILL = Cap(5)
// Allows setgid(2) manipulation
// Allows setgroups(2)
// Allows forged gids on socket credentials passing.
CAP_SETGID = Cap(6)
// Allows set*uid(2) manipulation (including fsuid).
// Allows forged pids on socket credentials passing.
CAP_SETUID = Cap(7)
// Linux-specific capabilities
// Without VFS support for capabilities:
// Transfer any capability in your permitted set to any pid,
// remove any capability in your permitted set from any pid
// With VFS support for capabilities (neither of above, but)
// Add any capability from current's capability bounding set
// to the current process' inheritable set
// Allow taking bits out of capability bounding set
// Allow modification of the securebits for a process
CAP_SETPCAP = Cap(8)
// Allow modification of S_IMMUTABLE and S_APPEND file attributes
CAP_LINUX_IMMUTABLE = Cap(9)
// Allows binding to TCP/UDP sockets below 1024
// Allows binding to ATM VCIs below 32
CAP_NET_BIND_SERVICE = Cap(10)
// Allow broadcasting, listen to multicast
CAP_NET_BROADCAST = Cap(11)
// Allow interface configuration
// Allow administration of IP firewall, masquerading and accounting
// Allow setting debug option on sockets
// Allow modification of routing tables
// Allow setting arbitrary process / process group ownership on
// sockets
// Allow binding to any address for transparent proxying (also via NET_RAW)
// Allow setting TOS (type of service)
// Allow setting promiscuous mode
// Allow clearing driver statistics
// Allow multicasting
// Allow read/write of device-specific registers
// Allow activation of ATM control sockets
CAP_NET_ADMIN = Cap(12)
// Allow use of RAW sockets
// Allow use of PACKET sockets
// Allow binding to any address for transparent proxying (also via NET_ADMIN)
CAP_NET_RAW = Cap(13)
// Allow locking of shared memory segments
// Allow mlock and mlockall (which doesn't really have anything to do
// with IPC)
CAP_IPC_LOCK = Cap(14)
// Override IPC ownership checks
CAP_IPC_OWNER = Cap(15)
// Insert and remove kernel modules - modify kernel without limit
CAP_SYS_MODULE = Cap(16)
// Allow ioperm/iopl access
// Allow sending USB messages to any device via /proc/bus/usb
CAP_SYS_RAWIO = Cap(17)
// Allow use of chroot()
CAP_SYS_CHROOT = Cap(18)
// Allow ptrace() of any process
CAP_SYS_PTRACE = Cap(19)
// Allow configuration of process accounting
CAP_SYS_PACCT = Cap(20)
// Allow configuration of the secure attention key
// Allow administration of the random device
// Allow examination and configuration of disk quotas
// Allow setting the domainname
// Allow setting the hostname
// Allow calling bdflush()
// Allow mount() and umount(), setting up new smb connection
// Allow some autofs root ioctls
// Allow nfsservctl
// Allow VM86_REQUEST_IRQ
// Allow to read/write pci config on alpha
// Allow irix_prctl on mips (setstacksize)
// Allow flushing all cache on m68k (sys_cacheflush)
// Allow removing semaphores
// Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
// and shared memory
// Allow locking/unlocking of shared memory segment
// Allow turning swap on/off
// Allow forged pids on socket credentials passing
// Allow setting readahead and flushing buffers on block devices
// Allow setting geometry in floppy driver
// Allow turning DMA on/off in xd driver
// Allow administration of md devices (mostly the above, but some
// extra ioctls)
// Allow tuning the ide driver
// Allow access to the nvram device
// Allow administration of apm_bios, serial and bttv (TV) device
// Allow manufacturer commands in isdn CAPI support driver
// Allow reading non-standardized portions of pci configuration space
// Allow DDI debug ioctl on sbpcd driver
// Allow setting up serial ports
// Allow sending raw qic-117 commands
// Allow enabling/disabling tagged queuing on SCSI controllers and sending
// arbitrary SCSI commands
// Allow setting encryption key on loopback filesystem
// Allow setting zone reclaim policy
// Allow everything under CAP_BPF and CAP_PERFMON for backward compatibility
CAP_SYS_ADMIN = Cap(21)
// Allow use of reboot()
CAP_SYS_BOOT = Cap(22)
// Allow raising priority and setting priority on other (different
// UID) processes
// Allow use of FIFO and round-robin (realtime) scheduling on own
// processes and setting the scheduling algorithm used by another
// process.
// Allow setting cpu affinity on other processes
CAP_SYS_NICE = Cap(23)
// Override resource limits. Set resource limits.
// Override quota limits.
// Override reserved space on ext2 filesystem
// Modify data journaling mode on ext3 filesystem (uses journaling
// resources)
// NOTE: ext2 honors fsuid when checking for resource overrides, so
// you can override using fsuid too
// Override size restrictions on IPC message queues
// Allow more than 64hz interrupts from the real-time clock
// Override max number of consoles on console allocation
// Override max number of keymaps
// Control memory reclaim behavior
CAP_SYS_RESOURCE = Cap(24)
// Allow manipulation of system clock
// Allow irix_stime on mips
// Allow setting the real-time clock
CAP_SYS_TIME = Cap(25)
// Allow configuration of tty devices
// Allow vhangup() of tty
CAP_SYS_TTY_CONFIG = Cap(26)
// Allow the privileged aspects of mknod()
CAP_MKNOD = Cap(27)
// Allow taking of leases on files
CAP_LEASE = Cap(28)
CAP_AUDIT_WRITE = Cap(29)
CAP_AUDIT_CONTROL = Cap(30)
CAP_SETFCAP = Cap(31)
// Override MAC access.
// The base kernel enforces no MAC policy.
// An LSM may enforce a MAC policy, and if it does and it chooses
// to implement capability based overrides of that policy, this is
// the capability it should use to do so.
CAP_MAC_OVERRIDE = Cap(32)
// Allow MAC configuration or state changes.
// The base kernel requires no MAC configuration.
// An LSM may enforce a MAC policy, and if it does and it chooses
// to implement capability based checks on modifications to that
// policy or the data required to maintain it, this is the
// capability it should use to do so.
CAP_MAC_ADMIN = Cap(33)
// Allow configuring the kernel's syslog (printk behaviour)
CAP_SYSLOG = Cap(34)
// Allow triggering something that will wake the system
CAP_WAKE_ALARM = Cap(35)
// Allow preventing system suspends
CAP_BLOCK_SUSPEND = Cap(36)
// Allow reading the audit log via multicast netlink socket
CAP_AUDIT_READ = Cap(37)
// Allow system performance and observability privileged operations
// using perf_events, i915_perf and other kernel subsystems
CAP_PERFMON = Cap(38)
// CAP_BPF allows the following BPF operations:
// - Creating all types of BPF maps
// - Advanced verifier features
// - Indirect variable access
// - Bounded loops
// - BPF to BPF function calls
// - Scalar precision tracking
// - Larger complexity limits
// - Dead code elimination
// - And potentially other features
// - Loading BPF Type Format (BTF) data
// - Retrieve xlated and JITed code of BPF programs
// - Use bpf_spin_lock() helper
//
// CAP_PERFMON relaxes the verifier checks further:
// - BPF progs can use of pointer-to-integer conversions
// - speculation attack hardening measures are bypassed
// - bpf_probe_read to read arbitrary kernel memory is allowed
// - bpf_trace_printk to print kernel memory is allowed
//
// CAP_SYS_ADMIN is required to use bpf_probe_write_user.
//
// CAP_SYS_ADMIN is required to iterate system wide loaded
// programs, maps, links, BTFs and convert their IDs to file descriptors.
//
// CAP_PERFMON and CAP_BPF are required to load tracing programs.
// CAP_NET_ADMIN and CAP_BPF are required to load networking programs.
CAP_BPF = Cap(39)
// Allow checkpoint/restore related operations.
// Introduced in kernel 5.9
CAP_CHECKPOINT_RESTORE = Cap(40)
)
var (
// Highest valid capability of the running kernel.
CAP_LAST_CAP = Cap(63)
capUpperMask = ^uint32(0)
)

View file

@ -1,138 +0,0 @@
// generated file; DO NOT EDIT - use go generate in directory with source
package capability
func (c Cap) String() string {
switch c {
case CAP_CHOWN:
return "chown"
case CAP_DAC_OVERRIDE:
return "dac_override"
case CAP_DAC_READ_SEARCH:
return "dac_read_search"
case CAP_FOWNER:
return "fowner"
case CAP_FSETID:
return "fsetid"
case CAP_KILL:
return "kill"
case CAP_SETGID:
return "setgid"
case CAP_SETUID:
return "setuid"
case CAP_SETPCAP:
return "setpcap"
case CAP_LINUX_IMMUTABLE:
return "linux_immutable"
case CAP_NET_BIND_SERVICE:
return "net_bind_service"
case CAP_NET_BROADCAST:
return "net_broadcast"
case CAP_NET_ADMIN:
return "net_admin"
case CAP_NET_RAW:
return "net_raw"
case CAP_IPC_LOCK:
return "ipc_lock"
case CAP_IPC_OWNER:
return "ipc_owner"
case CAP_SYS_MODULE:
return "sys_module"
case CAP_SYS_RAWIO:
return "sys_rawio"
case CAP_SYS_CHROOT:
return "sys_chroot"
case CAP_SYS_PTRACE:
return "sys_ptrace"
case CAP_SYS_PACCT:
return "sys_pacct"
case CAP_SYS_ADMIN:
return "sys_admin"
case CAP_SYS_BOOT:
return "sys_boot"
case CAP_SYS_NICE:
return "sys_nice"
case CAP_SYS_RESOURCE:
return "sys_resource"
case CAP_SYS_TIME:
return "sys_time"
case CAP_SYS_TTY_CONFIG:
return "sys_tty_config"
case CAP_MKNOD:
return "mknod"
case CAP_LEASE:
return "lease"
case CAP_AUDIT_WRITE:
return "audit_write"
case CAP_AUDIT_CONTROL:
return "audit_control"
case CAP_SETFCAP:
return "setfcap"
case CAP_MAC_OVERRIDE:
return "mac_override"
case CAP_MAC_ADMIN:
return "mac_admin"
case CAP_SYSLOG:
return "syslog"
case CAP_WAKE_ALARM:
return "wake_alarm"
case CAP_BLOCK_SUSPEND:
return "block_suspend"
case CAP_AUDIT_READ:
return "audit_read"
case CAP_PERFMON:
return "perfmon"
case CAP_BPF:
return "bpf"
case CAP_CHECKPOINT_RESTORE:
return "checkpoint_restore"
}
return "unknown"
}
// List returns list of all supported capabilities
func List() []Cap {
return []Cap{
CAP_CHOWN,
CAP_DAC_OVERRIDE,
CAP_DAC_READ_SEARCH,
CAP_FOWNER,
CAP_FSETID,
CAP_KILL,
CAP_SETGID,
CAP_SETUID,
CAP_SETPCAP,
CAP_LINUX_IMMUTABLE,
CAP_NET_BIND_SERVICE,
CAP_NET_BROADCAST,
CAP_NET_ADMIN,
CAP_NET_RAW,
CAP_IPC_LOCK,
CAP_IPC_OWNER,
CAP_SYS_MODULE,
CAP_SYS_RAWIO,
CAP_SYS_CHROOT,
CAP_SYS_PTRACE,
CAP_SYS_PACCT,
CAP_SYS_ADMIN,
CAP_SYS_BOOT,
CAP_SYS_NICE,
CAP_SYS_RESOURCE,
CAP_SYS_TIME,
CAP_SYS_TTY_CONFIG,
CAP_MKNOD,
CAP_LEASE,
CAP_AUDIT_WRITE,
CAP_AUDIT_CONTROL,
CAP_SETFCAP,
CAP_MAC_OVERRIDE,
CAP_MAC_ADMIN,
CAP_SYSLOG,
CAP_WAKE_ALARM,
CAP_BLOCK_SUSPEND,
CAP_AUDIT_READ,
CAP_PERFMON,
CAP_BPF,
CAP_CHECKPOINT_RESTORE,
}
}

View file

@ -1,154 +0,0 @@
// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package capability
import (
"syscall"
"unsafe"
)
type capHeader struct {
version uint32
pid int32
}
type capData struct {
effective uint32
permitted uint32
inheritable uint32
}
func capget(hdr *capHeader, data *capData) (err error) {
_, _, e1 := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(hdr)), uintptr(unsafe.Pointer(data)), 0)
if e1 != 0 {
err = e1
}
return
}
func capset(hdr *capHeader, data *capData) (err error) {
_, _, e1 := syscall.Syscall(syscall.SYS_CAPSET, uintptr(unsafe.Pointer(hdr)), uintptr(unsafe.Pointer(data)), 0)
if e1 != 0 {
err = e1
}
return
}
// not yet in syscall
const (
pr_CAP_AMBIENT = 47
pr_CAP_AMBIENT_IS_SET = uintptr(1)
pr_CAP_AMBIENT_RAISE = uintptr(2)
pr_CAP_AMBIENT_LOWER = uintptr(3)
pr_CAP_AMBIENT_CLEAR_ALL = uintptr(4)
)
func prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) {
_, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
if e1 != 0 {
err = e1
}
return
}
const (
vfsXattrName = "security.capability"
vfsCapVerMask = 0xff000000
vfsCapVer1 = 0x01000000
vfsCapVer2 = 0x02000000
vfsCapFlagMask = ^vfsCapVerMask
vfsCapFlageffective = 0x000001
vfscapDataSizeV1 = 4 * (1 + 2*1)
vfscapDataSizeV2 = 4 * (1 + 2*2)
)
type vfscapData struct {
magic uint32
data [2]struct {
permitted uint32
inheritable uint32
}
effective [2]uint32
version int8
}
var (
_vfsXattrName *byte
)
func init() {
_vfsXattrName, _ = syscall.BytePtrFromString(vfsXattrName)
}
func getVfsCap(path string, dest *vfscapData) (err error) {
var _p0 *byte
_p0, err = syscall.BytePtrFromString(path)
if err != nil {
return
}
r0, _, e1 := syscall.Syscall6(syscall.SYS_GETXATTR, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_vfsXattrName)), uintptr(unsafe.Pointer(dest)), vfscapDataSizeV2, 0, 0)
if e1 != 0 {
if e1 == syscall.ENODATA {
dest.version = 2
return
}
err = e1
}
switch dest.magic & vfsCapVerMask {
case vfsCapVer1:
dest.version = 1
if r0 != vfscapDataSizeV1 {
return syscall.EINVAL
}
dest.data[1].permitted = 0
dest.data[1].inheritable = 0
case vfsCapVer2:
dest.version = 2
if r0 != vfscapDataSizeV2 {
return syscall.EINVAL
}
default:
return syscall.EINVAL
}
if dest.magic&vfsCapFlageffective != 0 {
dest.effective[0] = dest.data[0].permitted | dest.data[0].inheritable
dest.effective[1] = dest.data[1].permitted | dest.data[1].inheritable
} else {
dest.effective[0] = 0
dest.effective[1] = 0
}
return
}
func setVfsCap(path string, data *vfscapData) (err error) {
var _p0 *byte
_p0, err = syscall.BytePtrFromString(path)
if err != nil {
return
}
var size uintptr
if data.version == 1 {
data.magic = vfsCapVer1
size = vfscapDataSizeV1
} else if data.version == 2 {
data.magic = vfsCapVer2
if data.effective[0] != 0 || data.effective[1] != 0 {
data.magic |= vfsCapFlageffective
}
size = vfscapDataSizeV2
} else {
return syscall.EINVAL
}
_, _, e1 := syscall.Syscall6(syscall.SYS_SETXATTR, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_vfsXattrName)), uintptr(unsafe.Pointer(data)), size, 0, 0)
if e1 != 0 {
err = e1
}
return
}