15ff09395c
As soon as the initial executable in the container is executed as a non root user, permitted and effective capabilities are dropped. Drop them earlier than this, so that they are dropped before executing the file. The main effect of this is that if `CAP_DAC_OVERRIDE` is set (the default) the user will not be able to execute files they do not have permission to execute, which previously they could. The old behaviour was somewhat surprising and the new one is definitely correct, but it is not in any meaningful way exploitable, and I do not think it is necessary to backport this fix. It is unlikely to have any negative effects as almost all executables have world execute permission anyway. Use the bounding set not the effective set as the canonical set of capabilities, as effective will now vary. Signed-off-by: Justin Cormack <justin.cormack@docker.com>
160 lines
4.3 KiB
Go
160 lines
4.3 KiB
Go
// +build linux
|
|
|
|
package seccomp // import "github.com/docker/docker/profiles/seccomp"
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
|
|
"github.com/docker/docker/api/types"
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
libseccomp "github.com/seccomp/libseccomp-golang"
|
|
)
|
|
|
|
//go:generate go run -tags 'seccomp' generate.go
|
|
|
|
// GetDefaultProfile returns the default seccomp profile.
|
|
func GetDefaultProfile(rs *specs.Spec) (*specs.LinuxSeccomp, error) {
|
|
return setupSeccomp(DefaultProfile(), rs)
|
|
}
|
|
|
|
// LoadProfile takes a json string and decodes the seccomp profile.
|
|
func LoadProfile(body string, rs *specs.Spec) (*specs.LinuxSeccomp, error) {
|
|
var config types.Seccomp
|
|
if err := json.Unmarshal([]byte(body), &config); err != nil {
|
|
return nil, fmt.Errorf("Decoding seccomp profile failed: %v", err)
|
|
}
|
|
return setupSeccomp(&config, rs)
|
|
}
|
|
|
|
var nativeToSeccomp = map[string]types.Arch{
|
|
"amd64": types.ArchX86_64,
|
|
"arm64": types.ArchAARCH64,
|
|
"mips64": types.ArchMIPS64,
|
|
"mips64n32": types.ArchMIPS64N32,
|
|
"mipsel64": types.ArchMIPSEL64,
|
|
"mipsel64n32": types.ArchMIPSEL64N32,
|
|
"s390x": types.ArchS390X,
|
|
}
|
|
|
|
// inSlice reports whether s occurs in slice. Elements are compared with ==,
// so the match is exact and case sensitive; a nil or empty slice never
// matches.
func inSlice(slice []string, s string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] == s {
			return true
		}
	}
	return false
}
|
|
|
|
func setupSeccomp(config *types.Seccomp, rs *specs.Spec) (*specs.LinuxSeccomp, error) {
|
|
if config == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
// No default action specified, no syscalls listed, assume seccomp disabled
|
|
if config.DefaultAction == "" && len(config.Syscalls) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
newConfig := &specs.LinuxSeccomp{}
|
|
|
|
var arch string
|
|
var native, err = libseccomp.GetNativeArch()
|
|
if err == nil {
|
|
arch = native.String()
|
|
}
|
|
|
|
if len(config.Architectures) != 0 && len(config.ArchMap) != 0 {
|
|
return nil, errors.New("'architectures' and 'archMap' were specified in the seccomp profile, use either 'architectures' or 'archMap'")
|
|
}
|
|
|
|
// if config.Architectures == 0 then libseccomp will figure out the architecture to use
|
|
if len(config.Architectures) != 0 {
|
|
for _, a := range config.Architectures {
|
|
newConfig.Architectures = append(newConfig.Architectures, specs.Arch(a))
|
|
}
|
|
}
|
|
|
|
if len(config.ArchMap) != 0 {
|
|
for _, a := range config.ArchMap {
|
|
seccompArch, ok := nativeToSeccomp[arch]
|
|
if ok {
|
|
if a.Arch == seccompArch {
|
|
newConfig.Architectures = append(newConfig.Architectures, specs.Arch(a.Arch))
|
|
for _, sa := range a.SubArches {
|
|
newConfig.Architectures = append(newConfig.Architectures, specs.Arch(sa))
|
|
}
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
newConfig.DefaultAction = specs.LinuxSeccompAction(config.DefaultAction)
|
|
|
|
Loop:
|
|
// Loop through all syscall blocks and convert them to libcontainer format after filtering them
|
|
for _, call := range config.Syscalls {
|
|
if len(call.Excludes.Arches) > 0 {
|
|
if inSlice(call.Excludes.Arches, arch) {
|
|
continue Loop
|
|
}
|
|
}
|
|
if len(call.Excludes.Caps) > 0 {
|
|
for _, c := range call.Excludes.Caps {
|
|
if inSlice(rs.Process.Capabilities.Bounding, c) {
|
|
continue Loop
|
|
}
|
|
}
|
|
}
|
|
if len(call.Includes.Arches) > 0 {
|
|
if !inSlice(call.Includes.Arches, arch) {
|
|
continue Loop
|
|
}
|
|
}
|
|
if len(call.Includes.Caps) > 0 {
|
|
for _, c := range call.Includes.Caps {
|
|
if !inSlice(rs.Process.Capabilities.Bounding, c) {
|
|
continue Loop
|
|
}
|
|
}
|
|
}
|
|
|
|
if call.Name != "" && len(call.Names) != 0 {
|
|
return nil, errors.New("'name' and 'names' were specified in the seccomp profile, use either 'name' or 'names'")
|
|
}
|
|
|
|
if call.Name != "" {
|
|
newConfig.Syscalls = append(newConfig.Syscalls, createSpecsSyscall(call.Name, call.Action, call.Args))
|
|
}
|
|
|
|
for _, n := range call.Names {
|
|
newConfig.Syscalls = append(newConfig.Syscalls, createSpecsSyscall(n, call.Action, call.Args))
|
|
}
|
|
}
|
|
|
|
return newConfig, nil
|
|
}
|
|
|
|
func createSpecsSyscall(name string, action types.Action, args []*types.Arg) specs.LinuxSyscall {
|
|
newCall := specs.LinuxSyscall{
|
|
Names: []string{name},
|
|
Action: specs.LinuxSeccompAction(action),
|
|
}
|
|
|
|
// Loop through all the arguments of the syscall and convert them
|
|
for _, arg := range args {
|
|
newArg := specs.LinuxSeccompArg{
|
|
Index: arg.Index,
|
|
Value: arg.Value,
|
|
ValueTwo: arg.ValueTwo,
|
|
Op: specs.LinuxSeccompOperator(arg.Op),
|
|
}
|
|
|
|
newCall.Args = append(newCall.Args, newArg)
|
|
}
|
|
return newCall
|
|
}
|