moby/profiles/seccomp/seccomp.go
Justin Cormack 15ff09395c
If container will run as non root user, drop permitted, effective caps early
As soon as the initial executable in the container is executed as a non root user,
permitted and effective capabilities are dropped. Drop them earlier than this, so
that they are dropped before executing the file. The main effect of this is that
if `CAP_DAC_OVERRIDE` is set (the default) the user will not be able to execute
files they do not have permission to execute, which previously they could.

The old behaviour was somewhat surprising and the new one is definitely correct,
but it is not in any meaningful way exploitable, and I do not think it is
necessary to backport this fix. It is unlikely to have any negative effects as
almost all executables have world execute permission anyway.

Use the bounding set not the effective set as the canonical set of capabilities, as
effective will now vary.

Signed-off-by: Justin Cormack <justin.cormack@docker.com>
2018-03-19 14:45:27 -07:00

160 lines
4.3 KiB
Go

// +build linux
package seccomp // import "github.com/docker/docker/profiles/seccomp"
import (
"encoding/json"
"errors"
"fmt"
"github.com/docker/docker/api/types"
"github.com/opencontainers/runtime-spec/specs-go"
libseccomp "github.com/seccomp/libseccomp-golang"
)
//go:generate go run -tags 'seccomp' generate.go
// GetDefaultProfile returns the default seccomp profile.
func GetDefaultProfile(rs *specs.Spec) (*specs.LinuxSeccomp, error) {
return setupSeccomp(DefaultProfile(), rs)
}
// LoadProfile takes a json string and decodes the seccomp profile.
func LoadProfile(body string, rs *specs.Spec) (*specs.LinuxSeccomp, error) {
var config types.Seccomp
if err := json.Unmarshal([]byte(body), &config); err != nil {
return nil, fmt.Errorf("Decoding seccomp profile failed: %v", err)
}
return setupSeccomp(&config, rs)
}
var nativeToSeccomp = map[string]types.Arch{
"amd64": types.ArchX86_64,
"arm64": types.ArchAARCH64,
"mips64": types.ArchMIPS64,
"mips64n32": types.ArchMIPS64N32,
"mipsel64": types.ArchMIPSEL64,
"mipsel64n32": types.ArchMIPSEL64N32,
"s390x": types.ArchS390X,
}
// inSlice tests whether a string is contained in a slice of strings or not.
// Comparison is case sensitive
func inSlice(slice []string, s string) bool {
for _, ss := range slice {
if s == ss {
return true
}
}
return false
}
func setupSeccomp(config *types.Seccomp, rs *specs.Spec) (*specs.LinuxSeccomp, error) {
if config == nil {
return nil, nil
}
// No default action specified, no syscalls listed, assume seccomp disabled
if config.DefaultAction == "" && len(config.Syscalls) == 0 {
return nil, nil
}
newConfig := &specs.LinuxSeccomp{}
var arch string
var native, err = libseccomp.GetNativeArch()
if err == nil {
arch = native.String()
}
if len(config.Architectures) != 0 && len(config.ArchMap) != 0 {
return nil, errors.New("'architectures' and 'archMap' were specified in the seccomp profile, use either 'architectures' or 'archMap'")
}
// if config.Architectures == 0 then libseccomp will figure out the architecture to use
if len(config.Architectures) != 0 {
for _, a := range config.Architectures {
newConfig.Architectures = append(newConfig.Architectures, specs.Arch(a))
}
}
if len(config.ArchMap) != 0 {
for _, a := range config.ArchMap {
seccompArch, ok := nativeToSeccomp[arch]
if ok {
if a.Arch == seccompArch {
newConfig.Architectures = append(newConfig.Architectures, specs.Arch(a.Arch))
for _, sa := range a.SubArches {
newConfig.Architectures = append(newConfig.Architectures, specs.Arch(sa))
}
break
}
}
}
}
newConfig.DefaultAction = specs.LinuxSeccompAction(config.DefaultAction)
Loop:
// Loop through all syscall blocks and convert them to libcontainer format after filtering them
for _, call := range config.Syscalls {
if len(call.Excludes.Arches) > 0 {
if inSlice(call.Excludes.Arches, arch) {
continue Loop
}
}
if len(call.Excludes.Caps) > 0 {
for _, c := range call.Excludes.Caps {
if inSlice(rs.Process.Capabilities.Bounding, c) {
continue Loop
}
}
}
if len(call.Includes.Arches) > 0 {
if !inSlice(call.Includes.Arches, arch) {
continue Loop
}
}
if len(call.Includes.Caps) > 0 {
for _, c := range call.Includes.Caps {
if !inSlice(rs.Process.Capabilities.Bounding, c) {
continue Loop
}
}
}
if call.Name != "" && len(call.Names) != 0 {
return nil, errors.New("'name' and 'names' were specified in the seccomp profile, use either 'name' or 'names'")
}
if call.Name != "" {
newConfig.Syscalls = append(newConfig.Syscalls, createSpecsSyscall(call.Name, call.Action, call.Args))
}
for _, n := range call.Names {
newConfig.Syscalls = append(newConfig.Syscalls, createSpecsSyscall(n, call.Action, call.Args))
}
}
return newConfig, nil
}
func createSpecsSyscall(name string, action types.Action, args []*types.Arg) specs.LinuxSyscall {
newCall := specs.LinuxSyscall{
Names: []string{name},
Action: specs.LinuxSeccompAction(action),
}
// Loop through all the arguments of the syscall and convert them
for _, arg := range args {
newArg := specs.LinuxSeccompArg{
Index: arg.Index,
Value: arg.Value,
ValueTwo: arg.ValueTwo,
Op: specs.LinuxSeccompOperator(arg.Op),
}
newCall.Args = append(newCall.Args, newArg)
}
return newCall
}