Align default seccomp profile with selected capabilities

Currently the default seccomp profile is fixed. This changes it
so that it varies depending on the Linux capabilities selected with
the --cap-add and --cap-drop options. Without this, if a user adds
privileges, e.g. to allow ptrace with --cap-add sys_ptrace, they still
cannot actually use ptrace because it is still blocked by seccomp, so
they will probably disable seccomp or use --privileged. With this
change, the syscalls needed for each selected capability are also
allowed by the seccomp profile.

While this patch makes it easier to do things with, for example,
cap_sys_admin enabled, as it will now allow creating new namespaces
and use of mount, it still allows less than --cap-add cap_sys_admin
--security-opt seccomp:unconfined would have previously. It is not
recommended that users run containers with cap_sys_admin, as this
gives full access to the host machine.

It also cleans up some architecture specific system calls to be
only selected when needed.

Signed-off-by: Justin Cormack <justin.cormack@docker.com>
This commit is contained in:
Justin Cormack 2016-05-06 15:17:41 +01:00
parent af60a9e599
commit a01c4dc8f8
8 changed files with 464 additions and 197 deletions

View file

@ -35,7 +35,7 @@ func setSeccomp(daemon *Daemon, rs *specs.Spec, c *container.Container) error {
return err
}
} else {
profile, err = seccomp.GetDefaultProfile()
profile, err = seccomp.GetDefaultProfile(rs)
if err != nil {
return err
}

View file

@ -1071,14 +1071,6 @@ one can use this flag:
--privileged=false: Give extended privileges to this container
--device=[]: Allows you to run devices inside the container without the --privileged flag.
> **Note:**
> With Docker 1.10 and greater, the default seccomp profile will also block
> syscalls, regardless of `--cap-add` passed to the container. We recommend in
> these cases to create your own custom seccomp profile based off our
> [default](https://github.com/docker/docker/blob/master/profiles/seccomp/default.json).
> Or if you don't want to run with the default seccomp profile, you can pass
> `--security-opt=seccomp=unconfined` on run.
By default, Docker containers are "unprivileged" and cannot, for
example, run a Docker daemon inside a Docker container. This is because
by default a container is not allowed to access any devices, but a
@ -1196,6 +1188,11 @@ To mount a FUSE based filesystem, you need to combine both `--cap-add` and
-rw-rw-r-- 1 1000 1000 461 Dec 4 06:08 .gitignore
....
Since Docker 1.12, the default seccomp profile adjusts to the selected capabilities in order
to allow use of the facilities those capabilities permit, so you should not have to adjust
the profile yourself. In Docker 1.10 and 1.11 this did not happen, and it may be necessary
to use a custom seccomp profile or use `--security-opt seccomp=unconfined` when adding
capabilities.
## Logging drivers (--log-driver)

View file

@ -948,10 +948,10 @@ func (s *DockerSuite) TestRunSeccompDefaultProfile(c *check.C) {
testRequires(c, SameHostDaemon, seccompEnabled, NotUserNamespace)
var group sync.WaitGroup
group.Add(4)
group.Add(11)
// Buffer one slot per goroutine started below (11, matching group.Add) so a
// failing goroutine can always deliver its error without blocking; a blocked
// send would keep it from reaching group.Done() and hang the test.
errChan := make(chan error, 11)
go func() {
out, _, err := dockerCmdWithError("run", "--cap-add", "ALL", "syscall-test", "acct-test")
out, _, err := dockerCmdWithError("run", "syscall-test", "acct-test")
if err == nil || !strings.Contains(out, "Operation not permitted") {
errChan <- fmt.Errorf("expected Operation not permitted, got: %s", out)
}
@ -959,13 +959,69 @@ func (s *DockerSuite) TestRunSeccompDefaultProfile(c *check.C) {
}()
go func() {
out, _, err := dockerCmdWithError("run", "--cap-add", "ALL", "syscall-test", "ns-test", "echo", "hello")
out, _, err := dockerCmdWithError("run", "--cap-add", "sys_admin", "syscall-test", "acct-test")
if err == nil || !strings.Contains(out, "Operation not permitted") {
errChan <- fmt.Errorf("expected Operation not permitted, got: %s", out)
}
group.Done()
}()
go func() {
out, _, err := dockerCmdWithError("run", "--cap-add", "sys_pacct", "syscall-test", "acct-test")
if err == nil || !strings.Contains(out, "No such file or directory") {
errChan <- fmt.Errorf("expected No such file or directory, got: %s", out)
}
group.Done()
}()
go func() {
out, _, err := dockerCmdWithError("run", "--cap-add", "ALL", "syscall-test", "acct-test")
if err == nil || !strings.Contains(out, "No such file or directory") {
errChan <- fmt.Errorf("expected No such file or directory, got: %s", out)
}
group.Done()
}()
go func() {
out, _, err := dockerCmdWithError("run", "--cap-drop", "ALL", "--cap-add", "sys_pacct", "syscall-test", "acct-test")
if err == nil || !strings.Contains(out, "No such file or directory") {
errChan <- fmt.Errorf("expected No such file or directory, got: %s", out)
}
group.Done()
}()
go func() {
out, _, err := dockerCmdWithError("run", "syscall-test", "ns-test", "echo", "hello0")
if err == nil || !strings.Contains(out, "Operation not permitted") {
errChan <- fmt.Errorf("expected Operation not permitted, got: %s", out)
}
group.Done()
}()
go func() {
out, _, err := dockerCmdWithError("run", "--cap-add", "sys_admin", "syscall-test", "ns-test", "echo", "hello1")
if err != nil || !strings.Contains(out, "hello1") {
errChan <- fmt.Errorf("expected hello1, got: %s, %v", out, err)
}
group.Done()
}()
go func() {
out, _, err := dockerCmdWithError("run", "--cap-drop", "all", "--cap-add", "sys_admin", "syscall-test", "ns-test", "echo", "hello2")
if err != nil || !strings.Contains(out, "hello2") {
errChan <- fmt.Errorf("expected hello2, got: %s, %v", out, err)
}
group.Done()
}()
go func() {
out, _, err := dockerCmdWithError("run", "--cap-add", "ALL", "syscall-test", "ns-test", "echo", "hello3")
if err != nil || !strings.Contains(out, "hello3") {
errChan <- fmt.Errorf("expected hello3, got: %s, %v", out, err)
}
group.Done()
}()
go func() {
out, _, err := dockerCmdWithError("run", "--cap-add", "ALL", "--security-opt", "seccomp=unconfined", "syscall-test", "acct-test")
if err == nil || !strings.Contains(out, "No such file or directory") {
@ -975,9 +1031,9 @@ func (s *DockerSuite) TestRunSeccompDefaultProfile(c *check.C) {
}()
go func() {
out, _, err := dockerCmdWithError("run", "--cap-add", "ALL", "--security-opt", "seccomp=unconfined", "syscall-test", "ns-test", "echo", "hello")
if err != nil || !strings.Contains(out, "hello") {
errChan <- fmt.Errorf("expected hello, got: %s, %v", out, err)
out, _, err := dockerCmdWithError("run", "--cap-add", "ALL", "--security-opt", "seccomp=unconfined", "syscall-test", "ns-test", "echo", "hello4")
if err != nil || !strings.Contains(out, "hello4") {
errChan <- fmt.Errorf("expected hello4, got: %s, %v", out, err)
}
group.Done()
}()

View file

@ -26,11 +26,6 @@
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "arch_prctl",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "bind",
"action": "SCMP_ACT_ALLOW",
@ -61,21 +56,6 @@
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "chown",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "chown32",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "chroot",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "clock_getres",
"action": "SCMP_ACT_ALLOW",
@ -91,18 +71,6 @@
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "clone",
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 2080505856,
"valueTwo": 0,
"op": "SCMP_CMP_MASKED_EQ"
}
]
},
{
"name": "close",
"action": "SCMP_ACT_ALLOW",
@ -223,11 +191,6 @@
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "fanotify_init",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "fanotify_mark",
"action": "SCMP_ACT_ALLOW",
@ -248,21 +211,6 @@
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "fchown",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "fchown32",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "fchownat",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "fcntl",
"action": "SCMP_ACT_ALLOW",
@ -608,16 +556,6 @@
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "lchown",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "lchown32",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "lgetxattr",
"action": "SCMP_ACT_ALLOW",
@ -1164,11 +1102,6 @@
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "setdomainname",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "setfsgid",
"action": "SCMP_ACT_ALLOW",
@ -1209,11 +1142,6 @@
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "sethostname",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "setitimer",
"action": "SCMP_ACT_ALLOW",
@ -1579,23 +1507,70 @@
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "arch_prctl",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "modify_ldt",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "breakpoint",
"name": "chown",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "cacheflush",
"name": "chown32",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "set_tls",
"name": "fchown",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "fchown32",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "fchownat",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "lchown",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "lchown32",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "chroot",
"action": "SCMP_ACT_ALLOW",
"args": []
},
{
"name": "clone",
"action": "SCMP_ACT_ALLOW",
"args": [
{
"index": 0,
"value": 2080505856,
"valueTwo": 0,
"op": "SCMP_CMP_MASKED_EQ"
}
]
},
{
"name": "fchown",
"action": "SCMP_ACT_ALLOW",
"args": []
}

View file

@ -8,6 +8,7 @@ import (
"os"
"path/filepath"
"github.com/docker/docker/oci"
"github.com/docker/docker/profiles/seccomp"
)
@ -20,8 +21,10 @@ func main() {
}
f := filepath.Join(wd, "default.json")
rs := oci.DefaultSpec()
// write the default profile to the file
b, err := json.MarshalIndent(seccomp.DefaultProfile, "", "\t")
b, err := json.MarshalIndent(seccomp.DefaultProfile(&rs), "", "\t")
if err != nil {
panic(err)
}

View file

@ -13,8 +13,8 @@ import (
//go:generate go run -tags 'seccomp' generate.go
// GetDefaultProfile returns the default seccomp profile.
func GetDefaultProfile() (*specs.Seccomp, error) {
return setupSeccomp(DefaultProfile)
func GetDefaultProfile(rs *specs.Spec) (*specs.Seccomp, error) {
return setupSeccomp(DefaultProfile(rs))
}
// LoadProfile takes a file path and decodes the seccomp profile.

View file

@ -6,6 +6,7 @@ import (
"syscall"
"github.com/docker/engine-api/types"
"github.com/opencontainers/specs/specs-go"
libseccomp "github.com/seccomp/libseccomp-golang"
)
@ -34,10 +35,9 @@ func arches() []types.Arch {
}
// DefaultProfile defines the whitelist for the default seccomp profile.
var DefaultProfile = &types.Seccomp{
DefaultAction: types.ActErrno,
Architectures: arches(),
Syscalls: []*types.Syscall{
func DefaultProfile(rs *specs.Spec) *types.Seccomp {
syscalls := []*types.Syscall{
{
Name: "accept",
Action: types.ActAllow,
@ -58,11 +58,6 @@ var DefaultProfile = &types.Seccomp{
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "arch_prctl",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "bind",
Action: types.ActAllow,
@ -93,21 +88,6 @@ var DefaultProfile = &types.Seccomp{
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "chown",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "chown32",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "chroot",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "clock_getres",
Action: types.ActAllow,
@ -123,18 +103,6 @@ var DefaultProfile = &types.Seccomp{
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "clone",
Action: types.ActAllow,
Args: []*types.Arg{
{
Index: 0,
Value: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWUSER | syscall.CLONE_NEWPID | syscall.CLONE_NEWNET,
ValueTwo: 0,
Op: types.OpMaskedEqual,
},
},
},
{
Name: "close",
Action: types.ActAllow,
@ -255,11 +223,6 @@ var DefaultProfile = &types.Seccomp{
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "fanotify_init",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "fanotify_mark",
Action: types.ActAllow,
@ -280,21 +243,6 @@ var DefaultProfile = &types.Seccomp{
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "fchown",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "fchown32",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "fchownat",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "fcntl",
Action: types.ActAllow,
@ -640,16 +588,6 @@ var DefaultProfile = &types.Seccomp{
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "lchown",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "lchown32",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "lgetxattr",
Action: types.ActAllow,
@ -1193,11 +1131,6 @@ var DefaultProfile = &types.Seccomp{
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "setdomainname",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "setfsgid",
Action: types.ActAllow,
@ -1238,11 +1171,6 @@ var DefaultProfile = &types.Seccomp{
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "sethostname",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "setitimer",
Action: types.ActAllow,
@ -1608,27 +1536,332 @@ var DefaultProfile = &types.Seccomp{
Action: types.ActAllow,
Args: []*types.Arg{},
},
// i386 specific syscalls
{
Name: "modify_ldt",
Action: types.ActAllow,
Args: []*types.Arg{},
},
// arm specific syscalls
{
Name: "breakpoint",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "cacheflush",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "set_tls",
Action: types.ActAllow,
Args: []*types.Arg{},
},
},
}
var arch string
var native, err = libseccomp.GetNativeArch()
if err == nil {
arch = native.String()
}
switch arch {
case "arm", "arm64":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "breakpoint",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "cacheflush",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "set_tls",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "amd64", "x32":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "arch_prctl",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
fallthrough
case "x86":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "modify_ldt",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
}
capSysAdmin := false
var cap string
for _, cap = range rs.Process.Capabilities {
switch cap {
case "CAP_CHOWN":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "chown",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "chown32",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "fchown",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "fchown32",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "fchownat",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "lchown",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "lchown32",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_DAC_READ_SEARCH":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "name_to_handle_at",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "open_by_handle_at",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_IPC_LOCK":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "mlock",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "mlock2",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "mlockall",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_SYS_ADMIN":
capSysAdmin = true
syscalls = append(syscalls, []*types.Syscall{
{
Name: "bpf",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "clone",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "fanotify_init",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "lookup_dcookie",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "mount",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "perf_event_open",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "setdomainname",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "sethostname",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "setns",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "umount",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "umount2",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "unshare",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_SYS_BOOT":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "reboot",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_SYS_CHROOT":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "chroot",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_SYS_MODULE":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "delete_module",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "init_module",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "finit_module",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "query_module",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_SYS_PACCT":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "acct",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_SYS_PTRACE":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "kcmp",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "process_vm_readv",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "process_vm_writev",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "ptrace",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_SYS_RAWIO":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "iopl",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "ioperm",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_SYS_TIME":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "settimeofday",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "stime",
Action: types.ActAllow,
Args: []*types.Arg{},
},
{
Name: "adjtimex",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
case "CAP_SYS_TTY_CONFIG":
syscalls = append(syscalls, []*types.Syscall{
{
Name: "vhangup",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
}
}
if !capSysAdmin {
syscalls = append(syscalls, []*types.Syscall{
{
Name: "clone",
Action: types.ActAllow,
Args: []*types.Arg{
{
Index: 0,
Value: syscall.CLONE_NEWNS | syscall.CLONE_NEWUTS | syscall.CLONE_NEWIPC | syscall.CLONE_NEWUSER | syscall.CLONE_NEWPID | syscall.CLONE_NEWNET,
ValueTwo: 0,
Op: types.OpMaskedEqual,
},
},
},
}...)
}
// We need some additional syscalls in this case see #22252
if !rs.Process.NoNewPrivileges {
syscalls = append(syscalls, []*types.Syscall{
{
Name: "fchown",
Action: types.ActAllow,
Args: []*types.Arg{},
},
}...)
}
return &types.Seccomp{
DefaultAction: types.ActErrno,
Architectures: arches(),
Syscalls: syscalls,
}
}

View file

@ -2,9 +2,12 @@
package seccomp
import "github.com/docker/engine-api/types"
var (
// DefaultProfile is a nil pointer on unsupported systems.
DefaultProfile *types.Seccomp
import (
"github.com/docker/engine-api/types"
"github.com/opencontainers/specs/specs-go"
)
// DefaultProfile returns a nil pointer on unsupported systems.
// The rs argument is ignored here; no profile is built from it.
func DefaultProfile(rs *specs.Spec) *types.Seccomp {
return nil
}