瀏覽代碼

set default seccomp profile

Signed-off-by: Jessica Frazelle <acidburn@docker.com>
Jessica Frazelle 9 年之前
父節點
當前提交
947293a280

+ 5 - 0
daemon/execdriver/native/create.go

@@ -69,6 +69,10 @@ func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks)
 		if err := d.setCapabilities(container, c); err != nil {
 			return nil, err
 		}
+
+		if c.SeccompProfile == "" {
+			container.Seccomp = getDefaultSeccompProfile()
+		}
 	}
 	// add CAP_ prefix to all caps for new libcontainer update to match
 	// the spec format.
@@ -89,6 +93,7 @@ func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks)
 			return nil, err
 		}
 	}
+
 	if err := execdriver.SetupCgroups(container, c); err != nil {
 		return nil, err
 	}

+ 4 - 0
daemon/execdriver/native/seccomp.go

@@ -12,6 +12,10 @@ import (
 	"github.com/opencontainers/specs"
 )
 
+func getDefaultSeccompProfile() *configs.Seccomp {
+	return defaultSeccompProfile // the shared package-level pointer, not a copy
+}
+
 func loadSeccompProfile(path string) (*configs.Seccomp, error) {
 	f, err := ioutil.ReadFile(path)
 	if err != nil {

+ 319 - 0
daemon/execdriver/native/seccomp_default.go

@@ -0,0 +1,319 @@
+// +build linux
+
+package native
+
+import "github.com/opencontainers/runc/libcontainer/configs"
+
+var defaultSeccompProfile = &configs.Seccomp{
+	DefaultAction: configs.Allow, // allow by default; only the syscalls listed below get the Errno action
+	Syscalls: []*configs.Syscall{
+		{
+			// Quota and Accounting syscalls which could let containers
+			// disable their own resource limits or process accounting
+			Name:   "acct",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent containers from using the kernel keyring,
+			// which is not namespaced
+			Name:   "add_key",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Similar to clock_settime and settimeofday
+			// Time/Date is not namespaced
+			Name:   "adjtimex",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Time/Date is not namespaced
+			Name:   "clock_settime",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny cloning new namespaces
+			Name:   "clone",
+			Action: configs.Errno,
+			Args: []*configs.Arg{
+				{
+					// flags from sched.h (NOTE(review): Op below is >=, not a bitmask test; confirm)
+					// CLONE_NEWUTS		0x04000000
+					// CLONE_NEWIPC		0x08000000
+					// CLONE_NEWUSER	0x10000000
+					// CLONE_NEWPID		0x20000000
+					// CLONE_NEWNET		0x40000000
+					Index: 0,
+					Value: uint64(0x04000000),
+					Op:    configs.GreaterThanOrEqualTo,
+				},
+				{
+					// flags from sched.h
+					// CLONE_NEWNS		0x00020000
+					Index: 0,
+					Value: uint64(0x00020000),
+					Op:    configs.EqualTo,
+				},
+			},
+		},
+		{
+			// Deny manipulation and functions on kernel modules.
+			Name:   "create_module",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny manipulation and functions on kernel modules.
+			Name:   "delete_module",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny retrieval of exported kernel and module symbols
+			Name:   "get_kernel_syms",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
+			// They're gated by CAP_SYS_NICE,
+			// which we do not retain by default in containers.
+			Name:   "get_mempolicy",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny getting the list of robust futexes
+			Name:   "get_robust_list",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny manipulation and functions on kernel modules.
+			Name:   "init_module",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent containers from modifying kernel I/O privilege levels.
+			// Already restricted as containers drop CAP_SYS_RAWIO by default.
+			Name:   "ioperm",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent containers from modifying kernel I/O privilege levels.
+			// Already restricted as containers drop CAP_SYS_RAWIO by default.
+			Name:   "iopl",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Sister syscall of kexec_load that does the same thing,
+			// slightly different arguments
+			Name:   "kexec_file_load",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny loading a new kernel for later execution
+			Name:   "kexec_load",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent containers from using the kernel keyring,
+			// which is not namespaced
+			Name:   "keyctl",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Tracing/profiling syscalls,
+			// which could leak a lot of information on the host
+			Name:   "lookup_dcookie",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
+			// They're gated by CAP_SYS_NICE,
+			// which we do not retain by default in containers.
+			Name:   "mbind",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
+			// They're gated by CAP_SYS_NICE,
+			// which we do not retain by default in containers.
+			Name:   "migrate_pages",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Old syscall only used in 16-bit code,
+			// and a potential information leak
+			Name:   "modify_ldt",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny mount
+			Name:   "mount",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
+			// They're gated by CAP_SYS_NICE,
+			// which we do not retain by default in containers.
+			Name:   "move_pages",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny interaction with the kernel nfs daemon
+			Name:   "nfsservctl",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Cause of an old container breakout,
+			// might as well restrict it to be on the safe side
+			Name:   "open_by_handle_at",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Tracing/profiling syscalls,
+			// which could leak a lot of information on the host
+			Name:   "perf_event_open",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent container from enabling BSD emulation.
+			// Not inherently dangerous, but poorly tested,
+			// potential for a lot of kernel vulns in this.
+			Name:   "personality",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny pivot_root
+			Name:   "pivot_root",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Already blocked by dropping CAP_SYS_PTRACE
+			Name:   "ptrace",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny manipulation and functions on kernel modules.
+			Name:   "query_module",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Quota and Accounting syscalls which could let containers
+			// disable their own resource limits or process accounting
+			Name:   "quotactl",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Probably a bad idea to let containers reboot the host
+			Name:   "reboot",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// NOTE(review): restart_syscall resumes an interrupted syscall; confirm denying it is intended
+			Name:   "restart_syscall",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Prevent containers from using the kernel keyring,
+			// which is not namespaced
+			Name:   "request_key",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// meta, deny seccomp
+			Name:   "seccomp",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Terrifying syscalls that modify kernel memory and NUMA settings.
+			// They're gated by CAP_SYS_NICE,
+			// which we do not retain by default in containers.
+			Name:   "set_mempolicy",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// deny associating a thread with a namespace
+			Name:   "setns",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny setting the list of robust futexes
+			Name:   "set_robust_list",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Time/Date is not namespaced
+			Name:   "settimeofday",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny start/stop swapping to file/device
+			Name:   "swapon",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny start/stop swapping to file/device
+			Name:   "swapoff",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny read/write system parameters
+			Name:   "_sysctl",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Deny umount
+			Name:   "umount2",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Same rationale as clone; note that, unlike clone, no argument filter is set here
+			Name:   "unshare",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+		{
+			// Older syscall related to shared libraries, unused for a long time
+			Name:   "uselib",
+			Action: configs.Errno,
+			Args:   []*configs.Arg{},
+		},
+	},
+}

+ 10 - 3
integration-cli/docker_cli_run_test.go

@@ -2858,18 +2858,25 @@ func (s *DockerSuite) TestRunUnshareProc(c *check.C) {
 	testRequires(c, Apparmor, DaemonIsLinux, NotUserNamespace)
 
 	name := "acidburn"
-	if out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "--mount-proc=/proc", "mount"); err == nil || !strings.Contains(out, "Permission denied") {
+	out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "--mount-proc=/proc", "mount")
+	if err == nil ||
+		!(strings.Contains(strings.ToLower(out), "permission denied") ||
+			strings.Contains(strings.ToLower(out), "operation not permitted")) {
 		c.Fatalf("unshare with --mount-proc should have failed with permission denied, got: %s, %v", out, err)
 	}
 
 	name = "cereal"
-	if out, _, err := dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc"); err == nil || !strings.Contains(out, "Permission denied") {
+	out, _, err = dockerCmdWithError("run", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
+	if err == nil ||
+		!(strings.Contains(strings.ToLower(out), "permission denied") ||
+			strings.Contains(strings.ToLower(out), "operation not permitted")) {
 		c.Fatalf("unshare and mount of /proc should have failed with permission denied, got: %s, %v", out, err)
 	}
 
 	/* Ensure still fails if running privileged with the default policy */
 	name = "crashoverride"
-	if out, _, err := dockerCmdWithError("run", "--privileged", "--security-opt", "apparmor:docker-default", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc"); err == nil || !(strings.Contains(strings.ToLower(out), "permission denied") || strings.Contains(strings.ToLower(out), "operation not permitted")) {
+	out, _, err = dockerCmdWithError("run", "--privileged", "--security-opt", "apparmor:docker-default", "--name", name, "jess/unshare", "unshare", "-p", "-m", "-f", "-r", "mount", "-t", "proc", "none", "/proc")
+	if err == nil || !(strings.Contains(strings.ToLower(out), "permission denied") || strings.Contains(strings.ToLower(out), "operation not permitted")) {
 		c.Fatalf("privileged unshare with apparmor should have failed with permission denied, got: %s, %v", out, err)
 	}
 }

+ 36 - 0
integration-cli/docker_cli_run_unix_test.go

@@ -548,3 +548,39 @@ func (s *DockerSuite) TestRunSeccompProfileDenyChmod(c *check.C) {
 		c.Fatalf("expected chmod with seccomp profile denied to fail, got %s", out)
 	}
 }
+
+// TestRunSeccompProfileDenyUserns checks that 'docker run jess/unshare unshare --map-root-user --user sh -c whoami' exits with operation not permitted.
+func (s *DockerSuite) TestRunSeccompProfileDenyUserns(c *check.C) {
+	testRequires(c, SameHostDaemon, seccompEnabled)
+	// The filtered value 0x10000000 is CLONE_NEWUSER, from sched.h.
+	jsonData := fmt.Sprintf(`{
+	"defaultAction": "SCMP_ACT_ALLOW",
+	"syscalls": [
+		{
+			"name": "unshare",
+			"action": "SCMP_ACT_ERRNO",
+			"args": [
+				{
+					"index": 0,
+					"value": %d,
+					"op": "SCMP_CMP_EQ"
+				}
+			]
+		}
+	]
+}`, uint64(0x10000000))
+	tmpFile, err := ioutil.TempFile("", "profile.json")
+	if err != nil {
+		c.Fatal(err)
+	}
+	defer tmpFile.Close() // registered only once TempFile is known to have succeeded
+
+	if _, err := tmpFile.Write([]byte(jsonData)); err != nil {
+		c.Fatal(err)
+	}
+	runCmd := exec.Command(dockerBinary, "run", "--security-opt", "seccomp:"+tmpFile.Name(), "jess/unshare", "unshare", "--map-root-user", "--user", "sh", "-c", "whoami")
+	out, _, _ := runCommandWithOutput(runCmd) // exit status and error ignored: only the output is checked below
+	if !strings.Contains(out, "Operation not permitted") {
+		c.Fatalf("expected unshare userns with seccomp profile denied to fail, got %s", out)
+	}
+}