|
@@ -0,0 +1,319 @@
|
|
|
+// +build linux
|
|
|
+
|
|
|
+package native
|
|
|
+
|
|
|
+import "github.com/opencontainers/runc/libcontainer/configs"
|
|
|
+
|
|
|
+var defaultSeccompProfile = &configs.Seccomp{
|
|
|
+ DefaultAction: configs.Allow,
|
|
|
+ Syscalls: []*configs.Syscall{
|
|
|
+ {
|
|
|
+ // Quota and Accounting syscalls which could let containers
|
|
|
+ // disable their own resource limits or process accounting
|
|
|
+ Name: "acct",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Prevent containers from using the kernel keyring,
|
|
|
+ // which is not namespaced
|
|
|
+ Name: "add_key",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Similar to clock_settime and settimeofday
|
|
|
+ // Time/Date is not namespaced
|
|
|
+ Name: "adjtimex",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Time/Date is not namespaced
|
|
|
+ Name: "clock_settime",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny cloning new namespaces
|
|
|
+ Name: "clone",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{
|
|
|
+ {
|
|
|
+ // flags from sched.h
|
|
|
+ // CLONE_NEWUTS 0x04000000
|
|
|
+ // CLONE_NEWIPC 0x08000000
|
|
|
+ // CLONE_NEWUSER 0x10000000
|
|
|
+ // CLONE_NEWPID 0x20000000
|
|
|
+ // CLONE_NEWNET 0x40000000
|
|
|
+ Index: 0,
|
|
|
+ Value: uint64(0x04000000),
|
|
|
+ Op: configs.GreaterThanOrEqualTo,
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // flags from sched.h
|
|
|
+ // CLONE_NEWNS 0x00020000
|
|
|
+ Index: 0,
|
|
|
+ Value: uint64(0x00020000),
|
|
|
+ Op: configs.EqualTo,
|
|
|
+ },
|
|
|
+ },
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny manipulation and functions on kernel modules.
|
|
|
+ Name: "create_module",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny manipulation and functions on kernel modules.
|
|
|
+ Name: "delete_module",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny retrieval of exported kernel and module symbols
|
|
|
+ Name: "get_kernel_syms",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Terrifying syscalls that modify kernel memory and NUMA settings.
|
|
|
+ // They're gated by CAP_SYS_NICE,
|
|
|
+ // which we do not retain by default in containers.
|
|
|
+ Name: "get_mempolicy",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny getting the list of robust futexes
|
|
|
+ Name: "get_robust_list",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny manipulation and functions on kernel modules.
|
|
|
+ Name: "init_module",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Prevent containers from modifying kernel I/O privilege levels.
|
|
|
+ // Already restricted as containers drop CAP_SYS_RAWIO by default.
|
|
|
+ Name: "ioperm",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Prevent containers from modifying kernel I/O privilege levels.
|
|
|
+ // Already restricted as containers drop CAP_SYS_RAWIO by default.
|
|
|
+ Name: "iopl",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Sister syscall of kexec_load that does the same thing,
|
|
|
+ // slightly different arguments
|
|
|
+ Name: "kexec_file_load",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny loading a new kernel for later execution
|
|
|
+ Name: "kexec_load",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Prevent containers from using the kernel keyring,
|
|
|
+ // which is not namespaced
|
|
|
+ Name: "keyctl",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Tracing/profiling syscalls,
|
|
|
+ // which could leak a lot of information on the host
|
|
|
+ Name: "lookup_dcookie",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Terrifying syscalls that modify kernel memory and NUMA settings.
|
|
|
+ // They're gated by CAP_SYS_NICE,
|
|
|
+ // which we do not retain by default in containers.
|
|
|
+ Name: "mbind",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Terrifying syscalls that modify kernel memory and NUMA settings.
|
|
|
+ // They're gated by CAP_SYS_NICE,
|
|
|
+ // which we do not retain by default in containers.
|
|
|
+ Name: "migrate_pages",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Old syscall only used in 16-bit code,
|
|
|
+ // and a potential information leak
|
|
|
+ Name: "modify_ldt",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny mount
|
|
|
+ Name: "mount",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Terrifying syscalls that modify kernel memory and NUMA settings.
|
|
|
+ // They're gated by CAP_SYS_NICE,
|
|
|
+ // which we do not retain by default in containers.
|
|
|
+ Name: "move_pages",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny interaction with the kernel nfs daemon
|
|
|
+ Name: "nfsservctl",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Cause of an old container breakout,
|
|
|
+ // might as well restrict it to be on the safe side
|
|
|
+ Name: "open_by_handle_at",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Tracing/profiling syscalls,
|
|
|
+ // which could leak a lot of information on the host
|
|
|
+ Name: "perf_event_open",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Prevent container from enabling BSD emulation.
|
|
|
+ // Not inherently dangerous, but poorly tested,
|
|
|
+ // potential for a lot of kernel vulns in this.
|
|
|
+ Name: "personality",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny pivot_root
|
|
|
+ Name: "pivot_root",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Already blocked by dropping CAP_PTRACE
|
|
|
+ Name: "ptrace",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny manipulation and functions on kernel modules.
|
|
|
+ Name: "query_module",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Quota and Accounting syscalls which could let containers
|
|
|
+ // disable their own resource limits or process accounting
|
|
|
+ Name: "quotactl",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Probably a bad idea to let containers reboot the host
|
|
|
+ Name: "reboot",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Probably a bad idea to let containers restart
|
|
|
+ Name: "restart_syscall",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Prevent containers from using the kernel keyring,
|
|
|
+ // which is not namespaced
|
|
|
+ Name: "request_key",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // meta, deny seccomp
|
|
|
+ Name: "seccomp",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Terrifying syscalls that modify kernel memory and NUMA settings.
|
|
|
+ // They're gated by CAP_SYS_NICE,
|
|
|
+ // which we do not retain by default in containers.
|
|
|
+ Name: "set_mempolicy",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // deny associating a thread with a namespace
|
|
|
+ Name: "setns",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny setting the list of robust futexes
|
|
|
+ Name: "set_robust_list",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Time/Date is not namespaced
|
|
|
+ Name: "settimeofday",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny start/stop swapping to file/device
|
|
|
+ Name: "swapon",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny start/stop swapping to file/device
|
|
|
+ Name: "swapoff",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny read/write system parameters
|
|
|
+ Name: "_sysctl",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Deny umount
|
|
|
+ Name: "umount2",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Same as clone
|
|
|
+ Name: "unshare",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ {
|
|
|
+ // Older syscall related to shared libraries, unused for a long time
|
|
|
+ Name: "uselib",
|
|
|
+ Action: configs.Errno,
|
|
|
+ Args: []*configs.Arg{},
|
|
|
+ },
|
|
|
+ },
|
|
|
+}
|