瀏覽代碼

Merge pull request #16244 from calavera/libcontainer_0_0_4

Vendor libcontainer v0.0.4
Phil Estes 9 年之前
父節點
當前提交
dac92a8afb
共有 28 個文件被更改,包括 524 次插入643 次删除
  1. 4 7
      daemon/container_unix.go
  2. 1 1
      daemon/create_unix.go
  3. 2 1
      daemon/volumes_unix.go
  4. 1 1
      hack/vendor.sh
  5. 6 6
      vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
  6. 9 8
      vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
  7. 21 5
      vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
  8. 103 9
      vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go
  9. 0 7
      vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go
  10. 25 5
      vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go
  11. 6 0
      vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go
  12. 7 59
      vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go
  13. 11 1
      vendor/src/github.com/opencontainers/runc/libcontainer/label/label.go
  14. 28 17
      vendor/src/github.com/opencontainers/runc/libcontainer/label/label_selinux.go
  15. 1 1
      vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_armppc64.go
  16. 1 1
      vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_notarm.go
  17. 18 9
      vendor/src/github.com/opencontainers/runc/libcontainer/process_linux.go
  18. 26 11
      vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
  19. 0 34
      vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/bpf.go
  20. 53 0
      vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/config.go
  21. 0 146
      vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/context.go
  22. 0 118
      vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/filter.go
  23. 0 68
      vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/jump_amd64.go
  24. 165 0
      vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
  25. 0 124
      vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unix.go
  26. 17 1
      vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
  27. 9 0
      vendor/src/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
  28. 10 3
      vendor/src/github.com/opencontainers/runc/libcontainer/standard_init_linux.go

+ 4 - 7
daemon/container_unix.go

@@ -1112,12 +1112,9 @@ func (container *Container) unmountVolumes(forceSyscall bool) error {
 
 
 func (container *Container) networkMounts() []execdriver.Mount {
 func (container *Container) networkMounts() []execdriver.Mount {
 	var mounts []execdriver.Mount
 	var mounts []execdriver.Mount
-	mode := "Z"
-	if container.hostConfig.NetworkMode.IsContainer() {
-		mode = "z"
-	}
+	shared := container.hostConfig.NetworkMode.IsContainer()
 	if container.ResolvConfPath != "" {
 	if container.ResolvConfPath != "" {
-		label.Relabel(container.ResolvConfPath, container.MountLabel, mode)
+		label.Relabel(container.ResolvConfPath, container.MountLabel, shared)
 		writable := !container.hostConfig.ReadonlyRootfs
 		writable := !container.hostConfig.ReadonlyRootfs
 		if m, exists := container.MountPoints["/etc/resolv.conf"]; exists {
 		if m, exists := container.MountPoints["/etc/resolv.conf"]; exists {
 			writable = m.RW
 			writable = m.RW
@@ -1130,7 +1127,7 @@ func (container *Container) networkMounts() []execdriver.Mount {
 		})
 		})
 	}
 	}
 	if container.HostnamePath != "" {
 	if container.HostnamePath != "" {
-		label.Relabel(container.HostnamePath, container.MountLabel, mode)
+		label.Relabel(container.HostnamePath, container.MountLabel, shared)
 		writable := !container.hostConfig.ReadonlyRootfs
 		writable := !container.hostConfig.ReadonlyRootfs
 		if m, exists := container.MountPoints["/etc/hostname"]; exists {
 		if m, exists := container.MountPoints["/etc/hostname"]; exists {
 			writable = m.RW
 			writable = m.RW
@@ -1143,7 +1140,7 @@ func (container *Container) networkMounts() []execdriver.Mount {
 		})
 		})
 	}
 	}
 	if container.HostsPath != "" {
 	if container.HostsPath != "" {
-		label.Relabel(container.HostsPath, container.MountLabel, mode)
+		label.Relabel(container.HostsPath, container.MountLabel, shared)
 		writable := !container.hostConfig.ReadonlyRootfs
 		writable := !container.hostConfig.ReadonlyRootfs
 		if m, exists := container.MountPoints["/etc/hosts"]; exists {
 		if m, exists := container.MountPoints["/etc/hosts"]; exists {
 			writable = m.RW
 			writable = m.RW

+ 1 - 1
daemon/create_unix.go

@@ -59,7 +59,7 @@ func createContainerPlatformSpecificSettings(container *Container, config *runco
 			return err
 			return err
 		}
 		}
 
 
-		if err := label.Relabel(v.Path(), container.MountLabel, "z"); err != nil {
+		if err := label.Relabel(v.Path(), container.MountLabel, true); err != nil {
 			return err
 			return err
 		}
 		}
 
 

+ 2 - 1
daemon/volumes_unix.go

@@ -355,7 +355,8 @@ func (daemon *Daemon) registerMountPoints(container *Container, hostConfig *runc
 			}
 			}
 		}
 		}
 
 
-		if err := label.Relabel(bind.Source, container.MountLabel, bind.Mode); err != nil {
+		shared := label.IsShared(bind.Mode)
+		if err := label.Relabel(bind.Source, container.MountLabel, shared); err != nil {
 			return err
 			return err
 		}
 		}
 		binds[bind.Destination] = true
 		binds[bind.Destination] = true

+ 1 - 1
hack/vendor.sh

@@ -42,7 +42,7 @@ clone git github.com/endophage/gotuf 9bcdad0308e34a49f38448b8ad436ad8860825ce
 clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16
 clone git github.com/jfrazelle/go 6e461eb70cb4187b41a84e9a567d7137bdbe0f16
 clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
 clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
 
 
-clone git github.com/opencontainers/runc v0.0.3 # libcontainer
+clone git github.com/opencontainers/runc v0.0.4 # libcontainer
 # libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh)
 # libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh)
 clone git github.com/coreos/go-systemd v3
 clone git github.com/coreos/go-systemd v3
 clone git github.com/godbus/dbus v2
 clone git github.com/godbus/dbus v2

+ 6 - 6
vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go

@@ -83,7 +83,7 @@ type data struct {
 	pid    int
 	pid    int
 }
 }
 
 
-func (m *Manager) Apply(pid int) error {
+func (m *Manager) Apply(pid int) (err error) {
 	if m.Cgroups == nil {
 	if m.Cgroups == nil {
 		return nil
 		return nil
 	}
 	}
@@ -235,12 +235,12 @@ func getCgroupData(c *configs.Cgroup, pid int) (*data, error) {
 	}, nil
 	}, nil
 }
 }
 
 
-func (raw *data) parent(subsystem, mountpoint, src string) (string, error) {
-	initPath, err := cgroups.GetInitCgroupDir(subsystem)
+func (raw *data) parent(subsystem, mountpoint, root string) (string, error) {
+	initPath, err := cgroups.GetThisCgroupDir(subsystem)
 	if err != nil {
 	if err != nil {
 		return "", err
 		return "", err
 	}
 	}
-	relDir, err := filepath.Rel(src, initPath)
+	relDir, err := filepath.Rel(root, initPath)
 	if err != nil {
 	if err != nil {
 		return "", err
 		return "", err
 	}
 	}
@@ -248,7 +248,7 @@ func (raw *data) parent(subsystem, mountpoint, src string) (string, error) {
 }
 }
 
 
 func (raw *data) path(subsystem string) (string, error) {
 func (raw *data) path(subsystem string) (string, error) {
-	mnt, src, err := cgroups.FindCgroupMountpointAndSource(subsystem)
+	mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
 	// If we didn't mount the subsystem, there is no point in making the path.
 	// If we didn't mount the subsystem, there is no point in making the path.
 	if err != nil {
 	if err != nil {
 		return "", err
 		return "", err
@@ -259,7 +259,7 @@ func (raw *data) path(subsystem string) (string, error) {
 		return filepath.Join(raw.root, filepath.Base(mnt), raw.cgroup), nil
 		return filepath.Join(raw.root, filepath.Base(mnt), raw.cgroup), nil
 	}
 	}
 
 
-	parent, err := raw.parent(subsystem, mnt, src)
+	parent, err := raw.parent(subsystem, mnt, root)
 	if err != nil {
 	if err != nil {
 		return "", err
 		return "", err
 	}
 	}

+ 9 - 8
vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go

@@ -17,7 +17,7 @@ import (
 type MemoryGroup struct {
 type MemoryGroup struct {
 }
 }
 
 
-func (s *MemoryGroup) Apply(d *data) error {
+func (s *MemoryGroup) Apply(d *data) (err error) {
 	path, err := d.path("memory")
 	path, err := d.path("memory")
 	if err != nil {
 	if err != nil {
 		if cgroups.IsNotFound(err) {
 		if cgroups.IsNotFound(err) {
@@ -28,21 +28,22 @@ func (s *MemoryGroup) Apply(d *data) error {
 	if err := os.MkdirAll(path, 0755); err != nil {
 	if err := os.MkdirAll(path, 0755); err != nil {
 		return err
 		return err
 	}
 	}
+
+	defer func() {
+		if err != nil {
+			os.RemoveAll(path)
+		}
+	}()
+
 	if err := s.Set(path, d.c); err != nil {
 	if err := s.Set(path, d.c); err != nil {
 		return err
 		return err
 	}
 	}
 
 
 	// We need to join memory cgroup after set memory limits, because
 	// We need to join memory cgroup after set memory limits, because
 	// kmem.limit_in_bytes can only be set when the cgroup is empty.
 	// kmem.limit_in_bytes can only be set when the cgroup is empty.
-	_, err = d.join("memory")
-	if err != nil {
+	if _, err = d.join("memory"); err != nil {
 		return err
 		return err
 	}
 	}
-	defer func() {
-		if err != nil {
-			os.RemoveAll(path)
-		}
-	}()
 
 
 	return nil
 	return nil
 }
 }

+ 21 - 5
vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/utils.go

@@ -21,6 +21,9 @@ const cgroupNamePrefix = "name="
 
 
 // https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
 // https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
 func FindCgroupMountpoint(subsystem string) (string, error) {
 func FindCgroupMountpoint(subsystem string) (string, error) {
+	// We are not using mount.GetMounts() because it's super-inefficient,
+	// parsing it directly sped up x10 times because of not using Sscanf.
+	// It was one of two major performance drawbacks in container start.
 	f, err := os.Open("/proc/self/mountinfo")
 	f, err := os.Open("/proc/self/mountinfo")
 	if err != nil {
 	if err != nil {
 		return "", err
 		return "", err
@@ -44,7 +47,7 @@ func FindCgroupMountpoint(subsystem string) (string, error) {
 	return "", NewNotFoundError(subsystem)
 	return "", NewNotFoundError(subsystem)
 }
 }
 
 
-func FindCgroupMountpointAndSource(subsystem string) (string, string, error) {
+func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
 	f, err := os.Open("/proc/self/mountinfo")
 	f, err := os.Open("/proc/self/mountinfo")
 	if err != nil {
 	if err != nil {
 		return "", "", err
 		return "", "", err
@@ -69,16 +72,29 @@ func FindCgroupMountpointAndSource(subsystem string) (string, string, error) {
 }
 }
 
 
 func FindCgroupMountpointDir() (string, error) {
 func FindCgroupMountpointDir() (string, error) {
-	mounts, err := mount.GetMounts()
+	f, err := os.Open("/proc/self/mountinfo")
 	if err != nil {
 	if err != nil {
 		return "", err
 		return "", err
 	}
 	}
+	defer f.Close()
 
 
-	for _, mount := range mounts {
-		if mount.Fstype == "cgroup" {
-			return filepath.Dir(mount.Mountpoint), nil
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		text := scanner.Text()
+		fields := strings.Split(text, " ")
+		// Safe as mountinfo encodes mountpoints with spaces as \040.
+		index := strings.Index(text, " - ")
+		postSeparatorFields := strings.Fields(text[index+3:])
+		if len(postSeparatorFields) < 3 {
+			return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
+		}
+		if postSeparatorFields[0] == "cgroup" {
+			return filepath.Dir(fields[4]), nil
 		}
 		}
 	}
 	}
+	if err := scanner.Err(); err != nil {
+		return "", err
+	}
 
 
 	return "", NewNotFoundError("cgroup")
 	return "", NewNotFoundError("cgroup")
 }
 }

+ 103 - 9
vendor/src/github.com/opencontainers/runc/libcontainer/configs/config.go

@@ -1,5 +1,11 @@
 package configs
 package configs
 
 
+import (
+	"bytes"
+	"encoding/json"
+	"os/exec"
+)
+
 type Rlimit struct {
 type Rlimit struct {
 	Type int    `json:"type"`
 	Type int    `json:"type"`
 	Hard uint64 `json:"hard"`
 	Hard uint64 `json:"hard"`
@@ -13,36 +19,46 @@ type IDMap struct {
 	Size        int `json:"size"`
 	Size        int `json:"size"`
 }
 }
 
 
+// Seccomp represents syscall restrictions
 type Seccomp struct {
 type Seccomp struct {
-	Syscalls []*Syscall `json:"syscalls"`
+	DefaultAction Action     `json:"default_action"`
+	Syscalls      []*Syscall `json:"syscalls"`
 }
 }
 
 
+// An action to be taken upon rule match in Seccomp
 type Action int
 type Action int
 
 
 const (
 const (
-	Kill Action = iota - 3
+	Kill Action = iota - 4
+	Errno
 	Trap
 	Trap
 	Allow
 	Allow
 )
 )
 
 
+// A comparison operator to be used when matching syscall arguments in Seccomp
 type Operator int
 type Operator int
 
 
 const (
 const (
 	EqualTo Operator = iota
 	EqualTo Operator = iota
 	NotEqualTo
 	NotEqualTo
-	GreatherThan
+	GreaterThan
+	GreaterThanOrEqualTo
 	LessThan
 	LessThan
+	LessThanOrEqualTo
 	MaskEqualTo
 	MaskEqualTo
 )
 )
 
 
+// A rule to match a specific syscall argument in Seccomp
 type Arg struct {
 type Arg struct {
-	Index int      `json:"index"`
-	Value uint32   `json:"value"`
-	Op    Operator `json:"op"`
+	Index    uint     `json:"index"`
+	Value    uint64   `json:"value"`
+	ValueTwo uint64   `json:"value_two"`
+	Op       Operator `json:"op"`
 }
 }
 
 
+// A rule to match a syscall in Seccomp
 type Syscall struct {
 type Syscall struct {
-	Value  int    `json:"value"`
+	Name   string `json:"name"`
 	Action Action `json:"action"`
 	Action Action `json:"action"`
 	Args   []*Arg `json:"args"`
 	Args   []*Arg `json:"args"`
 }
 }
@@ -117,6 +133,12 @@ type Config struct {
 	// If Rlimits are not set, the container will inherit rlimits from the parent process
 	// If Rlimits are not set, the container will inherit rlimits from the parent process
 	Rlimits []Rlimit `json:"rlimits"`
 	Rlimits []Rlimit `json:"rlimits"`
 
 
+	// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
+	// for a process. Valid values are in the range [-1000, 1000], where processes with
+	// higher scores are preferred for being killed.
+	// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
+	OomScoreAdj int `json:"oom_score_adj"`
+
 	// AdditionalGroups specifies the gids that should be added to supplementary groups
 	// AdditionalGroups specifies the gids that should be added to supplementary groups
 	// in addition to those that the user belongs to.
 	// in addition to those that the user belongs to.
 	AdditionalGroups []string `json:"additional_groups"`
 	AdditionalGroups []string `json:"additional_groups"`
@@ -140,7 +162,79 @@ type Config struct {
 	Sysctl map[string]string `json:"sysctl"`
 	Sysctl map[string]string `json:"sysctl"`
 
 
 	// Seccomp allows actions to be taken whenever a syscall is made within the container.
 	// Seccomp allows actions to be taken whenever a syscall is made within the container.
-	// By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno
-	// can be specified on a per syscall basis.
+	// A number of rules are given, each having an action to be taken if a syscall matches it.
+	// A default action to be taken if no rules match is also given.
 	Seccomp *Seccomp `json:"seccomp"`
 	Seccomp *Seccomp `json:"seccomp"`
+
+	// Hooks are a collection of actions to perform at various container lifecycle events.
+	// Hooks cannot be marshaled to JSON, but they do not need to be.
+	Hooks *Hooks `json:"-"`
+}
+
+type Hooks struct {
+	// Prestart commands are executed after the container namespaces are created,
+	// but before the user supplied command is executed from init.
+	Prestart []Hook
+
+	// Poststop commands are executed after the container init process exits.
+	Poststop []Hook
+}
+
+// HookState is the payload provided to a hook on execution.
+type HookState struct {
+	ID   string `json:"id"`
+	Pid  int    `json:"pid"`
+	Root string `json:"root"`
+}
+
+type Hook interface {
+	// Run executes the hook with the provided state.
+	Run(HookState) error
+}
+
+// NewFunctionHook will call the provided function when the hook is run.
+func NewFunctionHook(f func(HookState) error) FuncHook {
+	return FuncHook{
+		run: f,
+	}
+}
+
+type FuncHook struct {
+	run func(HookState) error
+}
+
+func (f FuncHook) Run(s HookState) error {
+	return f.run(s)
+}
+
+type Command struct {
+	Path string   `json:"path"`
+	Args []string `json:"args"`
+	Env  []string `json:"env"`
+	Dir  string   `json:"dir"`
+}
+
+// NewCommandHook will execute the provided command when the hook is run.
+func NewCommandHook(cmd Command) CommandHook {
+	return CommandHook{
+		Command: cmd,
+	}
+}
+
+type CommandHook struct {
+	Command
+}
+
+func (c Command) Run(s HookState) error {
+	b, err := json.Marshal(s)
+	if err != nil {
+		return err
+	}
+	cmd := exec.Cmd{
+		Path:  c.Path,
+		Args:  c.Args,
+		Env:   c.Env,
+		Stdin: bytes.NewReader(b),
+	}
+	return cmd.Run()
 }
 }

+ 0 - 7
vendor/src/github.com/opencontainers/runc/libcontainer/configs/mount.go

@@ -25,10 +25,3 @@ type Mount struct {
 	// Optional Command to be run after Source is mounted.
 	// Optional Command to be run after Source is mounted.
 	PostmountCmds []Command `json:"postmount_cmds"`
 	PostmountCmds []Command `json:"postmount_cmds"`
 }
 }
-
-type Command struct {
-	Path string   `json:"path"`
-	Args []string `json:"args"`
-	Env  []string `json:"env"`
-	Dir  string   `json:"dir"`
-}

+ 25 - 5
vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go

@@ -185,6 +185,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
 		parentPipe: parentPipe,
 		parentPipe: parentPipe,
 		manager:    c.cgroupManager,
 		manager:    c.cgroupManager,
 		config:     c.newInitConfig(p),
 		config:     c.newInitConfig(p),
+		container:  c,
 	}, nil
 	}, nil
 }
 }
 
 
@@ -247,6 +248,17 @@ func (c *linuxContainer) Destroy() error {
 		err = rerr
 		err = rerr
 	}
 	}
 	c.initProcess = nil
 	c.initProcess = nil
+	if c.config.Hooks != nil {
+		s := configs.HookState{
+			ID:   c.id,
+			Root: c.config.Rootfs,
+		}
+		for _, hook := range c.config.Hooks.Poststop {
+			if err := hook.Run(s); err != nil {
+				return err
+			}
+		}
+	}
 	return err
 	return err
 }
 }
 
 
@@ -299,7 +311,7 @@ func (c *linuxContainer) checkCriuVersion() error {
 	return nil
 	return nil
 }
 }
 
 
-const descriptors_filename = "descriptors.json"
+const descriptorsFilename = "descriptors.json"
 
 
 func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) {
 func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) {
 	mountDest := m.Destination
 	mountDest := m.Destination
@@ -406,7 +418,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
 		return err
 		return err
 	}
 	}
 
 
-	err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename), fdsJSON, 0655)
+	err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
@@ -532,13 +544,19 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
 			break
 			break
 		}
 		}
 	}
 	}
+	for _, i := range criuOpts.VethPairs {
+		veth := new(criurpc.CriuVethPair)
+		veth.IfOut = proto.String(i.HostInterfaceName)
+		veth.IfIn = proto.String(i.ContainerInterfaceName)
+		req.Opts.Veths = append(req.Opts.Veths, veth)
+	}
 
 
 	var (
 	var (
 		fds    []string
 		fds    []string
 		fdJSON []byte
 		fdJSON []byte
 	)
 	)
 
 
-	if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename)); err != nil {
+	if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil {
 		return err
 		return err
 	}
 	}
 
 
@@ -568,6 +586,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
 		return err
 		return err
 	}
 	}
 
 
+	logPath := filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile())
 	criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
 	criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
 	criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
 	criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
 	defer criuClient.Close()
 	defer criuClient.Close()
@@ -631,7 +650,8 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
 			return err
 			return err
 		}
 		}
 		if !resp.GetSuccess() {
 		if !resp.GetSuccess() {
-			return fmt.Errorf("criu failed: type %s errno %d", req.GetType().String(), resp.GetCrErrno())
+			typeString := req.GetType().String()
+			return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath)
 		}
 		}
 
 
 		t := resp.GetType()
 		t := resp.GetType()
@@ -671,7 +691,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
 		return err
 		return err
 	}
 	}
 	if !st.Success() {
 	if !st.Success() {
-		return fmt.Errorf("criu failed: %s", st.String())
+		return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
 	}
 	}
 	return nil
 	return nil
 }
 }

+ 6 - 0
vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go

@@ -5,6 +5,11 @@ type CriuPageServerInfo struct {
 	Port    int32  // port number of CRIU page server
 	Port    int32  // port number of CRIU page server
 }
 }
 
 
+type VethPairName struct {
+	ContainerInterfaceName string
+	HostInterfaceName      string
+}
+
 type CriuOpts struct {
 type CriuOpts struct {
 	ImagesDirectory         string             // directory for storing image files
 	ImagesDirectory         string             // directory for storing image files
 	WorkDirectory           string             // directory to cd and write logs/pidfiles/stats to
 	WorkDirectory           string             // directory to cd and write logs/pidfiles/stats to
@@ -14,4 +19,5 @@ type CriuOpts struct {
 	ShellJob                bool               // allow to dump and restore shell jobs
 	ShellJob                bool               // allow to dump and restore shell jobs
 	FileLocks               bool               // handle file locks, for safety
 	FileLocks               bool               // handle file locks, for safety
 	PageServer              CriuPageServerInfo // allow to dump to criu page server
 	PageServer              CriuPageServerInfo // allow to dump to criu page server
+	VethPairs               []VethPairName     // pass the veth to criu when restore
 }
 }

+ 7 - 59
vendor/src/github.com/opencontainers/runc/libcontainer/init_linux.go

@@ -5,7 +5,9 @@ package libcontainer
 import (
 import (
 	"encoding/json"
 	"encoding/json"
 	"fmt"
 	"fmt"
+	"io/ioutil"
 	"os"
 	"os"
+	"strconv"
 	"strings"
 	"strings"
 	"syscall"
 	"syscall"
 
 
@@ -13,7 +15,6 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/netlink"
 	"github.com/opencontainers/runc/libcontainer/netlink"
-	"github.com/opencontainers/runc/libcontainer/seccomp"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/user"
 	"github.com/opencontainers/runc/libcontainer/user"
 	"github.com/opencontainers/runc/libcontainer/utils"
 	"github.com/opencontainers/runc/libcontainer/utils"
@@ -239,6 +240,11 @@ func setupRlimits(config *configs.Config) error {
 	return nil
 	return nil
 }
 }
 
 
+func setOomScoreAdj(oomScoreAdj int) error {
+	path := "/proc/self/oom_score_adj"
+	return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0700)
+}
+
 // killCgroupProcesses freezes then iterates over all the processes inside the
 // killCgroupProcesses freezes then iterates over all the processes inside the
 // manager's cgroups sending a SIGKILL to each process then waiting for them to
 // manager's cgroups sending a SIGKILL to each process then waiting for them to
 // exit.
 // exit.
@@ -270,61 +276,3 @@ func killCgroupProcesses(m cgroups.Manager) error {
 	}
 	}
 	return nil
 	return nil
 }
 }
-
-func finalizeSeccomp(config *initConfig) error {
-	if config.Config.Seccomp == nil {
-		return nil
-	}
-	context := seccomp.New()
-	for _, s := range config.Config.Seccomp.Syscalls {
-		ss := &seccomp.Syscall{
-			Value:  uint32(s.Value),
-			Action: seccompAction(s.Action),
-		}
-		if len(s.Args) > 0 {
-			ss.Args = seccompArgs(s.Args)
-		}
-		context.Add(ss)
-	}
-	return context.Load()
-}
-
-func seccompAction(a configs.Action) seccomp.Action {
-	switch a {
-	case configs.Kill:
-		return seccomp.Kill
-	case configs.Trap:
-		return seccomp.Trap
-	case configs.Allow:
-		return seccomp.Allow
-	}
-	return seccomp.Error(syscall.Errno(int(a)))
-}
-
-func seccompArgs(args []*configs.Arg) seccomp.Args {
-	var sa []seccomp.Arg
-	for _, a := range args {
-		sa = append(sa, seccomp.Arg{
-			Index: uint32(a.Index),
-			Op:    seccompOperator(a.Op),
-			Value: uint(a.Value),
-		})
-	}
-	return seccomp.Args{sa}
-}
-
-func seccompOperator(o configs.Operator) seccomp.Operator {
-	switch o {
-	case configs.EqualTo:
-		return seccomp.EqualTo
-	case configs.NotEqualTo:
-		return seccomp.NotEqualTo
-	case configs.GreatherThan:
-		return seccomp.GreatherThan
-	case configs.LessThan:
-		return seccomp.LessThan
-	case configs.MaskEqualTo:
-		return seccomp.MaskEqualTo
-	}
-	return 0
-}

+ 11 - 1
vendor/src/github.com/opencontainers/runc/libcontainer/label/label.go

@@ -29,7 +29,7 @@ func SetFileCreateLabel(fileLabel string) error {
 	return nil
 	return nil
 }
 }
 
 
-func Relabel(path string, fileLabel string, relabel string) error {
+func Relabel(path string, fileLabel string, shared bool) error {
 	return nil
 	return nil
 }
 }
 
 
@@ -59,3 +59,13 @@ func DupSecOpt(src string) []string {
 func DisableSecOpt() []string {
 func DisableSecOpt() []string {
 	return nil
 	return nil
 }
 }
+
+// Validate checks that the label does not include unexpected options
+func Validate(label string) error {
+	return nil
+}
+
+// IsShared checks that the label includes a "shared" mark
+func IsShared(label string) bool {
+	return false
+}

+ 28 - 17
vendor/src/github.com/opencontainers/runc/libcontainer/label/label_selinux.go

@@ -9,6 +9,8 @@ import (
 	"github.com/opencontainers/runc/libcontainer/selinux"
 	"github.com/opencontainers/runc/libcontainer/selinux"
 )
 )
 
 
+var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together")
+
 // InitLabels returns the process label and file labels to be used within
 // InitLabels returns the process label and file labels to be used within
 // the container.  A list of options can be passed into this function to alter
 // the container.  A list of options can be passed into this function to alter
 // the labels.  The labels returned will include a random MCS String, that is
 // the labels.  The labels returned will include a random MCS String, that is
@@ -95,28 +97,24 @@ func SetFileCreateLabel(fileLabel string) error {
 	return nil
 	return nil
 }
 }
 
 
-// Change the label of path to the filelabel string.  If the relabel string
-// is "z", relabel will change the MCS label to s0.  This will allow all
-// containers to share the content.  If the relabel string is a "Z" then
-// the MCS label should continue to be used.  SELinux will use this field
-// to make sure the content can not be shared by other containes.
-func Relabel(path string, fileLabel string, relabel string) error {
-	exclude_path := []string{"/", "/usr", "/etc"}
-	if fileLabel == "" {
+// Change the label of path to the filelabel string.
+// It changes the MCS label to s0 if shared is true.
+// This will allow all containers to share the content.
+func Relabel(path string, fileLabel string, shared bool) error {
+	if !selinux.SelinuxEnabled() {
 		return nil
 		return nil
 	}
 	}
-	if !strings.ContainsAny(relabel, "zZ") {
+
+	if fileLabel == "" {
 		return nil
 		return nil
 	}
 	}
-	for _, p := range exclude_path {
-		if path == p {
-			return fmt.Errorf("Relabeling of %s is not allowed", path)
-		}
-	}
-	if strings.Contains(relabel, "z") && strings.Contains(relabel, "Z") {
-		return fmt.Errorf("Bad SELinux option z and Z can not be used together")
+
+	exclude_paths := map[string]bool{"/": true, "/usr": true, "/etc": true}
+	if exclude_paths[path] {
+		return fmt.Errorf("Relabeling of %s is not allowed", path)
 	}
 	}
-	if strings.Contains(relabel, "z") {
+
+	if shared {
 		c := selinux.NewContext(fileLabel)
 		c := selinux.NewContext(fileLabel)
 		c["level"] = "s0"
 		c["level"] = "s0"
 		fileLabel = c.Get()
 		fileLabel = c.Get()
@@ -161,3 +159,16 @@ func DupSecOpt(src string) []string {
 func DisableSecOpt() []string {
 func DisableSecOpt() []string {
 	return selinux.DisableSecOpt()
 	return selinux.DisableSecOpt()
 }
 }
+
+// Validate checks that the label does not include unexpected options
+func Validate(label string) error {
+	if strings.Contains(label, "z") && strings.Contains(label, "Z") {
+		return ErrIncompatibleLabel
+	}
+	return nil
+}
+
+// IsShared checks that the label includes a "shared" mark
+func IsShared(label string) bool {
+	return strings.Contains(label, "z")
+}

+ 1 - 1
vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_armppc64.go

@@ -1,4 +1,4 @@
-// +build arm ppc64
+// +build arm ppc64 ppc64le
 
 
 package netlink
 package netlink
 
 

+ 1 - 1
vendor/src/github.com/opencontainers/runc/libcontainer/netlink/netlink_linux_notarm.go

@@ -1,4 +1,4 @@
-// +build !arm,!ppc64
+// +build !arm,!ppc64,!ppc64le
 
 
 package netlink
 package netlink
 
 

+ 18 - 9
vendor/src/github.com/opencontainers/runc/libcontainer/process_linux.go

@@ -13,6 +13,7 @@ import (
 	"syscall"
 	"syscall"
 
 
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/system"
 )
 )
 
 
@@ -138,11 +139,9 @@ func (p *setnsProcess) terminate() error {
 
 
 func (p *setnsProcess) wait() (*os.ProcessState, error) {
 func (p *setnsProcess) wait() (*os.ProcessState, error) {
 	err := p.cmd.Wait()
 	err := p.cmd.Wait()
-	if err != nil {
-		return p.cmd.ProcessState, err
-	}
 
 
-	return p.cmd.ProcessState, nil
+	// Return actual ProcessState even on Wait error
+	return p.cmd.ProcessState, err
 }
 }
 
 
 func (p *setnsProcess) pid() int {
 func (p *setnsProcess) pid() int {
@@ -175,9 +174,9 @@ func (p *initProcess) externalDescriptors() []string {
 	return p.fds
 	return p.fds
 }
 }
 
 
-func (p *initProcess) start() error {
+func (p *initProcess) start() (err error) {
 	defer p.parentPipe.Close()
 	defer p.parentPipe.Close()
-	err := p.cmd.Start()
+	err = p.cmd.Start()
 	p.childPipe.Close()
 	p.childPipe.Close()
 	if err != nil {
 	if err != nil {
 		return newSystemError(err)
 		return newSystemError(err)
@@ -202,6 +201,18 @@ func (p *initProcess) start() error {
 			p.manager.Destroy()
 			p.manager.Destroy()
 		}
 		}
 	}()
 	}()
+	if p.config.Config.Hooks != nil {
+		s := configs.HookState{
+			ID:   p.container.id,
+			Pid:  p.pid(),
+			Root: p.config.Config.Rootfs,
+		}
+		for _, hook := range p.config.Config.Hooks.Prestart {
+			if err := hook.Run(s); err != nil {
+				return newSystemError(err)
+			}
+		}
+	}
 	if err := p.createNetworkInterfaces(); err != nil {
 	if err := p.createNetworkInterfaces(); err != nil {
 		return newSystemError(err)
 		return newSystemError(err)
 	}
 	}
@@ -286,9 +297,7 @@ func (p *initProcess) setExternalDescriptors(newFds []string) {
 }
 }
 
 
 func getPipeFds(pid int) ([]string, error) {
 func getPipeFds(pid int) ([]string, error) {
-	var fds []string
-
-	fds = make([]string, 3)
+	fds := make([]string, 3)
 
 
 	dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
 	dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
 	for i := 0; i < 3; i++ {
 	for i := 0; i < 3; i++ {

+ 26 - 11
vendor/src/github.com/opencontainers/runc/libcontainer/rootfs_linux.go

@@ -27,6 +27,8 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
 	if err := prepareRoot(config); err != nil {
 	if err := prepareRoot(config); err != nil {
 		return newSystemError(err)
 		return newSystemError(err)
 	}
 	}
+
+	setupDev := len(config.Devices) == 0
 	for _, m := range config.Mounts {
 	for _, m := range config.Mounts {
 		for _, precmd := range m.PremountCmds {
 		for _, precmd := range m.PremountCmds {
 			if err := mountCmd(precmd); err != nil {
 			if err := mountCmd(precmd); err != nil {
@@ -43,14 +45,16 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
 			}
 			}
 		}
 		}
 	}
 	}
-	if err := createDevices(config); err != nil {
-		return newSystemError(err)
-	}
-	if err := setupPtmx(config, console); err != nil {
-		return newSystemError(err)
-	}
-	if err := setupDevSymlinks(config.Rootfs); err != nil {
-		return newSystemError(err)
+	if !setupDev {
+		if err := createDevices(config); err != nil {
+			return newSystemError(err)
+		}
+		if err := setupPtmx(config, console); err != nil {
+			return newSystemError(err)
+		}
+		if err := setupDevSymlinks(config.Rootfs); err != nil {
+			return newSystemError(err)
+		}
 	}
 	}
 	if err := syscall.Chdir(config.Rootfs); err != nil {
 	if err := syscall.Chdir(config.Rootfs); err != nil {
 		return newSystemError(err)
 		return newSystemError(err)
@@ -63,8 +67,10 @@ func setupRootfs(config *configs.Config, console *linuxConsole) (err error) {
 	if err != nil {
 	if err != nil {
 		return newSystemError(err)
 		return newSystemError(err)
 	}
 	}
-	if err := reOpenDevNull(config.Rootfs); err != nil {
-		return newSystemError(err)
+	if !setupDev {
+		if err := reOpenDevNull(config.Rootfs); err != nil {
+			return newSystemError(err)
+		}
 	}
 	}
 	if config.Readonlyfs {
 	if config.Readonlyfs {
 		if err := setReadonly(); err != nil {
 		if err := setReadonly(); err != nil {
@@ -131,6 +137,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 			return err
 			return err
 		}
 		}
 		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
 		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
+	case "securityfs":
+		if err := os.MkdirAll(dest, 0755); err != nil {
+			return err
+		}
+		return syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags), data)
 	case "bind":
 	case "bind":
 		stat, err := os.Stat(m.Source)
 		stat, err := os.Stat(m.Source)
 		if err != nil {
 		if err != nil {
@@ -160,7 +171,11 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
 			}
 			}
 		}
 		}
 		if m.Relabel != "" {
 		if m.Relabel != "" {
-			if err := label.Relabel(m.Source, mountLabel, m.Relabel); err != nil {
+			if err := label.Validate(m.Relabel); err != nil {
+				return err
+			}
+			shared := label.IsShared(m.Relabel)
+			if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
 				return err
 				return err
 			}
 			}
 		}
 		}

+ 0 - 34
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/bpf.go

@@ -1,34 +0,0 @@
-// +build linux
-
-package seccomp
-
-import "strings"
-
-type bpfLabel struct {
-	label    string
-	location uint32
-}
-
-type bpfLabels []bpfLabel
-
-// labelIndex returns the index for the label if it exists in the slice.
-// if it does not exist in the slice it appends the label lb to the end
-// of the slice and returns the index.
-func labelIndex(labels *bpfLabels, lb string) uint32 {
-	var id uint32
-	for id = 0; id < uint32(len(*labels)); id++ {
-		if strings.EqualFold(lb, (*labels)[id].label) {
-			return id
-		}
-	}
-	*labels = append(*labels, bpfLabel{lb, 0xffffffff})
-	return id
-}
-
-func scmpBpfStmt(code uint16, k uint32) sockFilter {
-	return sockFilter{code, 0, 0, k}
-}
-
-func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter {
-	return sockFilter{code, jt, jf, k}
-}

+ 53 - 0
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/config.go

@@ -0,0 +1,53 @@
+package seccomp
+
+import (
+	"fmt"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// ConvertStringToOperator converts a string into a Seccomp comparison operator.
+// Comparison operators use the names they are assigned by Libseccomp's header.
+// Attempting to convert a string that is not a valid operator results in an
+// error (the returned Operator is 0 in that case and must not be used).
+func ConvertStringToOperator(in string) (configs.Operator, error) {
+	switch in {
+	case "SCMP_CMP_NE":
+		return configs.NotEqualTo, nil
+	case "SCMP_CMP_LT":
+		return configs.LessThan, nil
+	case "SCMP_CMP_LE":
+		return configs.LessThanOrEqualTo, nil
+	case "SCMP_CMP_EQ":
+		return configs.EqualTo, nil
+	case "SCMP_CMP_GE":
+		// GE = "greater than or equal"; previously swapped with GT.
+		return configs.GreaterThanOrEqualTo, nil
+	case "SCMP_CMP_GT":
+		// GT = strictly "greater than"; previously swapped with GE.
+		return configs.GreaterThan, nil
+	case "SCMP_CMP_MASKED_EQ":
+		return configs.MaskEqualTo, nil
+	default:
+		return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in)
+	}
+}
+
+// ConvertStringToAction converts a string into a Seccomp rule match action.
+// Actions use the names they are assigned in Libseccomp's header, though some
+// (notably, SCMP_ACT_TRACE) are not available in this implementation and will
+// return errors.
+// Attempting to convert a string that is not a valid action results in an
+// error.
+func ConvertStringToAction(in string) (configs.Action, error) {
+	switch in {
+	case "SCMP_ACT_KILL":
+		return configs.Kill, nil
+	case "SCMP_ACT_ERRNO":
+		return configs.Errno, nil
+	case "SCMP_ACT_TRAP":
+		return configs.Trap, nil
+	case "SCMP_ACT_ALLOW":
+		return configs.Allow, nil
+	default:
+		return 0, fmt.Errorf("string %s is not a valid action for seccomp", in)
+	}
+}

+ 0 - 146
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/context.go

@@ -1,146 +0,0 @@
-// +build linux
-
-package seccomp
-
-import (
-	"errors"
-	"syscall"
-)
-
-const labelTemplate = "lb-%d-%d"
-
-// Action is the type of action that will be taken when a
-// syscall is performed.
-type Action int
-
-const (
-	Kill  Action = iota - 3 // Kill the calling process of the syscall.
-	Trap                    // Trap and coredump the calling process of the syscall.
-	Allow                   // Allow the syscall to be completed.
-)
-
-// Syscall is the specified syscall, action, and any type of arguments
-// to filter on.
-type Syscall struct {
-	// Value is the syscall number.
-	Value uint32
-	// Action is the action to perform when the specified syscall is made.
-	Action Action
-	// Args are filters that can be specified on the arguments to the syscall.
-	Args Args
-}
-
-func (s *Syscall) scmpAction() uint32 {
-	switch s.Action {
-	case Allow:
-		return retAllow
-	case Trap:
-		return retTrap
-	case Kill:
-		return retKill
-	}
-	return actionErrno(uint32(s.Action))
-}
-
-// Arg represents an argument to the syscall with the argument's index,
-// the operator to apply when matching, and the argument's value at that time.
-type Arg struct {
-	Index uint32   // index of args which start from zero
-	Op    Operator // operation, such as EQ/NE/GE/LE
-	Value uint     // the value of arg
-}
-
-type Args [][]Arg
-
-var (
-	ErrUnresolvedLabel      = errors.New("seccomp: unresolved label")
-	ErrDuplicateLabel       = errors.New("seccomp: duplicate label use")
-	ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument")
-)
-
-// Error returns an Action that will be used to send the calling
-// process the specified errno when the syscall is made.
-func Error(code syscall.Errno) Action {
-	return Action(code)
-}
-
-// New returns a new syscall context for use.
-func New() *Context {
-	return &Context{
-		syscalls: make(map[uint32]*Syscall),
-	}
-}
-
-// Context holds syscalls for the current process to limit the type of
-// actions the calling process can make.
-type Context struct {
-	syscalls map[uint32]*Syscall
-}
-
-// Add will add the specified syscall, action, and arguments to the seccomp
-// Context.
-func (c *Context) Add(s *Syscall) {
-	c.syscalls[s.Value] = s
-}
-
-// Remove removes the specified syscall configuration from the Context.
-func (c *Context) Remove(call uint32) {
-	delete(c.syscalls, call)
-}
-
-// Load will apply the Context to the calling process makeing any secccomp process changes
-// apply after the context is loaded.
-func (c *Context) Load() error {
-	filter, err := c.newFilter()
-	if err != nil {
-		return err
-	}
-	if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil {
-		return err
-	}
-	prog := newSockFprog(filter)
-	return prog.set()
-}
-
-func (c *Context) newFilter() ([]sockFilter, error) {
-	var (
-		labels bpfLabels
-		f      = newFilter()
-	)
-	for _, s := range c.syscalls {
-		f.addSyscall(s, &labels)
-	}
-	f.allow()
-	// process args for the syscalls
-	for _, s := range c.syscalls {
-		if err := f.addArguments(s, &labels); err != nil {
-			return nil, err
-		}
-	}
-	// apply labels for arguments
-	idx := int32(len(*f) - 1)
-	for ; idx >= 0; idx-- {
-		lf := &(*f)[idx]
-		if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) {
-			continue
-		}
-		rel := int32(lf.jt)<<8 | int32(lf.jf)
-		if ((jumpJT << 8) | jumpJF) == rel {
-			if labels[lf.k].location == 0xffffffff {
-				return nil, ErrUnresolvedLabel
-			}
-			lf.k = labels[lf.k].location - uint32(idx+1)
-			lf.jt = 0
-			lf.jf = 0
-		} else if ((labelJT << 8) | labelJF) == rel {
-			if labels[lf.k].location != 0xffffffff {
-				return nil, ErrDuplicateLabel
-			}
-			labels[lf.k].location = uint32(idx)
-			lf.k = 0
-			lf.jt = 0
-			lf.jf = 0
-		}
-	}
-	return *f, nil
-}

+ 0 - 118
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/filter.go

@@ -1,118 +0,0 @@
-// +build linux
-
-package seccomp
-
-import (
-	"fmt"
-	"syscall"
-	"unsafe"
-)
-
-type sockFilter struct {
-	code uint16
-	jt   uint8
-	jf   uint8
-	k    uint32
-}
-
-func newFilter() *filter {
-	var f filter
-	f = append(f, sockFilter{
-		pfLD + syscall.BPF_W + syscall.BPF_ABS,
-		0,
-		0,
-		uint32(unsafe.Offsetof(secData.nr)),
-	})
-	return &f
-}
-
-type filter []sockFilter
-
-func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) {
-	if len(s.Args) == 0 {
-		f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()))
-	} else {
-		if len(s.Args[0]) > 0 {
-			lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index)
-			f.call(s.Value,
-				scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
-					jumpJT, jumpJF))
-		}
-	}
-}
-
-func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error {
-	for i := 0; len(s.Args) > i; i++ {
-		if len(s.Args[i]) > 0 {
-			lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index)
-			f.label(labels, lb)
-			f.arg(s.Args[i][0].Index)
-		}
-		for j := 0; j < len(s.Args[i]); j++ {
-			var jf sockFilter
-			if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 {
-				lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index)
-				jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA,
-					labelIndex(labels, lbj), jumpJT, jumpJF)
-			} else {
-				jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())
-			}
-			if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil {
-				return err
-			}
-		}
-		f.allow()
-	}
-	return nil
-}
-
-func (f *filter) label(labels *bpfLabels, lb string) {
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF))
-}
-
-func (f *filter) call(nr uint32, jt sockFilter) {
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1))
-	*f = append(*f, jt)
-}
-
-func (f *filter) allow() {
-	*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow))
-}
-
-func (f *filter) deny() {
-	*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap))
-}
-
-func (f *filter) arg(index uint32) {
-	arg(f, index)
-}
-
-func (f *filter) op(operation Operator, v uint, jf sockFilter) error {
-	switch operation {
-	case EqualTo:
-		jumpEqualTo(f, v, jf)
-	case NotEqualTo:
-		jumpNotEqualTo(f, v, jf)
-	case GreatherThan:
-		jumpGreaterThan(f, v, jf)
-	case LessThan:
-		jumpLessThan(f, v, jf)
-	case MaskEqualTo:
-		jumpMaskEqualTo(f, v, jf)
-	default:
-		return ErrUnsupportedOperation
-	}
-	return nil
-}
-
-func arg(f *filter, idx uint32) {
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx)))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx)))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1))
-}
-
-func jump(f *filter, labels *bpfLabels, lb string) {
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
-		jumpJT, jumpJF))
-}

+ 0 - 68
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/jump_amd64.go

@@ -1,68 +0,0 @@
-// +build linux,amd64
-
-package seccomp
-
-// Using BPF filters
-//
-// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf
-import "syscall"
-
-func jumpGreaterThan(f *filter, v uint, jt sockFilter) {
-	lo := uint32(uint64(v) % 0x100000000)
-	hi := uint32(uint64(v) / 0x100000000)
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0))
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
-	*f = append(*f, jt)
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
-}
-
-func jumpEqualTo(f *filter, v uint, jt sockFilter) {
-	lo := uint32(uint64(v) % 0x100000000)
-	hi := uint32(uint64(v) / 0x100000000)
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
-	*f = append(*f, jt)
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
-}
-
-func jumpLessThan(f *filter, v uint, jt sockFilter) {
-	lo := uint32(uint64(v) % 0x100000000)
-	hi := uint32(uint64(v) / 0x100000000)
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0))
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
-	*f = append(*f, jt)
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
-}
-
-func jumpNotEqualTo(f *filter, v uint, jt sockFilter) {
-	lo := uint32(uint64(v) % 0x100000000)
-	hi := uint32(uint64(v) / 0x100000000)
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
-	*f = append(*f, jt)
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
-}
-
-// this checks for a value inside a mask. The evalusation is equal to doing
-// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER
-func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) {
-	lo := uint32(uint64(v) % 0x100000000)
-	hi := uint32(uint64(v) / 0x100000000)
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v)))
-	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2))
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
-	*f = append(*f, jt)
-	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
-}

+ 165 - 0
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go

@@ -0,0 +1,165 @@
+// +build linux,cgo,seccomp
+
+package seccomp
+
+import (
+	"fmt"
+	"log"
+	"syscall"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+	libseccomp "github.com/seccomp/libseccomp-golang"
+)
+
+var (
+	actAllow = libseccomp.ActAllow
+	actTrap  = libseccomp.ActTrap
+	actKill  = libseccomp.ActKill
+	actErrno = libseccomp.ActErrno.SetReturnCode(int16(syscall.EPERM))
+)
+
+// InitSeccomp builds a libseccomp filter from config and loads it into the kernel.
+// It is started in the container init process, and carried over to all child processes.
+// Setns calls, however, require a separate invocation, as they are not children
+// of the init until they join the namespace. A nil config is rejected with an error.
+func InitSeccomp(config *configs.Seccomp) error {
+	if config == nil {
+		return fmt.Errorf("cannot initialize Seccomp - nil config passed")
+	}
+
+	defaultAction, err := getAction(config.DefaultAction)
+	if err != nil {
+		return fmt.Errorf("error initializing seccomp - invalid default action")
+	}
+
+	filter, err := libseccomp.NewFilter(defaultAction)
+	if err != nil {
+		return fmt.Errorf("error creating filter: %s", err)
+	}
+
+	// Unset the no-new-privs bit on the filter before any rule is added
+	if err := filter.SetNoNewPrivsBit(false); err != nil {
+		return fmt.Errorf("error setting no new privileges: %s", err)
+	}
+
+	// Add a rule for each syscall; a nil entry aborts initialization
+	for _, call := range config.Syscalls {
+		if call == nil {
+			return fmt.Errorf("encountered nil syscall while initializing Seccomp")
+		}
+
+		if err = matchCall(filter, call); err != nil {
+			return err
+		}
+	}
+
+	if err = filter.Load(); err != nil {
+		return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
+	}
+
+	return nil
+}
+
+// getAction converts a libcontainer Action to its libseccomp ScmpAction; any other value yields ActInvalid and an error.
+func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
+	switch act {
+	case configs.Kill:
+		return actKill, nil
+	case configs.Errno:
+		return actErrno, nil
+	case configs.Trap:
+		return actTrap, nil
+	case configs.Allow:
+		return actAllow, nil
+	default:
+		return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
+	}
+}
+
+// getOperator converts a libcontainer Operator to its libseccomp ScmpCompareOp; any other value yields CompareInvalid and an error.
+func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
+	switch op {
+	case configs.EqualTo:
+		return libseccomp.CompareEqual, nil
+	case configs.NotEqualTo:
+		return libseccomp.CompareNotEqual, nil
+	case configs.GreaterThan:
+		return libseccomp.CompareGreater, nil
+	case configs.GreaterThanOrEqualTo:
+		return libseccomp.CompareGreaterEqual, nil
+	case configs.LessThan:
+		return libseccomp.CompareLess, nil
+	case configs.LessThanOrEqualTo:
+		return libseccomp.CompareLessOrEqual, nil
+	case configs.MaskEqualTo:
+		return libseccomp.CompareMaskedEqual, nil
+	default:
+		return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
+	}
+}
+
+// getCondition converts a libcontainer Arg to a libseccomp ScmpCondition; a nil arg or an unrecognized operator is an error.
+func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
+	cond := libseccomp.ScmpCondition{}
+
+	if arg == nil {
+		return cond, fmt.Errorf("cannot convert nil to syscall condition")
+	}
+
+	op, err := getOperator(arg.Op)
+	if err != nil {
+		return cond, err
+	}
+
+	return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
+}
+
+// matchCall adds one rule for call to filter; a syscall name that cannot be resolved is logged and skipped rather than reported as an error.
+func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
+	if call == nil || filter == nil {
+		return fmt.Errorf("cannot use nil as syscall to block")
+	}
+
+	if len(call.Name) == 0 {
+		return fmt.Errorf("empty string is not a valid syscall")
+	}
+
+	// If we can't resolve the syscall, assume it's not supported on this kernel
+	// Ignore it, don't error out
+	callNum, err := libseccomp.GetSyscallFromName(call.Name)
+	if err != nil {
+		log.Printf("Error resolving syscall name %s: %s - ignoring syscall.", call.Name, err)
+		return nil
+	}
+
+	// Convert the call's action to the libseccomp equivalent
+	callAct, err := getAction(call.Action)
+	if err != nil {
+		return err
+	}
+
+	// Unconditional match - just add the rule
+	if len(call.Args) == 0 {
+		if err = filter.AddRule(callNum, callAct); err != nil {
+			return err
+		}
+	} else {
+		// Conditional match - convert the per-arg rules into library format
+		conditions := []libseccomp.ScmpCondition{}
+
+		for _, cond := range call.Args {
+			newCond, err := getCondition(cond)
+			if err != nil {
+				return err
+			}
+
+			conditions = append(conditions, newCond)
+		}
+
+		if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}

+ 0 - 124
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unix.go

@@ -1,124 +0,0 @@
-// +build linux
-
-// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go.
-package seccomp
-
-import (
-	"syscall"
-	"unsafe"
-)
-
-// Operator that is used for argument comparison.
-type Operator int
-
-const (
-	EqualTo Operator = iota
-	NotEqualTo
-	GreatherThan
-	LessThan
-	MaskEqualTo
-)
-
-const (
-	jumpJT  = 0xff
-	jumpJF  = 0xff
-	labelJT = 0xfe
-	labelJF = 0xfe
-)
-
-const (
-	pfLD                 = 0x0
-	retKill              = 0x00000000
-	retTrap              = 0x00030000
-	retAllow             = 0x7fff0000
-	modeFilter           = 0x2
-	prSetNoNewPrivileges = 0x26
-)
-
-func actionErrno(errno uint32) uint32 {
-	return 0x00050000 | (errno & 0x0000ffff)
-}
-
-var (
-	secData = struct {
-		nr         int32
-		arch       uint32
-		insPointer uint64
-		args       [6]uint64
-	}{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}}
-)
-
-var isLittle = func() bool {
-	var (
-		x  = 0x1234
-		p  = unsafe.Pointer(&x)
-		p2 = (*[unsafe.Sizeof(0)]byte)(p)
-	)
-	if p2[0] == 0 {
-		return false
-	}
-	return true
-}()
-
-var endian endianSupport
-
-type endianSupport struct {
-}
-
-func (e endianSupport) hi(i uint32) uint32 {
-	if isLittle {
-		return e.little(i)
-	}
-	return e.big(i)
-}
-
-func (e endianSupport) low(i uint32) uint32 {
-	if isLittle {
-		return e.big(i)
-	}
-	return e.little(i)
-}
-
-func (endianSupport) big(idx uint32) uint32 {
-	if idx >= 6 {
-		return 0
-	}
-	return uint32(unsafe.Offsetof(secData.args)) + 8*idx
-}
-
-func (endianSupport) little(idx uint32) uint32 {
-	if idx < 0 || idx >= 6 {
-		return 0
-	}
-	return uint32(unsafe.Offsetof(secData.args)) +
-		uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch))
-}
-
-func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error {
-	_, _, err := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
-	if err != 0 {
-		return err
-	}
-	return nil
-}
-
-func newSockFprog(filter []sockFilter) *sockFprog {
-	return &sockFprog{
-		len:  uint16(len(filter)),
-		filt: filter,
-	}
-}
-
-type sockFprog struct {
-	len  uint16
-	filt []sockFilter
-}
-
-func (s *sockFprog) set() error {
-	_, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP),
-		uintptr(modeFilter), uintptr(unsafe.Pointer(s)))
-	if err != 0 {
-		return err
-	}
-	return nil
-}

+ 17 - 1
vendor/src/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go

@@ -1,3 +1,19 @@
-// +build !linux
+// +build !linux !cgo !seccomp
 
 
 package seccomp
 package seccomp
+
+import (
+	"errors"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")
+
+// InitSeccomp is the stub for builds without seccomp support: a nil config is a no-op, any other config returns ErrSeccompNotEnabled.
+func InitSeccomp(config *configs.Seccomp) error {
+	if config != nil {
+		return ErrSeccompNotEnabled
+	}
+	return nil
+}

+ 9 - 0
vendor/src/github.com/opencontainers/runc/libcontainer/setns_init_linux.go

@@ -7,6 +7,7 @@ import (
 
 
 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runc/libcontainer/label"
 	"github.com/opencontainers/runc/libcontainer/label"
+	"github.com/opencontainers/runc/libcontainer/seccomp"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/system"
 )
 )
 
 
@@ -20,6 +21,14 @@ func (l *linuxSetnsInit) Init() error {
 	if err := setupRlimits(l.config.Config); err != nil {
 	if err := setupRlimits(l.config.Config); err != nil {
 		return err
 		return err
 	}
 	}
+	if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
+		return err
+	}
+	if l.config.Config.Seccomp != nil {
+		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
+			return err
+		}
+	}
 	if err := finalizeNamespace(l.config); err != nil {
 	if err := finalizeNamespace(l.config); err != nil {
 		return err
 		return err
 	}
 	}

+ 10 - 3
vendor/src/github.com/opencontainers/runc/libcontainer/standard_init_linux.go

@@ -9,6 +9,7 @@ import (
 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runc/libcontainer/apparmor"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/label"
 	"github.com/opencontainers/runc/libcontainer/label"
+	"github.com/opencontainers/runc/libcontainer/seccomp"
 	"github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/runc/libcontainer/system"
 )
 )
 
 
@@ -46,6 +47,10 @@ func (l *linuxStandardInit) Init() error {
 	if err := setupRlimits(l.config.Config); err != nil {
 	if err := setupRlimits(l.config.Config); err != nil {
 		return err
 		return err
 	}
 	}
+	if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
+		return err
+	}
+
 	label.Init()
 	label.Init()
 	// InitializeMountNamespace() can be executed only for a new mount namespace
 	// InitializeMountNamespace() can be executed only for a new mount namespace
 	if l.config.Config.Namespaces.Contains(configs.NEWNS) {
 	if l.config.Config.Namespaces.Contains(configs.NEWNS) {
@@ -85,6 +90,11 @@ func (l *linuxStandardInit) Init() error {
 	if err != nil {
 	if err != nil {
 		return err
 		return err
 	}
 	}
+	if l.config.Config.Seccomp != nil {
+		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
+			return err
+		}
+	}
 	if err := finalizeNamespace(l.config); err != nil {
 	if err := finalizeNamespace(l.config); err != nil {
 		return err
 		return err
 	}
 	}
@@ -99,8 +109,5 @@ func (l *linuxStandardInit) Init() error {
 	if syscall.Getppid() != l.parentPid {
 	if syscall.Getppid() != l.parentPid {
 		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
 		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
 	}
 	}
-	if err := finalizeSeccomp(l.config); err != nil {
-		return err
-	}
 	return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
 	return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
 }
 }