Bladeren bron

Merge pull request #13893 from mrunalp/update_libcontainer

Updates libcontainer to v2.2.1
Michael Crosby 10 jaren geleden
bovenliggende
commit
010470dd53
36 gewijzigde bestanden met toevoegingen van 930 en 78 verwijderingen
  1. 3 3
      daemon/stats_linux.go
  2. 1 1
      hack/vendor.sh
  3. 1 1
      vendor/src/github.com/docker/libcontainer/README.md
  4. 8 8
      vendor/src/github.com/docker/libcontainer/SPEC.md
  5. 15 3
      vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go
  6. 43 2
      vendor/src/github.com/docker/libcontainer/cgroups/fs/hugetlb.go
  7. 55 17
      vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go
  8. 25 10
      vendor/src/github.com/docker/libcontainer/cgroups/stats.go
  9. 36 10
      vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go
  10. 32 8
      vendor/src/github.com/docker/libcontainer/cgroups/utils.go
  11. 6 0
      vendor/src/github.com/docker/libcontainer/configs/cgroup.go
  12. 40 1
      vendor/src/github.com/docker/libcontainer/configs/config.go
  13. 2 0
      vendor/src/github.com/docker/libcontainer/configs/config_unix.go
  14. 1 1
      vendor/src/github.com/docker/libcontainer/configs/device_defaults.go
  15. 9 0
      vendor/src/github.com/docker/libcontainer/configs/hugepage_limit.go
  16. 1 1
      vendor/src/github.com/docker/libcontainer/configs/namespaces_unix.go
  17. 13 0
      vendor/src/github.com/docker/libcontainer/console_freebsd.go
  18. 2 2
      vendor/src/github.com/docker/libcontainer/console_linux.go
  19. 2 0
      vendor/src/github.com/docker/libcontainer/devices/devices_unix.go
  20. 1 1
      vendor/src/github.com/docker/libcontainer/devices/number.go
  21. 70 1
      vendor/src/github.com/docker/libcontainer/init_linux.go
  22. 2 0
      vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_armppc64.go
  23. 1 1
      vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_notarm.go
  24. 3 3
      vendor/src/github.com/docker/libcontainer/nsenter/nsexec.c
  25. 1 1
      vendor/src/github.com/docker/libcontainer/rootfs_linux.go
  26. 32 0
      vendor/src/github.com/docker/libcontainer/seccomp/bpf.go
  27. 144 0
      vendor/src/github.com/docker/libcontainer/seccomp/context.go
  28. 116 0
      vendor/src/github.com/docker/libcontainer/seccomp/filter.go
  29. 68 0
      vendor/src/github.com/docker/libcontainer/seccomp/jump_amd64.go
  30. 122 0
      vendor/src/github.com/docker/libcontainer/seccomp/seccomp.go
  31. 3 0
      vendor/src/github.com/docker/libcontainer/standard_init_linux.go
  32. 5 0
      vendor/src/github.com/docker/libcontainer/stats_freebsd.go
  33. 6 2
      vendor/src/github.com/docker/libcontainer/system/setns_linux.go
  34. 1 1
      vendor/src/github.com/docker/libcontainer/system/sysconfig.go
  35. 57 0
      vendor/src/github.com/docker/libcontainer/user/user.go
  36. 3 0
      vendor/src/github.com/docker/libcontainer/utils/utils.go

+ 3 - 3
daemon/stats_linux.go

@@ -52,10 +52,10 @@ func convertStatsToAPITypes(ls *libcontainer.Stats) *types.Stats {
 		}
 		mem := cs.MemoryStats
 		s.MemoryStats = types.MemoryStats{
-			Usage:    mem.Usage,
-			MaxUsage: mem.MaxUsage,
+			Usage:    mem.Usage.Usage,
+			MaxUsage: mem.Usage.MaxUsage,
 			Stats:    mem.Stats,
-			Failcnt:  mem.Failcnt,
+			Failcnt:  mem.Usage.Failcnt,
 		}
 	}
 

+ 1 - 1
hack/vendor.sh

@@ -30,7 +30,7 @@ clone git github.com/hashicorp/consul v0.5.2
 # get distribution packages
 clone git github.com/docker/distribution b9eeb328080d367dbde850ec6e94f1e4ac2b5efe
 
-clone git github.com/docker/libcontainer v2.1.1
+clone git github.com/docker/libcontainer v2.2.1
 # libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh)
 clone git github.com/coreos/go-systemd v2
 clone git github.com/godbus/dbus v2

+ 1 - 1
vendor/src/github.com/docker/libcontainer/README.md

@@ -1,4 +1,4 @@
-## libcontainer - reference implementation for containers [![Build Status](https://jenkins.dockerproject.com/buildStatus/icon?job=Libcontainer Master)](https://jenkins.dockerproject.com/job/Libcontainer%20Master/)
+## libcontainer - reference implementation for containers [![Build Status](https://jenkins.dockerproject.org/buildStatus/icon?job=Libcontainer%20Master)](https://jenkins.dockerproject.org/job/Libcontainer%20Master/)
 
 Libcontainer provides a native Go implementation for creating containers
 with namespaces, cgroups, capabilities, and filesystem access controls.

+ 8 - 8
vendor/src/github.com/docker/libcontainer/SPEC.md

@@ -47,14 +47,14 @@ unmount all the mounts that were setup within that namespace.
 For a container to execute properly there are certain filesystems that 
 are required to be mounted within the rootfs that the runtime will setup.
 
-|     Path    |  Type  |                  Flags                 |                 Data                    |
-| ----------- | ------ | -------------------------------------- | --------------------------------------- |
-| /proc       | proc   | MS_NOEXEC,MS_NOSUID,MS_NODEV           |                                         |
-| /dev        | tmpfs  | MS_NOEXEC,MS_STRICTATIME               | mode=755                                |
-| /dev/shm    | shm    | MS_NOEXEC,MS_NOSUID,MS_NODEV           | mode=1777,size=65536k                   |
-| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV           |                                         |
-| /dev/pts    | devpts | MS_NOEXEC,MS_NOSUID                    | newinstance,ptmxmode=0666,mode=620,gid5 |
-| /sys        | sysfs  | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY |                                         |
+|     Path    |  Type  |                  Flags                 |                 Data                     |
+| ----------- | ------ | -------------------------------------- | ---------------------------------------- |
+| /proc       | proc   | MS_NOEXEC,MS_NOSUID,MS_NODEV           |                                          |
+| /dev        | tmpfs  | MS_NOEXEC,MS_STRICTATIME               | mode=755                                 |
+| /dev/shm    | tmpfs  | MS_NOEXEC,MS_NOSUID,MS_NODEV           | mode=1777,size=65536k                    |
+| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV           |                                          |
+| /dev/pts    | devpts | MS_NOEXEC,MS_NOSUID                    | newinstance,ptmxmode=0666,mode=620,gid=5 |
+| /sys        | sysfs  | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY |                                          |
 
 
 After a container's filesystems are mounted within the newly created 

+ 15 - 3
vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go

@@ -30,6 +30,7 @@ var (
 		"freezer":    &FreezerGroup{},
 	}
 	CgroupProcesses = "cgroup.procs"
+	HugePageSizes, _ = cgroups.GetHugePageSize()
 )
 
 type subsystem interface {
@@ -44,6 +45,7 @@ type subsystem interface {
 }
 
 type Manager struct {
+	mu      sync.Mutex
 	Cgroups *configs.Cgroup
 	Paths   map[string]string
 }
@@ -82,7 +84,6 @@ type data struct {
 }
 
 func (m *Manager) Apply(pid int) error {
-
 	if m.Cgroups == nil {
 		return nil
 	}
@@ -128,14 +129,25 @@ func (m *Manager) Apply(pid int) error {
 }
 
 func (m *Manager) Destroy() error {
-	return cgroups.RemovePaths(m.Paths)
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	if err := cgroups.RemovePaths(m.Paths); err != nil {
+		return err
+	}
+	m.Paths = make(map[string]string)
+	return nil
 }
 
 func (m *Manager) GetPaths() map[string]string {
-	return m.Paths
+	m.mu.Lock()
+	paths := m.Paths
+	m.mu.Unlock()
+	return paths
 }
 
 func (m *Manager) GetStats() (*cgroups.Stats, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
 	stats := cgroups.NewStats()
 	for name, path := range m.Paths {
 		sys, ok := subsystems[name]

+ 43 - 2
vendor/src/github.com/docker/libcontainer/cgroups/fs/hugetlb.go

@@ -3,6 +3,10 @@
 package fs
 
 import (
+	"fmt"
+	"strconv"
+	"strings"
+
 	"github.com/docker/libcontainer/cgroups"
 	"github.com/docker/libcontainer/configs"
 )
@@ -11,14 +15,25 @@ type HugetlbGroup struct {
 }
 
 func (s *HugetlbGroup) Apply(d *data) error {
-	// we just want to join this group even though we don't set anything
-	if _, err := d.join("hugetlb"); err != nil && !cgroups.IsNotFound(err) {
+	dir, err := d.join("hugetlb")
+	if err != nil && !cgroups.IsNotFound(err) {
+		return err
+	}
+
+	if err := s.Set(dir, d.c); err != nil {
 		return err
 	}
+
 	return nil
 }
 
 func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
+	for _, hugetlb := range cgroup.HugetlbLimit {
+		if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.Itoa(hugetlb.Limit)); err != nil {
+			return err
+		}
+	}
+
 	return nil
 }
 
@@ -27,5 +42,31 @@ func (s *HugetlbGroup) Remove(d *data) error {
 }
 
 func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
+	hugetlbStats := cgroups.HugetlbStats{}
+	for _, pageSize := range HugePageSizes {
+		usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
+		value, err := getCgroupParamUint(path, usage)
+		if err != nil {
+			return fmt.Errorf("failed to parse %s - %v", usage, err)
+		}
+		hugetlbStats.Usage = value
+
+		maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
+		value, err = getCgroupParamUint(path, maxUsage)
+		if err != nil {
+			return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
+		}
+		hugetlbStats.MaxUsage = value
+
+		failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
+		value, err = getCgroupParamUint(path, failcnt)
+		if err != nil {
+			return fmt.Errorf("failed to parse %s - %v", failcnt, err)
+		}
+		hugetlbStats.Failcnt = value
+
+		stats.HugetlbStats[pageSize] = hugetlbStats
+	}
+
 	return nil
 }

+ 55 - 17
vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go

@@ -8,6 +8,7 @@ import (
 	"os"
 	"path/filepath"
 	"strconv"
+	"strings"
 
 	"github.com/docker/libcontainer/cgroups"
 	"github.com/docker/libcontainer/configs"
@@ -45,12 +46,6 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
 			return err
 		}
 	}
-	// By default, MemorySwap is set to twice the size of Memory.
-	if cgroup.MemorySwap == 0 && cgroup.Memory != 0 {
-		if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Memory*2, 10)); err != nil {
-			return err
-		}
-	}
 	if cgroup.MemorySwap > 0 {
 		if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.MemorySwap, 10)); err != nil {
 			return err
@@ -67,6 +62,11 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
 			return err
 		}
 	}
+	if cgroup.MemorySwappiness >= 0 && cgroup.MemorySwappiness <= 100 {
+		if err := writeFile(path, "memory.swappiness", strconv.FormatInt(cgroup.MemorySwappiness, 10)); err != nil {
+			return err
+		}
+	}
 
 	return nil
 }
@@ -94,24 +94,62 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
 		}
 		stats.MemoryStats.Stats[t] = v
 	}
+	stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
 
-	// Set memory usage and max historical usage.
-	value, err := getCgroupParamUint(path, "memory.usage_in_bytes")
+	memoryUsage, err := getMemoryData(path, "")
 	if err != nil {
-		return fmt.Errorf("failed to parse memory.usage_in_bytes - %v", err)
+		return err
 	}
-	stats.MemoryStats.Usage = value
-	stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
-	value, err = getCgroupParamUint(path, "memory.max_usage_in_bytes")
+	stats.MemoryStats.Usage = memoryUsage
+	swapUsage, err := getMemoryData(path, "memsw")
 	if err != nil {
-		return fmt.Errorf("failed to parse memory.max_usage_in_bytes - %v", err)
+		return err
 	}
-	stats.MemoryStats.MaxUsage = value
-	value, err = getCgroupParamUint(path, "memory.failcnt")
+	stats.MemoryStats.SwapUsage = swapUsage
+	kernelUsage, err := getMemoryData(path, "kmem")
 	if err != nil {
-		return fmt.Errorf("failed to parse memory.failcnt - %v", err)
+		return err
 	}
-	stats.MemoryStats.Failcnt = value
+	stats.MemoryStats.KernelUsage = kernelUsage
 
 	return nil
 }
+
+func getMemoryData(path, name string) (cgroups.MemoryData, error) {
+	memoryData := cgroups.MemoryData{}
+
+	moduleName := "memory"
+	if name != "" {
+		moduleName = strings.Join([]string{"memory", name}, ".")
+	}
+	usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
+	maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
+	failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
+
+	value, err := getCgroupParamUint(path, usage)
+	if err != nil {
+		if moduleName != "memory" && os.IsNotExist(err) {
+			return cgroups.MemoryData{}, nil
+		}
+		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
+	}
+	memoryData.Usage = value
+	value, err = getCgroupParamUint(path, maxUsage)
+	if err != nil {
+		if moduleName != "memory" && os.IsNotExist(err) {
+			return cgroups.MemoryData{}, nil
+		}
+		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
+	}
+	memoryData.MaxUsage = value
+	value, err = getCgroupParamUint(path, failcnt)
+	if err != nil {
+		if moduleName != "memory" && os.IsNotExist(err) {
+			return cgroups.MemoryData{}, nil
+		}
+		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
+	}
+	memoryData.Failcnt = value
+
+	return memoryData, nil
+}

+ 25 - 10
vendor/src/github.com/docker/libcontainer/cgroups/stats.go

@@ -32,18 +32,21 @@ type CpuStats struct {
 	ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
 }
 
+type MemoryData struct {
+	Usage    uint64 `json:"usage,omitempty"`
+	MaxUsage uint64 `json:"max_usage,omitempty"`
+	Failcnt  uint64 `json:"failcnt"`
+}
 type MemoryStats struct {
-	// current res_counter usage for memory
-	Usage uint64 `json:"usage,omitempty"`
 	// memory used for cache
 	Cache uint64 `json:"cache,omitempty"`
-	// maximum usage ever recorded.
-	MaxUsage uint64 `json:"max_usage,omitempty"`
-	// TODO(vishh): Export these as stronger types.
-	// all the stats exported via memory.stat.
-	Stats map[string]uint64 `json:"stats,omitempty"`
-	// number of times memory usage hits limits.
-	Failcnt uint64 `json:"failcnt"`
+	// usage of memory
+	Usage MemoryData `json:"usage,omitempty"`
+	// usage of memory + swap
+	SwapUsage MemoryData `json:"swap_usage,omitempty"`
+	// usage of kernel memory
+	KernelUsage MemoryData        `json:"kernel_usage,omitempty"`
+	Stats       map[string]uint64 `json:"stats,omitempty"`
 }
 
 type BlkioStatEntry struct {
@@ -65,13 +68,25 @@ type BlkioStats struct {
 	SectorsRecursive        []BlkioStatEntry `json:"sectors_recursive,omitempty"`
 }
 
+type HugetlbStats struct {
+	// current res_counter usage for hugetlb
+	Usage uint64 `json:"usage,omitempty"`
+	// maximum usage ever recorded.
+	MaxUsage uint64 `json:"max_usage,omitempty"`
+	// number of times hugetlb usage allocation failed.
+	Failcnt uint64 `json:"failcnt"`
+}
+
 type Stats struct {
 	CpuStats    CpuStats    `json:"cpu_stats,omitempty"`
 	MemoryStats MemoryStats `json:"memory_stats,omitempty"`
 	BlkioStats  BlkioStats  `json:"blkio_stats,omitempty"`
+	// the map is in the format "size of hugepage: stats of the hugepage"
+	HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
 }
 
 func NewStats() *Stats {
 	memoryStats := MemoryStats{Stats: make(map[string]uint64)}
-	return &Stats{MemoryStats: memoryStats}
+	hugetlbStats := make(map[string]HugetlbStats)
+	return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
 }

+ 36 - 10
vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go

@@ -20,6 +20,7 @@ import (
 )
 
 type Manager struct {
+	mu      sync.Mutex
 	Cgroups *configs.Cgroup
 	Paths   map[string]string
 }
@@ -222,6 +223,9 @@ func (m *Manager) Apply(pid int) error {
 		return err
 	}
 
+	if err := joinHugetlb(c, pid); err != nil {
+		return err
+	}
 	// FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem
 	// using that (at least on systemd 208, see https://github.com/docker/libcontainer/pull/354),
 	// so use fs work around for now.
@@ -253,11 +257,21 @@ func (m *Manager) Apply(pid int) error {
 }
 
 func (m *Manager) Destroy() error {
-	return cgroups.RemovePaths(m.Paths)
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	theConn.StopUnit(getUnitName(m.Cgroups), "replace")
+	if err := cgroups.RemovePaths(m.Paths); err != nil {
+		return err
+	}
+	m.Paths = make(map[string]string)
+	return nil
 }
 
 func (m *Manager) GetPaths() map[string]string {
-	return m.Paths
+	m.mu.Lock()
+	paths := m.Paths
+	m.mu.Unlock()
+	return paths
 }
 
 func writeFile(dir, file, data string) error {
@@ -391,6 +405,8 @@ func (m *Manager) GetPids() ([]int, error) {
 }
 
 func (m *Manager) GetStats() (*cgroups.Stats, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
 	stats := cgroups.NewStats()
 	for name, path := range m.Paths {
 		sys, ok := subsystems[name]
@@ -453,14 +469,8 @@ func joinMemory(c *configs.Cgroup, pid int) error {
 	}
 
 	// -1 disables memoryswap
-	if c.Memory != 0 && c.MemorySwap >= 0 {
-		memorySwap := c.MemorySwap
-
-		if memorySwap == 0 {
-			// By default, MemorySwap is set to twice the size of RAM.
-			memorySwap = c.Memory * 2
-		}
-		err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(memorySwap, 10))
+	if c.MemorySwap > 0 {
+		err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.MemorySwap, 10))
 		if err != nil {
 			return err
 		}
@@ -472,6 +482,12 @@ func joinMemory(c *configs.Cgroup, pid int) error {
 			return err
 		}
 	}
+	if c.MemorySwappiness >= 0 && c.MemorySwappiness <= 100 {
+		err = writeFile(path, "memory.swappiness", strconv.FormatInt(c.MemorySwappiness, 10))
+		if err != nil {
+			return err
+		}
+	}
 
 	return nil
 }
@@ -526,3 +542,13 @@ func joinBlkio(c *configs.Cgroup, pid int) error {
 
 	return nil
 }
+
+func joinHugetlb(c *configs.Cgroup, pid int) error {
+	path, err := join(c, "hugetlb", pid)
+	if err != nil && !cgroups.IsNotFound(err) {
+		return err
+	}
+
+	hugetlb := subsystems["hugetlb"]
+	return hugetlb.Set(path, c)
+}

+ 32 - 8
vendor/src/github.com/docker/libcontainer/cgroups/utils.go

@@ -14,24 +14,28 @@ import (
 	"time"
 
 	"github.com/docker/docker/pkg/mount"
+	"github.com/docker/docker/pkg/units"
 )
 
 // https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
 func FindCgroupMountpoint(subsystem string) (string, error) {
-	mounts, err := mount.GetMounts()
+	f, err := os.Open("/proc/self/mountinfo")
 	if err != nil {
 		return "", err
 	}
-
-	for _, mount := range mounts {
-		if mount.Fstype == "cgroup" {
-			for _, opt := range strings.Split(mount.VfsOpts, ",") {
-				if opt == subsystem {
-					return mount.Mountpoint, nil
-				}
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		txt := scanner.Text()
+		fields := strings.Split(txt, " ")
+		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
+			if opt == subsystem {
+				return fields[4], nil
 			}
 		}
 	}
+	if err := scanner.Err(); err != nil {
+		return "", err
+	}
 
 	return "", NewNotFoundError(subsystem)
 }
@@ -238,3 +242,23 @@ func RemovePaths(paths map[string]string) (err error) {
 	}
 	return fmt.Errorf("Failed to remove paths: %s", paths)
 }
+
+func GetHugePageSize() ([]string, error) {
+	var pageSizes []string
+	sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
+	files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
+	if err != nil {
+		return pageSizes, err
+	}
+	for _, st := range files {
+		nameArray := strings.Split(st.Name(), "-")
+		pageSize, err := units.RAMInBytes(nameArray[1])
+		if err != nil {
+			return []string{}, err
+		}
+		sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
+		pageSizes = append(pageSizes, sizeString)
+	}
+
+	return pageSizes, nil
+}

+ 6 - 0
vendor/src/github.com/docker/libcontainer/configs/cgroup.go

@@ -78,12 +78,18 @@ type Cgroup struct {
 	// set the freeze value for the process
 	Freezer FreezerState `json:"freezer"`
 
+	// Hugetlb limit (in bytes)
+	HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
+
 	// Parent slice to use for systemd TODO: remove in favor or parent
 	Slice string `json:"slice"`
 
 	// Whether to disable OOM Killer
 	OomKillDisable bool `json:"oom_kill_disable"`
 
+	// Tuning swappiness behaviour per cgroup
+	MemorySwappiness int64 `json:"memory_swappiness"`
+
 	// Set priority of network traffic for container
 	NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
 

+ 40 - 1
vendor/src/github.com/docker/libcontainer/configs/config.go

@@ -13,6 +13,40 @@ type IDMap struct {
 	Size        int `json:"size"`
 }
 
+type Seccomp struct {
+	Syscalls []*Syscall `json:"syscalls"`
+}
+
+type Action int
+
+const (
+	Kill Action = iota - 3
+	Trap
+	Allow
+)
+
+type Operator int
+
+const (
+	EqualTo Operator = iota
+	NotEqualTo
+	GreatherThan
+	LessThan
+	MaskEqualTo
+)
+
+type Arg struct {
+	Index int      `json:"index"`
+	Value uint32   `json:"value"`
+	Op    Operator `json:"op"`
+}
+
+type Syscall struct {
+	Value  int    `json:"value"`
+	Action Action `json:"action"`
+	Args   []*Arg `json:"args"`
+}
+
 // TODO Windows. Many of these fields should be factored out into those parts
 // which are common across platforms, and those which are platform specific.
 
@@ -85,7 +119,7 @@ type Config struct {
 
 	// AdditionalGroups specifies the gids that should be added to supplementary groups
 	// in addition to those that the user belongs to.
-	AdditionalGroups []int `json:"additional_groups"`
+	AdditionalGroups []string `json:"additional_groups"`
 
 	// UidMappings is an array of User ID mappings for User Namespaces
 	UidMappings []IDMap `json:"uid_mappings"`
@@ -104,4 +138,9 @@ type Config struct {
 	// SystemProperties is a map of properties and their values. It is the equivalent of using
 	// sysctl -w my.property.name value in Linux.
 	SystemProperties map[string]string `json:"system_properties"`
+
+	// Seccomp allows actions to be taken whenever a syscall is made within the container.
+	// By default, all syscalls are allowed; actions to allow, trap, kill, or return an errno
+	// can be specified on a per-syscall basis.
+	Seccomp *Seccomp `json:"seccomp"`
 }

+ 2 - 0
vendor/src/github.com/docker/libcontainer/configs/config_linux.go → vendor/src/github.com/docker/libcontainer/configs/config_unix.go

@@ -1,3 +1,5 @@
+// +build freebsd linux
+
 package configs
 
 import "fmt"

+ 1 - 1
vendor/src/github.com/docker/libcontainer/configs/device_defaults.go

@@ -1,4 +1,4 @@
-// +build linux
+// +build linux freebsd
 
 package configs
 

+ 9 - 0
vendor/src/github.com/docker/libcontainer/configs/hugepage_limit.go

@@ -0,0 +1,9 @@
+package configs
+
+type HugepageLimit struct {
+	// which type of hugepage to limit.
+	Pagesize string `json:"page_size"`
+
+	// usage limit for hugepage.
+	Limit int `json:"limit"`
+}

+ 1 - 1
vendor/src/github.com/docker/libcontainer/configs/namespaces_linux.go → vendor/src/github.com/docker/libcontainer/configs/namespaces_unix.go

@@ -1,4 +1,4 @@
-// +build linux
+// +build linux freebsd
 
 package configs
 

+ 13 - 0
vendor/src/github.com/docker/libcontainer/console_freebsd.go

@@ -0,0 +1,13 @@
+// +build freebsd
+
+package libcontainer
+
+import (
+	"errors"
+)
+
+// newConsole returns an initialized console that can be used within a container by copying bytes
+// from the master side to the slave that is attached as the tty for the container's init process.
+func newConsole(uid, gid int) (Console, error) {
+	return nil, errors.New("libcontainer console is not supported on FreeBSD")
+}

+ 2 - 2
vendor/src/github.com/docker/libcontainer/console_linux.go

@@ -92,7 +92,7 @@ func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error {
 	return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
 }
 
-// dupStdio opens the slavePath for the console and dup2s the fds to the current
+// dupStdio opens the slavePath for the console and dups the fds to the current
 // processes stdio, fd 0,1,2.
 func (c *linuxConsole) dupStdio() error {
 	slave, err := c.open(syscall.O_RDWR)
@@ -101,7 +101,7 @@ func (c *linuxConsole) dupStdio() error {
 	}
 	fd := int(slave.Fd())
 	for _, i := range []int{0, 1, 2} {
-		if err := syscall.Dup2(fd, i); err != nil {
+		if err := syscall.Dup3(fd, i, 0); err != nil {
 			return err
 		}
 	}

+ 2 - 0
vendor/src/github.com/docker/libcontainer/devices/devices_linux.go → vendor/src/github.com/docker/libcontainer/devices/devices_unix.go

@@ -1,3 +1,5 @@
+// +build linux freebsd
+
 package devices
 
 import (

+ 1 - 1
vendor/src/github.com/docker/libcontainer/devices/number.go

@@ -1,4 +1,4 @@
-// +build linux
+// +build linux freebsd
 
 package devices
 

+ 70 - 1
vendor/src/github.com/docker/libcontainer/init_linux.go

@@ -13,6 +13,7 @@ import (
 	"github.com/docker/libcontainer/cgroups"
 	"github.com/docker/libcontainer/configs"
 	"github.com/docker/libcontainer/netlink"
+	"github.com/docker/libcontainer/seccomp"
 	"github.com/docker/libcontainer/system"
 	"github.com/docker/libcontainer/user"
 	"github.com/docker/libcontainer/utils"
@@ -176,10 +177,20 @@ func setupUser(config *initConfig) error {
 	if err != nil {
 		return err
 	}
-	suppGroups := append(execUser.Sgids, config.Config.AdditionalGroups...)
+
+	var addGroups []int
+	if len(config.Config.AdditionalGroups) > 0 {
+		addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath)
+		if err != nil {
+			return err
+		}
+	}
+
+	suppGroups := append(execUser.Sgids, addGroups...)
 	if err := syscall.Setgroups(suppGroups); err != nil {
 		return err
 	}
+
 	if err := system.Setgid(execUser.Gid); err != nil {
 		return err
 	}
@@ -259,3 +270,61 @@ func killCgroupProcesses(m cgroups.Manager) error {
 	}
 	return nil
 }
+
+func finalizeSeccomp(config *initConfig) error {
+	if config.Config.Seccomp == nil {
+		return nil
+	}
+	context := seccomp.New()
+	for _, s := range config.Config.Seccomp.Syscalls {
+		ss := &seccomp.Syscall{
+			Value:  uint32(s.Value),
+			Action: seccompAction(s.Action),
+		}
+		if len(s.Args) > 0 {
+			ss.Args = seccompArgs(s.Args)
+		}
+		context.Add(ss)
+	}
+	return context.Load()
+}
+
+func seccompAction(a configs.Action) seccomp.Action {
+	switch a {
+	case configs.Kill:
+		return seccomp.Kill
+	case configs.Trap:
+		return seccomp.Trap
+	case configs.Allow:
+		return seccomp.Allow
+	}
+	return seccomp.Error(syscall.Errno(int(a)))
+}
+
+func seccompArgs(args []*configs.Arg) seccomp.Args {
+	var sa []seccomp.Arg
+	for _, a := range args {
+		sa = append(sa, seccomp.Arg{
+			Index: uint32(a.Index),
+			Op:    seccompOperator(a.Op),
+			Value: uint(a.Value),
+		})
+	}
+	return seccomp.Args{sa}
+}
+
+func seccompOperator(o configs.Operator) seccomp.Operator {
+	switch o {
+	case configs.EqualTo:
+		return seccomp.EqualTo
+	case configs.NotEqualTo:
+		return seccomp.NotEqualTo
+	case configs.GreatherThan:
+		return seccomp.GreatherThan
+	case configs.LessThan:
+		return seccomp.LessThan
+	case configs.MaskEqualTo:
+		return seccomp.MaskEqualTo
+	}
+	return 0
+}

+ 2 - 0
vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_arm.go → vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_armppc64.go

@@ -1,3 +1,5 @@
+// +build arm ppc64 ppc64le
+
 package netlink
 
 func ifrDataByte(b byte) uint8 {

+ 1 - 1
vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_notarm.go

@@ -1,4 +1,4 @@
-// +build !arm
+// +build !arm,!ppc64,!ppc64le
 
 package netlink
 

+ 3 - 3
vendor/src/github.com/docker/libcontainer/nsenter/nsexec.c

@@ -148,15 +148,15 @@ void nsexec()
 				pr_perror("ioctl TIOCSCTTY failed");
 				exit(1);
 			}
-			if (dup2(consolefd, STDIN_FILENO) != STDIN_FILENO) {
+			if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) {
 				pr_perror("Failed to dup 0");
 				exit(1);
 			}
-			if (dup2(consolefd, STDOUT_FILENO) != STDOUT_FILENO) {
+			if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) {
 				pr_perror("Failed to dup 1");
 				exit(1);
 			}
-			if (dup2(consolefd, STDERR_FILENO) != STDERR_FILENO) {
+			if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) {
 				pr_perror("Failed to dup 2");
 				exit(1);
 			}

+ 1 - 1
vendor/src/github.com/docker/libcontainer/rootfs_linux.go

@@ -272,7 +272,7 @@ func reOpenDevNull(rootfs string) error {
 		}
 		if stat.Rdev == devNullStat.Rdev {
 			// Close and re-open the fd.
-			if err := syscall.Dup2(int(file.Fd()), fd); err != nil {
+			if err := syscall.Dup3(int(file.Fd()), fd, 0); err != nil {
 				return err
 			}
 		}

+ 32 - 0
vendor/src/github.com/docker/libcontainer/seccomp/bpf.go

@@ -0,0 +1,32 @@
+package seccomp
+
+import "strings"
+
+type bpfLabel struct {
+	label    string
+	location uint32
+}
+
+type bpfLabels []bpfLabel
+
+// labelIndex returns the index for the label if it exists in the slice.
+// if it does not exist in the slice it appends the label lb to the end
+// of the slice and returns the index.
+func labelIndex(labels *bpfLabels, lb string) uint32 {
+	var id uint32
+	for id = 0; id < uint32(len(*labels)); id++ {
+		if strings.EqualFold(lb, (*labels)[id].label) {
+			return id
+		}
+	}
+	*labels = append(*labels, bpfLabel{lb, 0xffffffff})
+	return id
+}
+
+func scmpBpfStmt(code uint16, k uint32) sockFilter {
+	return sockFilter{code, 0, 0, k}
+}
+
+func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter {
+	return sockFilter{code, jt, jf, k}
+}

+ 144 - 0
vendor/src/github.com/docker/libcontainer/seccomp/context.go

@@ -0,0 +1,144 @@
+package seccomp
+
+import (
+	"errors"
+	"syscall"
+)
+
+const labelTemplate = "lb-%d-%d"
+
+// Action is the type of action that will be taken when a
+// syscall is performed.
+type Action int
+
+const (
+	Kill  Action = iota - 3 // Kill the calling process of the syscall.
+	Trap                    // Trap and coredump the calling process of the syscall.
+	Allow                   // Allow the syscall to be completed.
+)
+
+// Syscall is the specified syscall, action, and any type of arguments
+// to filter on.
+type Syscall struct {
+	// Value is the syscall number.
+	Value uint32
+	// Action is the action to perform when the specified syscall is made.
+	Action Action
+	// Args are filters that can be specified on the arguments to the syscall.
+	Args Args
+}
+
+func (s *Syscall) scmpAction() uint32 {
+	switch s.Action {
+	case Allow:
+		return retAllow
+	case Trap:
+		return retTrap
+	case Kill:
+		return retKill
+	}
+	return actionErrno(uint32(s.Action))
+}
+
+// Arg represents an argument to the syscall with the argument's index,
+// the operator to apply when matching, and the argument's value at that time.
+type Arg struct {
+	Index uint32   // index of args which start from zero
+	Op    Operator // operation, such as EQ/NE/GE/LE
+	Value uint     // the value of arg
+}
+
+type Args [][]Arg
+
+var (
+	ErrUnresolvedLabel      = errors.New("seccomp: unresolved label")
+	ErrDuplicateLabel       = errors.New("seccomp: duplicate label use")
+	ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument")
+)
+
+// Error returns an Action that will be used to send the calling
+// process the specified errno when the syscall is made.
+func Error(code syscall.Errno) Action {
+	return Action(code)
+}
+
+// New returns a new syscall context for use.
+func New() *Context {
+	return &Context{
+		syscalls: make(map[uint32]*Syscall),
+	}
+}
+
+// Context holds syscalls for the current process to limit the type of
+// actions the calling process can make.
+type Context struct {
+	syscalls map[uint32]*Syscall
+}
+
+// Add will add the specified syscall, action, and arguments to the seccomp
+// Context.
+func (c *Context) Add(s *Syscall) {
+	c.syscalls[s.Value] = s
+}
+
+// Remove removes the specified syscall configuration from the Context.
+func (c *Context) Remove(call uint32) {
+	delete(c.syscalls, call)
+}
+
+// Load will apply the Context to the calling process, making any seccomp process changes
+// apply after the context is loaded.
+func (c *Context) Load() error {
+	filter, err := c.newFilter()
+	if err != nil {
+		return err
+	}
+	if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil {
+		return err
+	}
+	prog := newSockFprog(filter)
+	return prog.set()
+}
+
+func (c *Context) newFilter() ([]sockFilter, error) {
+	var (
+		labels bpfLabels
+		f      = newFilter()
+	)
+	for _, s := range c.syscalls {
+		f.addSyscall(s, &labels)
+	}
+	f.allow()
+	// process args for the syscalls
+	for _, s := range c.syscalls {
+		if err := f.addArguments(s, &labels); err != nil {
+			return nil, err
+		}
+	}
+	// apply labels for arguments
+	idx := int32(len(*f) - 1)
+	for ; idx >= 0; idx-- {
+		lf := &(*f)[idx]
+		if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) {
+			continue
+		}
+		rel := int32(lf.jt)<<8 | int32(lf.jf)
+		if ((jumpJT << 8) | jumpJF) == rel {
+			if labels[lf.k].location == 0xffffffff {
+				return nil, ErrUnresolvedLabel
+			}
+			lf.k = labels[lf.k].location - uint32(idx+1)
+			lf.jt = 0
+			lf.jf = 0
+		} else if ((labelJT << 8) | labelJF) == rel {
+			if labels[lf.k].location != 0xffffffff {
+				return nil, ErrDuplicateLabel
+			}
+			labels[lf.k].location = uint32(idx)
+			lf.k = 0
+			lf.jt = 0
+			lf.jf = 0
+		}
+	}
+	return *f, nil
+}

+ 116 - 0
vendor/src/github.com/docker/libcontainer/seccomp/filter.go

@@ -0,0 +1,116 @@
+package seccomp
+
+import (
+	"fmt"
+	"syscall"
+	"unsafe"
+)
+
+type sockFilter struct {
+	code uint16
+	jt   uint8
+	jf   uint8
+	k    uint32
+}
+
+func newFilter() *filter {
+	var f filter
+	f = append(f, sockFilter{
+		pfLD + syscall.BPF_W + syscall.BPF_ABS,
+		0,
+		0,
+		uint32(unsafe.Offsetof(secData.nr)),
+	})
+	return &f
+}
+
+type filter []sockFilter
+
+func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) {
+	if len(s.Args) == 0 {
+		f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()))
+	} else {
+		if len(s.Args[0]) > 0 {
+			lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index)
+			f.call(s.Value,
+				scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
+					jumpJT, jumpJF))
+		}
+	}
+}
+
+func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error {
+	for i := 0; len(s.Args) > i; i++ {
+		if len(s.Args[i]) > 0 {
+			lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index)
+			f.label(labels, lb)
+			f.arg(s.Args[i][0].Index)
+		}
+		for j := 0; j < len(s.Args[i]); j++ {
+			var jf sockFilter
+			if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 {
+				lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index)
+				jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA,
+					labelIndex(labels, lbj), jumpJT, jumpJF)
+			} else {
+				jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())
+			}
+			if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil {
+				return err
+			}
+		}
+		f.allow()
+	}
+	return nil
+}
+
+func (f *filter) label(labels *bpfLabels, lb string) {
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF))
+}
+
+func (f *filter) call(nr uint32, jt sockFilter) {
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1))
+	*f = append(*f, jt)
+}
+
+func (f *filter) allow() {
+	*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow))
+}
+
+func (f *filter) deny() {
+	*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap))
+}
+
+func (f *filter) arg(index uint32) {
+	arg(f, index)
+}
+
+func (f *filter) op(operation Operator, v uint, jf sockFilter) error {
+	switch operation {
+	case EqualTo:
+		jumpEqualTo(f, v, jf)
+	case NotEqualTo:
+		jumpNotEqualTo(f, v, jf)
+	case GreatherThan:
+		jumpGreaterThan(f, v, jf)
+	case LessThan:
+		jumpLessThan(f, v, jf)
+	case MaskEqualTo:
+		jumpMaskEqualTo(f, v, jf)
+	default:
+		return ErrUnsupportedOperation
+	}
+	return nil
+}
+
+func arg(f *filter, idx uint32) {
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx)))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx)))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1))
+}
+
+func jump(f *filter, labels *bpfLabels, lb string) {
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
+		jumpJT, jumpJF))
+}

+ 68 - 0
vendor/src/github.com/docker/libcontainer/seccomp/jump_amd64.go

@@ -0,0 +1,68 @@
+// +build linux,amd64
+
+package seccomp
+
+// Using BPF filters
+//
+// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf
+import "syscall"
+
+func jumpGreaterThan(f *filter, v uint, jt sockFilter) {
+	lo := uint32(uint64(v) % 0x100000000)
+	hi := uint32(uint64(v) / 0x100000000)
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0))
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
+	*f = append(*f, jt)
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
+}
+
+func jumpEqualTo(f *filter, v uint, jt sockFilter) {
+	lo := uint32(uint64(v) % 0x100000000)
+	hi := uint32(uint64(v) / 0x100000000)
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
+	*f = append(*f, jt)
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
+}
+
+func jumpLessThan(f *filter, v uint, jt sockFilter) {
+	lo := uint32(uint64(v) % 0x100000000)
+	hi := uint32(uint64(v) / 0x100000000)
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0))
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
+	*f = append(*f, jt)
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
+}
+
+func jumpNotEqualTo(f *filter, v uint, jt sockFilter) {
+	lo := uint32(uint64(v) % 0x100000000)
+	hi := uint32(uint64(v) / 0x100000000)
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
+	*f = append(*f, jt)
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
+}
+
+// this checks for a value inside a mask. The evaluation is equal to doing
+// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER
+func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) {
+	lo := uint32(uint64(v) % 0x100000000)
+	hi := uint32(uint64(v) / 0x100000000)
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v)))
+	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2))
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
+	*f = append(*f, jt)
+	*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
+}

+ 122 - 0
vendor/src/github.com/docker/libcontainer/seccomp/seccomp.go

@@ -0,0 +1,122 @@
+// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go.
+package seccomp
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// Operator that is used for argument comparison.
+type Operator int
+
+const (
+	EqualTo Operator = iota
+	NotEqualTo
+	GreatherThan
+	LessThan
+	MaskEqualTo
+)
+
+const (
+	jumpJT  = 0xff
+	jumpJF  = 0xff
+	labelJT = 0xfe
+	labelJF = 0xfe
+)
+
+const (
+	pfLD                 = 0x0
+	retKill              = 0x00000000
+	retTrap              = 0x00030000
+	retAllow             = 0x7fff0000
+	modeFilter           = 0x2
+	prSetNoNewPrivileges = 0x26
+)
+
+func actionErrno(errno uint32) uint32 {
+	return 0x00050000 | (errno & 0x0000ffff)
+}
+
+var (
+	secData = struct {
+		nr         int32
+		arch       uint32
+		insPointer uint64
+		args       [6]uint64
+	}{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}}
+)
+
+var isLittle = func() bool {
+	var (
+		x  = 0x1234
+		p  = unsafe.Pointer(&x)
+		p2 = (*[unsafe.Sizeof(0)]byte)(p)
+	)
+	if p2[0] == 0 {
+		return false
+	}
+	return true
+}()
+
+var endian endianSupport
+
+type endianSupport struct {
+}
+
+func (e endianSupport) hi(i uint32) uint32 {
+	if isLittle {
+		return e.little(i)
+	}
+	return e.big(i)
+}
+
+func (e endianSupport) low(i uint32) uint32 {
+	if isLittle {
+		return e.big(i)
+	}
+	return e.little(i)
+}
+
+func (endianSupport) big(idx uint32) uint32 {
+	if idx >= 6 {
+		return 0
+	}
+	return uint32(unsafe.Offsetof(secData.args)) + 8*idx
+}
+
+// little returns the byte offset, within the seccomp data layout described by
+// secData, of the 32-bit word that starts 4 bytes into syscall argument idx
+// (the second half of that argument's 64-bit slot). An idx outside the six
+// argument slots yields 0.
+func (endianSupport) little(idx uint32) uint32 {
+	// idx is unsigned, so only the upper bound needs checking.
+	if idx >= 6 {
+		return 0
+	}
+	// Step by the size of an argument slot rather than its alignment: a
+	// uint64 may be only 4-byte aligned on 32-bit targets, while the slots
+	// of the args array are always 8 bytes apart.
+	return uint32(unsafe.Offsetof(secData.args)) +
+		uint32(unsafe.Sizeof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch))
+}
+
+func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error {
+	_, _, err := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
+	if err != 0 {
+		return err
+	}
+	return nil
+}
+
+func newSockFprog(filter []sockFilter) *sockFprog {
+	return &sockFprog{
+		len:  uint16(len(filter)),
+		filt: filter,
+	}
+}
+
+type sockFprog struct {
+	len  uint16
+	filt []sockFilter
+}
+
+func (s *sockFprog) set() error {
+	_, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP),
+		uintptr(modeFilter), uintptr(unsafe.Pointer(s)))
+	if err != 0 {
+		return err
+	}
+	return nil
+}

+ 3 - 0
vendor/src/github.com/docker/libcontainer/standard_init_linux.go

@@ -99,5 +99,8 @@ func (l *linuxStandardInit) Init() error {
 	if syscall.Getppid() != l.parentPid {
 		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
 	}
+	if err := finalizeSeccomp(l.config); err != nil {
+		return err
+	}
 	return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
 }

+ 5 - 0
vendor/src/github.com/docker/libcontainer/stats_freebsd.go

@@ -0,0 +1,5 @@
+package libcontainer
+
+type Stats struct {
+	Interfaces []*NetworkInterface
+}

+ 6 - 2
vendor/src/github.com/docker/libcontainer/system/setns_linux.go

@@ -21,16 +21,20 @@ var setNsMap = map[string]uintptr{
 	"linux/s390x":   339,
 }
 
+var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
+
+func SysSetns() uint32 {
+	return uint32(sysSetns)
+}
+
 func Setns(fd uintptr, flags uintptr) error {
 	ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
 	if !exists {
 		return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
 	}
-
 	_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
 	if err != 0 {
 		return err
 	}
-
 	return nil
 }

+ 1 - 1
vendor/src/github.com/docker/libcontainer/system/sysconfig.go

@@ -1,4 +1,4 @@
-// +build cgo,linux
+// +build cgo,linux cgo,freebsd
 
 package system
 

+ 57 - 0
vendor/src/github.com/docker/libcontainer/user/user.go

@@ -348,3 +348,60 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
 
 	return user, nil
 }
+
+// GetAdditionalGroupsPath looks up a list of groups by name or group id
+// against the group file at groupPath and returns the resulting set of
+// numeric gids (deduplicated, in no particular order).
+//
+// If a group name cannot be found, an error will be returned. If a group id
+// cannot be found, it will be returned as-is, provided it lies within
+// [minId, maxId]; otherwise ErrRange is returned.
+func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
+	groupReader, err := os.Open(groupPath)
+	if err != nil {
+		return nil, fmt.Errorf("Failed to open group file: %v", err)
+	}
+	defer groupReader.Close()
+
+	// Keep only the group file entries that one of the requested groups
+	// refers to, either by name or by numeric id.
+	groups, err := ParseGroupFilter(groupReader, func(g Group) bool {
+		for _, ag := range additionalGroups {
+			if g.Name == ag || strconv.Itoa(g.Gid) == ag {
+				return true
+			}
+		}
+		return false
+	})
+	if err != nil {
+		return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err)
+	}
+
+	gidMap := make(map[int]struct{})
+	for _, ag := range additionalGroups {
+		var found bool
+		for _, g := range groups {
+			// if we found a matched group either by name or gid, take the
+			// first matched as correct
+			if g.Name == ag || strconv.Itoa(g.Gid) == ag {
+				// A gid that is already in the set (the same group asked
+				// for twice, or once by name and once by id) still counts
+				// as found; otherwise a repeated group name would fall
+				// through to the numeric parse below and be rejected.
+				gidMap[g.Gid] = struct{}{}
+				found = true
+				break
+			}
+		}
+		// we asked for a group but didn't find it. let's check to see
+		// if we wanted a numeric group
+		if !found {
+			gid, err := strconv.Atoi(ag)
+			if err != nil {
+				return nil, fmt.Errorf("Unable to find group %s", ag)
+			}
+			// Ensure gid is inside gid range.
+			if gid < minId || gid > maxId {
+				return nil, ErrRange
+			}
+			gidMap[gid] = struct{}{}
+		}
+	}
+	// Non-nil even when empty, as before; sized up front since the final
+	// length is known.
+	gids := make([]int, 0, len(gidMap))
+	for gid := range gidMap {
+		gids = append(gids, gid)
+	}
+	return gids, nil
+}

+ 3 - 0
vendor/src/github.com/docker/libcontainer/utils/utils.go

@@ -21,6 +21,9 @@ func GenerateRandomName(prefix string, size int) (string, error) {
 	if _, err := io.ReadFull(rand.Reader, id); err != nil {
 		return "", err
 	}
+	if size > 64 {
+		size = 64
+	}
 	return prefix + hex.EncodeToString(id)[:size], nil
 }