Browse Source

Merge pull request #38828 from tiborvass/nvidia-gpu

Add DeviceRequests to HostConfig to support NVIDIA GPUs
Tibor Vass 6 năm trước cách đây
mục cha
commit
07bb45e23a

+ 42 - 0
api/swagger.yaml

@@ -210,6 +210,43 @@ definitions:
       PathInContainer: "/dev/deviceName"
       PathInContainer: "/dev/deviceName"
       CgroupPermissions: "mrw"
       CgroupPermissions: "mrw"
 
 
+  DeviceRequest:
+    type: "object"
+    description: "A request for devices to be sent to device drivers"
+    properties:
+      Driver:
+        type: "string"
+        example: "nvidia"
+      Count:
+        type: "integer"
+        example: -1
+      DeviceIDs:
+        type: "array"
+        items:
+          type: "string"
+        example:
+          - "0"
+          - "1"
+          - "GPU-fef8089b-4820-abfc-e83e-94318197576e"
+      Capabilities:
+        description: |
+          A list of capabilities; an OR list of AND lists of capabilities.
+        type: "array"
+        items:
+          type: "array"
+          items:
+            type: "string"
+        example:
+          # gpu AND nvidia AND compute
+          - ["gpu", "nvidia", "compute"]
+      Options:
+        description: |
+          Driver-specific options, specified as key/value pairs. These options
+          are passed directly to the driver.
+        type: "object"
+        additionalProperties:
+          type: "string"
+
   ThrottleDevice:
   ThrottleDevice:
     type: "object"
     type: "object"
     properties:
     properties:
@@ -421,6 +458,11 @@ definitions:
         items:
         items:
           type: "string"
           type: "string"
           example: "c 13:* rwm"
           example: "c 13:* rwm"
+      DeviceRequests:
+        description: "a list of requests for devices to be sent to device drivers"
+        type: "array"
+        items:
+          $ref: "#/definitions/DeviceRequest"
       DiskQuota:
       DiskQuota:
         description: "Disk limit (in bytes)."
         description: "Disk limit (in bytes)."
         type: "integer"
         type: "integer"

+ 11 - 0
api/types/container/host_config.go

@@ -244,6 +244,16 @@ func (n PidMode) Container() string {
 	return ""
 	return ""
 }
 }
 
 
+// DeviceRequest represents a request for devices from a device driver.
+// Used by GPU device drivers; Driver may be left empty, in which case a driver is picked by matching Capabilities.
+type DeviceRequest struct {
+	Driver       string            // Name of device driver
+	Count        int               // Number of devices to request (-1 = All)
+	DeviceIDs    []string          // List of device IDs as recognizable by the device driver
+	Capabilities [][]string        // An OR list of AND lists of device capabilities (e.g. "gpu")
+	Options      map[string]string // Options to pass onto the device driver
+}
+
 // DeviceMapping represents the device mapping between the host and the container.
 // DeviceMapping represents the device mapping between the host and the container.
 type DeviceMapping struct {
 type DeviceMapping struct {
 	PathOnHost        string
 	PathOnHost        string
@@ -327,6 +337,7 @@ type Resources struct {
 	CpusetMems           string          // CpusetMems 0-2, 0,1
 	CpusetMems           string          // CpusetMems 0-2, 0,1
 	Devices              []DeviceMapping // List of devices to map inside the container
 	Devices              []DeviceMapping // List of devices to map inside the container
 	DeviceCgroupRules    []string        // List of rule to be added to the device cgroup
 	DeviceCgroupRules    []string        // List of rule to be added to the device cgroup
+	DeviceRequests       []DeviceRequest // List of device requests for device drivers
 	DiskQuota            int64           // Disk limit (in bytes)
 	DiskQuota            int64           // Disk limit (in bytes)
 	KernelMemory         int64           // Kernel memory limit (in bytes)
 	KernelMemory         int64           // Kernel memory limit (in bytes)
 	KernelMemoryTCP      int64           // Hard limit for kernel TCP buffer memory (in bytes)
 	KernelMemoryTCP      int64           // Hard limit for kernel TCP buffer memory (in bytes)

+ 38 - 0
daemon/devices_linux.go

@@ -0,0 +1,38 @@
+package daemon // import "github.com/docker/docker/daemon"
+
+import (
+	"github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/pkg/capabilities"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// deviceDrivers is the registry of known device drivers, keyed by driver name.
+var deviceDrivers = map[string]*deviceDriver{}
+
+// deviceDriver describes a registered device driver.
+type deviceDriver struct {
+	// capset is the set of capabilities a device request is matched against.
+	capset capabilities.Set
+	// updateSpec applies a matched device request to the OCI runtime spec.
+	updateSpec func(*specs.Spec, *deviceInstance) error
+}
+
+// deviceInstance pairs a device request with the AND list of capabilities
+// that was selected for it when matching against a driver.
+type deviceInstance struct {
+	req          container.DeviceRequest
+	selectedCaps []string
+}
+
+// registerDeviceDriver stores d in the driver registry under name, replacing
+// any driver previously registered with the same name.
+func registerDeviceDriver(name string, d *deviceDriver) {
+	deviceDrivers[name] = d
+}
+
+// handleDevice applies a single device request to the OCI spec.
+//
+// When the request names a driver, only that driver is considered; otherwise
+// every registered driver is tried. The first driver whose capability set
+// matches one of the requested AND lists updates the spec. If no driver
+// matches, an incompatibleDeviceRequest error is returned.
+func (daemon *Daemon) handleDevice(req container.DeviceRequest, spec *specs.Spec) error {
+	// apply runs the driver's spec update for the AND list that matched.
+	apply := func(driver *deviceDriver, matched []string) error {
+		return driver.updateSpec(spec, &deviceInstance{req: req, selectedCaps: matched})
+	}
+
+	if req.Driver != "" {
+		if driver := deviceDrivers[req.Driver]; driver != nil {
+			if matched := driver.capset.Match(req.Capabilities); matched != nil {
+				return apply(driver, matched)
+			}
+		}
+		return incompatibleDeviceRequest{req.Driver, req.Capabilities}
+	}
+
+	for _, driver := range deviceDrivers {
+		if matched := driver.capset.Match(req.Capabilities); matched != nil {
+			return apply(driver, matched)
+		}
+	}
+	return incompatibleDeviceRequest{req.Driver, req.Capabilities}
+}

+ 11 - 0
daemon/errors.go

@@ -80,6 +80,17 @@ func (e invalidIdentifier) Error() string {
 
 
 func (invalidIdentifier) InvalidParameter() {}
 func (invalidIdentifier) InvalidParameter() {}
 
 
+// incompatibleDeviceRequest is returned when no device driver could be
+// selected for the requested driver name and capabilities.
+type incompatibleDeviceRequest struct {
+	driver string
+	caps   [][]string
+}
+
+// Error implements the error interface.
+func (e incompatibleDeviceRequest) Error() string {
+	const format = "could not select device driver %q with capabilities: %v"
+	return fmt.Sprintf(format, e.driver, e.caps)
+}
+
+// InvalidParameter marks the error as an invalid-parameter error.
+func (incompatibleDeviceRequest) InvalidParameter() {}
+
 type duplicateMountPointError string
 type duplicateMountPointError string
 
 
 func (e duplicateMountPointError) Error() string {
 func (e duplicateMountPointError) Error() string {

+ 107 - 0
daemon/nvidia_linux.go

@@ -0,0 +1,107 @@
+package daemon
+
+import (
+	"os/exec"
+	"strconv"
+
+	"github.com/containerd/containerd/contrib/nvidia"
+	"github.com/docker/docker/pkg/capabilities"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+)
+
+// TODO: nvidia should not be hard-coded, and should be a device plugin instead on the daemon object.
+// TODO: add list of device capabilities in daemon/node info
+
+var errConflictCountDeviceIDs = errors.New("cannot set both Count and DeviceIDs on device request")
+
+// nvidiaCLI is the helper binary name, copied from github.com/containerd/containerd/contrib/nvidia
+const nvidiaCLI = "nvidia-container-cli"
+
+// allNvidiaCaps holds the NVIDIA-specific capabilities, copied from github.com/containerd/containerd/contrib/nvidia.allCaps
+var allNvidiaCaps = map[nvidia.Capability]struct{}{
+	nvidia.Compute:  {},
+	nvidia.Compat32: {},
+	nvidia.Graphics: {},
+	nvidia.Utility:  {},
+	nvidia.Video:    {},
+	nvidia.Display:  {},
+}
+
+// init registers the "nvidia" device driver when the nvidia-container-cli
+// helper binary can be found in PATH. Without the helper no driver is
+// registered, so handleDevice rejects NVIDIA requests as incompatible.
+func init() {
+	if _, err := exec.LookPath(nvidiaCLI); err != nil {
+		// do not register Nvidia driver if helper binary is not present.
+		return
+	}
+	// Generic capabilities that select this driver.
+	capset := capabilities.Set{"gpu": struct{}{}, "nvidia": struct{}{}}
+	// The NVIDIA driver capabilities (compute, utility, ...) must be part of
+	// the set as well: Match requires every entry of an AND list to be
+	// present, so a request such as ["gpu", "nvidia", "compute"] would
+	// otherwise never select this driver.
+	for c := range allNvidiaCaps {
+		capset[string(c)] = struct{}{}
+	}
+	registerDeviceDriver("nvidia", &deviceDriver{
+		capset:     capset,
+		updateSpec: setNvidiaGPUs,
+	})
+}
+
+// setNvidiaGPUs translates the device request held by dev into options for
+// the containerd NVIDIA helper and applies them to the OCI spec s.
+//
+// Count and DeviceIDs are mutually exclusive. Device IDs that parse as
+// integers are passed as device indexes, anything else is treated as a device
+// UUID. Of the selected capabilities, only those that are NVIDIA driver
+// capabilities are forwarded.
+func setNvidiaGPUs(s *specs.Spec, dev *deviceInstance) error {
+	request := dev.req
+	if len(request.DeviceIDs) > 0 && request.Count != 0 {
+		return errConflictCountDeviceIDs
+	}
+
+	var (
+		gpuOpts []nvidia.Opts
+		indexes []int
+		uuids   []string
+	)
+	for _, raw := range request.DeviceIDs {
+		if index, err := strconv.Atoi(raw); err == nil {
+			indexes = append(indexes, index)
+		} else {
+			// if not an integer, then assume UUID.
+			uuids = append(uuids, raw)
+		}
+	}
+	if len(indexes) > 0 {
+		gpuOpts = append(gpuOpts, nvidia.WithDevices(indexes...))
+	}
+	if len(uuids) > 0 {
+		gpuOpts = append(gpuOpts, nvidia.WithDeviceUUIDs(uuids...))
+	}
+
+	switch {
+	case request.Count < 0:
+		// A negative count requests every device.
+		gpuOpts = append(gpuOpts, nvidia.WithAllDevices)
+	case request.Count > 0:
+		gpuOpts = append(gpuOpts, nvidia.WithDevices(countToDevices(request.Count)...))
+	}
+
+	// The selected capabilities are device capabilities; some but not all of
+	// them are NVIDIA driver capabilities.
+	var driverCaps []nvidia.Capability
+	for _, name := range dev.selectedCaps {
+		candidate := nvidia.Capability(name)
+		if _, known := allNvidiaCaps[candidate]; !known {
+			// TODO: nvidia.WithRequiredCUDAVersion
+			// for now we let the prestart hook verify cuda versions but errors are not pretty.
+			continue
+		}
+		driverCaps = append(driverCaps, candidate)
+	}
+	if driverCaps != nil {
+		gpuOpts = append(gpuOpts, nvidia.WithCapabilities(driverCaps...))
+	}
+
+	return nvidia.WithGPUs(gpuOpts...)(nil, nil, nil, s)
+}
+
+// countToDevices returns the device indexes 0, 1, ..., count-1.
+func countToDevices(count int) []int {
+	ids := make([]int, 0, count)
+	for next := 0; next < count; next++ {
+		ids = append(ids, next)
+	}
+	return ids
+}

+ 16 - 8
daemon/oci_linux.go

@@ -85,7 +85,7 @@ func setResources(s *specs.Spec, r containertypes.Resources) error {
 	return nil
 	return nil
 }
 }
 
 
-func setDevices(s *specs.Spec, c *container.Container) error {
+func (daemon *Daemon) setDevices(s *specs.Spec, c *container.Container) error {
 	// Build lists of devices allowed and created within the container.
 	// Build lists of devices allowed and created within the container.
 	var devs []specs.LinuxDevice
 	var devs []specs.LinuxDevice
 	devPermissions := s.Linux.Resources.Devices
 	devPermissions := s.Linux.Resources.Devices
@@ -122,6 +122,13 @@ func setDevices(s *specs.Spec, c *container.Container) error {
 
 
 	s.Linux.Devices = append(s.Linux.Devices, devs...)
 	s.Linux.Devices = append(s.Linux.Devices, devs...)
 	s.Linux.Resources.Devices = devPermissions
 	s.Linux.Resources.Devices = devPermissions
+
+	for _, req := range c.HostConfig.DeviceRequests {
+		if err := daemon.handleDevice(req, s); err != nil {
+			return err
+		}
+	}
+
 	return nil
 	return nil
 }
 }
 
 
@@ -751,7 +758,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (retSpec *specs.Spec, e
 	if err := daemon.initCgroupsPath(parentPath); err != nil {
 	if err := daemon.initCgroupsPath(parentPath); err != nil {
 		return nil, fmt.Errorf("linux init cgroups path: %v", err)
 		return nil, fmt.Errorf("linux init cgroups path: %v", err)
 	}
 	}
-	if err := setDevices(&s, c); err != nil {
+	if err := daemon.setDevices(&s, c); err != nil {
 		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
 		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
 	}
 	}
 	if err := daemon.setRlimits(&s, c); err != nil {
 	if err := daemon.setRlimits(&s, c); err != nil {
@@ -818,15 +825,16 @@ func (daemon *Daemon) createSpec(c *container.Container) (retSpec *specs.Spec, e
 		return nil, fmt.Errorf("linux mounts: %v", err)
 		return nil, fmt.Errorf("linux mounts: %v", err)
 	}
 	}
 
 
+	if s.Hooks == nil {
+		s.Hooks = &specs.Hooks{}
+	}
 	for _, ns := range s.Linux.Namespaces {
 	for _, ns := range s.Linux.Namespaces {
 		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
 		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
 			target := filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")
 			target := filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")
-			s.Hooks = &specs.Hooks{
-				Prestart: []specs.Hook{{
-					Path: target,
-					Args: []string{"libnetwork-setkey", "-exec-root=" + daemon.configStore.GetExecRoot(), c.ID, daemon.netController.ID()},
-				}},
-			}
+			s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
+				Path: target,
+				Args: []string{"libnetwork-setkey", "-exec-root=" + daemon.configStore.GetExecRoot(), c.ID, daemon.netController.ID()},
+			})
 		}
 		}
 	}
 	}
 
 

+ 2 - 0
docs/api/version-history.md

@@ -49,6 +49,8 @@ keywords: "API, Docker, rcli, REST, documentation"
 * `GET /info` now returns information about `DataPathPort` that is currently used in swarm
 * `GET /info` now returns information about `DataPathPort` that is currently used in swarm
 * `GET /info` now returns `PidsLimit` boolean to indicate if the host kernel has
 * `GET /info` now returns `PidsLimit` boolean to indicate if the host kernel has
   PID limit support enabled.
   PID limit support enabled.
+* `POST /containers/create` now accepts `DeviceRequests` as part of `HostConfig`.
+  This can be used to request NVIDIA GPUs for a container.
 * `GET /swarm` endpoint now returns DataPathPort info
 * `GET /swarm` endpoint now returns DataPathPort info
 * `POST /containers/create` now takes `KernelMemoryTCP` field to set hard limit for kernel TCP buffer memory.
 * `POST /containers/create` now takes `KernelMemoryTCP` field to set hard limit for kernel TCP buffer memory.
 * `GET /service` now  returns `MaxReplicas` as part of the `Placement`.
 * `GET /service` now  returns `MaxReplicas` as part of the `Placement`.

+ 23 - 0
pkg/capabilities/caps.go

@@ -0,0 +1,23 @@
+// Package capabilities allows to generically handle capabilities.
+package capabilities
+
+// Set represents a set of capabilities, keyed by capability name; a name is a member when its key is present.
+type Set map[string]struct{}
+
+// Match looks for the first AND list in caps whose capabilities are all
+// members of set. caps is an OR list of AND lists of capabilities.
+// The matching AND list is returned as-is; nil is returned when set is nil
+// or when no AND list is fully contained in set.
+func (set Set) Match(caps [][]string) []string {
+	if set == nil {
+		return nil
+	}
+	for _, candidate := range caps {
+		satisfied := true
+		for _, name := range candidate {
+			if _, present := set[name]; !present {
+				satisfied = false
+				break
+			}
+		}
+		if satisfied {
+			return candidate
+		}
+	}
+	return nil
+}

+ 72 - 0
pkg/capabilities/caps_test.go

@@ -0,0 +1,72 @@
+package capabilities
+
+import (
+	"fmt"
+	"testing"
+)
+
+// TestMatch checks Set.Match against OR lists of AND lists, covering both
+// lists that select an AND list and lists that select nothing (nil result).
+func TestMatch(t *testing.T) {
+	type testcase struct {
+		caps     [][]string
+		expected []string
+	}
+
+	set := Set{
+		"foo": struct{}{},
+		"bar": struct{}{},
+	}
+
+	testcases := []testcase{
+		// matches
+		{caps: [][]string{{}}, expected: []string{}},
+		{caps: [][]string{{"foo"}}, expected: []string{"foo"}},
+		{caps: [][]string{{"bar"}, {"foo"}}, expected: []string{"bar"}},
+		{caps: [][]string{{"foo", "bar"}}, expected: []string{"foo", "bar"}},
+		{caps: [][]string{{"qux"}, {"foo"}}, expected: []string{"foo"}},
+		{caps: [][]string{{"foo", "bar"}, {"baz"}, {"bar"}}, expected: []string{"foo", "bar"}},
+
+		// non matches
+		{caps: nil},
+		{caps: [][]string{}},
+		{caps: [][]string{{"qux"}}},
+		{caps: [][]string{{"foo", "bar", "qux"}}},
+		{caps: [][]string{{"qux"}, {"baz"}}},
+		{caps: [][]string{{"foo", "baz"}}},
+	}
+
+	for _, tc := range testcases {
+		t.Run(fmt.Sprintf("%v", tc.caps), func(t *testing.T) {
+			got := set.Match(tc.caps)
+			switch {
+			case tc.expected == nil && got == nil:
+				// Both sides agree that nothing matched.
+				return
+			case tc.expected == nil || got == nil:
+				t.Fatalf("selected = %v, expected = %v", got, tc.expected)
+			case len(got) != len(tc.expected):
+				t.Fatalf("len(selected) = %d, len(expected) = %d", len(got), len(tc.expected))
+			}
+			for i, s := range got {
+				if tc.expected[i] != s {
+					t.Fatalf("selected[%d] = %s, expected[%d] = %s", i, s, i, tc.expected[i])
+				}
+			}
+		})
+	}
+}

+ 207 - 0
vendor/github.com/containerd/containerd/contrib/nvidia/nvidia.go

@@ -0,0 +1,207 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package nvidia
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+
+	"github.com/containerd/containerd/containers"
+	"github.com/containerd/containerd/oci"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+const nvidiaCLI = "nvidia-container-cli" // helper binary looked up in PATH by WithGPUs
+
+// Capability specifies a capability for the gpu inside the container.
+// A detailed explanation of the options can be found at:
+// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
+type Capability string
+
+const (
+	// Compute capability, passed to the helper as --compute
+	Compute Capability = "compute"
+	// Compat32 capability, passed to the helper as --compat32
+	Compat32 Capability = "compat32"
+	// Graphics capability, passed to the helper as --graphics
+	Graphics Capability = "graphics"
+	// Utility capability, passed to the helper as --utility
+	Utility Capability = "utility"
+	// Video capability, passed to the helper as --video
+	Video Capability = "video"
+	// Display capability, passed to the helper as --display
+	Display Capability = "display"
+)
+
+var allCaps = []Capability{ // every capability above; used by WithAllCapabilities
+	Compute,
+	Compat32,
+	Graphics,
+	Utility,
+	Video,
+	Display,
+}
+
+// WithGPUs adds NVIDIA gpu support to a container.
+//
+// The returned spec option applies opts to a fresh configuration and appends
+// a prestart hook to the spec. The hook runs "containerd oci-hook" which in
+// turn invokes nvidia-container-cli with the configured arguments. Unless an
+// option set the hook path, the "containerd" binary is looked up in PATH. An
+// error is returned if an option fails or a required binary is not found.
+func WithGPUs(opts ...Opts) oci.SpecOpts {
+	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
+		cfg := new(config)
+		for _, apply := range opts {
+			if err := apply(cfg); err != nil {
+				return err
+			}
+		}
+
+		if cfg.OCIHookPath == "" {
+			hookPath, err := exec.LookPath("containerd")
+			if err != nil {
+				return err
+			}
+			cfg.OCIHookPath = hookPath
+		}
+
+		cliPath, err := exec.LookPath(nvidiaCLI)
+		if err != nil {
+			return err
+		}
+
+		hookArgs := []string{
+			"containerd",
+			"oci-hook",
+			"--",
+			cliPath,
+			// ensures the required kernel modules are properly loaded
+			"--load-kmods",
+		}
+		hookArgs = append(hookArgs, cfg.args()...)
+
+		if s.Hooks == nil {
+			s.Hooks = &specs.Hooks{}
+		}
+		s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
+			Path: cfg.OCIHookPath,
+			Args: hookArgs,
+			Env:  os.Environ(),
+		})
+		return nil
+	}
+}
+
+// config collects the settings from which the nvidia-container-cli hook
+// invocation is built.
+type config struct {
+	// Devices lists the devices joined into the --device flag.
+	Devices []string
+	// Capabilities are emitted as one --<capability> flag each.
+	Capabilities []Capability
+	// LoadKmods adds the --load-kmods flag when true.
+	LoadKmods bool
+	// LDCache is the value of the --ldcache flag; omitted when empty.
+	LDCache string
+	// LDConfig is the value of the --ldconfig flag; omitted when empty.
+	LDConfig string
+	// Requirements are emitted as one --require flag each.
+	Requirements []string
+	// OCIHookPath is the path of the binary executed as the prestart hook.
+	OCIHookPath string
+}
+
+// args builds the nvidia-container-cli argument list for this configuration:
+// the global flags, the "configure" sub-command with its flags, and finally
+// the literal "--pid={{pid}}" and "{{rootfs}}" arguments.
+func (c *config) args() []string {
+	var cli []string
+
+	// Global flags come before the sub-command.
+	if c.LoadKmods {
+		cli = append(cli, "--load-kmods")
+	}
+	if c.LDCache != "" {
+		cli = append(cli, "--ldcache="+c.LDCache)
+	}
+
+	cli = append(cli, "configure")
+
+	if len(c.Devices) > 0 {
+		cli = append(cli, "--device="+strings.Join(c.Devices, ","))
+	}
+	for _, capability := range c.Capabilities {
+		cli = append(cli, "--"+string(capability))
+	}
+	if c.LDConfig != "" {
+		cli = append(cli, "--ldconfig="+c.LDConfig)
+	}
+	for _, requirement := range c.Requirements {
+		cli = append(cli, "--require="+requirement)
+	}
+
+	return append(cli, "--pid={{pid}}", "{{rootfs}}")
+}
+
+// Opts are options for configuring gpu support; each one mutates the config and a non-nil error aborts WithGPUs
+type Opts func(*config) error
+
+// WithDevices appends the given device indexes, formatted as decimal
+// strings, to the devices exposed to the container.
+func WithDevices(ids ...int) Opts {
+	return func(c *config) error {
+		for idx := range ids {
+			c.Devices = append(c.Devices, strconv.Itoa(ids[idx]))
+		}
+		return nil
+	}
+}
+
+// WithDeviceUUIDs appends the given device UUIDs to the devices exposed to
+// the container.
+func WithDeviceUUIDs(uuids ...string) Opts {
+	return func(c *config) error {
+		for _, uuid := range uuids {
+			c.Devices = append(c.Devices, uuid)
+		}
+		return nil
+	}
+}
+
+// WithAllDevices exposes all gpus to the container by replacing the device
+// list with the single entry "all".
+func WithAllDevices(c *config) error {
+	everything := []string{"all"}
+	c.Devices = everything
+	return nil
+}
+
+// WithAllCapabilities replaces the configured capabilities with every known
+// capability (the package-level allCaps list).
+func WithAllCapabilities(c *config) (err error) {
+	c.Capabilities = allCaps
+	return err
+}
+
+// WithCapabilities appends the specified capabilities to those already
+// configured for the gpus in the container.
+func WithCapabilities(caps ...Capability) Opts {
+	return func(c *config) error {
+		merged := append(c.Capabilities, caps...)
+		c.Capabilities = merged
+		return nil
+	}
+}
+
+// WithRequiredCUDAVersion adds a "cuda>=major.minor" requirement to the
+// configuration.
+func WithRequiredCUDAVersion(major, minor int) Opts {
+	return func(c *config) error {
+		requirement := fmt.Sprintf("cuda>=%d.%d", major, minor)
+		c.Requirements = append(c.Requirements, requirement)
+		return nil
+	}
+}
+
+// WithOCIHookPath sets the path of the binary that is executed as the
+// prestart hook; the path is stored as given.
+func WithOCIHookPath(path string) Opts {
+	setPath := func(c *config) error {
+		c.OCIHookPath = path
+		return nil
+	}
+	return setPath
+}
+
+// WithLookupOCIHookPath resolves the binary called name via PATH and uses the
+// result as the hook path. The lookup error is returned, and the hook path is
+// left unchanged, when the binary cannot be found.
+func WithLookupOCIHookPath(name string) Opts {
+	return func(c *config) error {
+		resolved, err := exec.LookPath(name)
+		if err == nil {
+			c.OCIHookPath = resolved
+		}
+		return err
+	}
+}