diff --git a/api/swagger.yaml b/api/swagger.yaml index 8652c368c4..8b7b3c0a7f 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -210,6 +210,43 @@ definitions: PathInContainer: "/dev/deviceName" CgroupPermissions: "mrw" + DeviceRequest: + type: "object" + description: "A request for devices to be sent to device drivers" + properties: + Driver: + type: "string" + example: "nvidia" + Count: + type: "integer" + example: -1 + DeviceIDs: + type: "array" + items: + type: "string" + example: + - "0" + - "1" + - "GPU-fef8089b-4820-abfc-e83e-94318197576e" + Capabilities: + description: | + A list of capabilities; an OR list of AND lists of capabilities. + type: "array" + items: + type: "array" + items: + type: "string" + example: + # gpu AND nvidia AND compute + - ["gpu", "nvidia", "compute"] + Options: + description: | + Driver-specific options, specified as key/value pairs. These options + are passed directly to the driver. + type: "object" + additionalProperties: + type: "string" + ThrottleDevice: type: "object" properties: @@ -421,6 +458,11 @@ definitions: items: type: "string" example: "c 13:* rwm" + DeviceRequests: + description: "A list of requests for devices to be sent to device drivers" + type: "array" + items: + $ref: "#/definitions/DeviceRequest" DiskQuota: description: "Disk limit (in bytes)." type: "integer" diff --git a/api/types/container/host_config.go b/api/types/container/host_config.go index f4f5c09f84..c710107702 100644 --- a/api/types/container/host_config.go +++ b/api/types/container/host_config.go @@ -244,6 +244,16 @@ func (n PidMode) Container() string { return "" } +// DeviceRequest represents a request for devices from a device driver. +// Used by GPU device drivers. 
+type DeviceRequest struct {
+	Driver       string            // Name of device driver; when empty, every registered driver is tried
+	Count        int               // Number of devices to request (-1 = All)
+	DeviceIDs    []string          // List of device IDs as recognizable by the device driver
+	Capabilities [][]string        // An OR list of AND lists of device capabilities (e.g. "gpu"); one inner list must be fully supported
+	Options      map[string]string // Options to pass onto the device driver
+}
+
 // DeviceMapping represents the device mapping between the host and the container.
 type DeviceMapping struct {
 	PathOnHost        string
@@ -327,6 +337,7 @@ type Resources struct {
 	CpusetMems        string          // CpusetMems 0-2, 0,1
 	Devices           []DeviceMapping // List of devices to map inside the container
 	DeviceCgroupRules []string        // List of rule to be added to the device cgroup
+	DeviceRequests    []DeviceRequest // List of device requests for device drivers
 	DiskQuota         int64           // Disk limit (in bytes)
 	KernelMemory      int64           // Kernel memory limit (in bytes)
 	KernelMemoryTCP   int64           // Hard limit for kernel TCP buffer memory (in bytes)
diff --git a/daemon/devices_linux.go b/daemon/devices_linux.go
new file mode 100644
index 0000000000..a7b76eacaf
--- /dev/null
+++ b/daemon/devices_linux.go
@@ -0,0 +1,38 @@
+package daemon // import "github.com/docker/docker/daemon"
+
+import (
+	"github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/pkg/capabilities"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+var deviceDrivers = map[string]*deviceDriver{} // registered device drivers, keyed by driver name
+
+type deviceDriver struct {
+	capset     capabilities.Set                         // capabilities this driver can satisfy
+	updateSpec func(*specs.Spec, *deviceInstance) error // applies a matched request to the OCI spec
+}
+
+type deviceInstance struct {
+	req          container.DeviceRequest // the request as received from the API
+	selectedCaps []string                // the AND list from req.Capabilities that the driver matched
+}
+
+func registerDeviceDriver(name string, d *deviceDriver) {
+	deviceDrivers[name] = d // plain map write: no locking, a later registration replaces an earlier one
+}
+
+func (daemon *Daemon) handleDevice(req container.DeviceRequest, spec *specs.Spec) error {
+	if req.Driver == "" { // no driver named: use the first registered driver whose capset matches
+		for _, dd := range deviceDrivers { // map range, so the order drivers are tried in is unspecified
+			if selected := dd.capset.Match(req.Capabilities); selected != nil {
+				return dd.updateSpec(spec, &deviceInstance{req: req, selectedCaps: selected})
+			}
+		}
+	} else if dd := deviceDrivers[req.Driver]; dd != nil { // driver named: only that driver is consulted
+		if selected := dd.capset.Match(req.Capabilities); selected != nil {
+			return dd.updateSpec(spec, &deviceInstance{req: req, selectedCaps: selected})
+		}
+	}
+	return incompatibleDeviceRequest{req.Driver, req.Capabilities} // unknown driver, or no capability list matched
+}
diff --git a/daemon/errors.go b/daemon/errors.go
index ed60ce7698..9c72b982fd 100644
--- a/daemon/errors.go
+++ b/daemon/errors.go
@@ -80,6 +80,17 @@ func (e invalidIdentifier) Error() string {
 
 func (invalidIdentifier) InvalidParameter() {}
 
+type incompatibleDeviceRequest struct {
+	driver string     // requested driver name; empty when the request did not name one
+	caps   [][]string // requested capabilities (OR list of AND lists)
+}
+
+func (i incompatibleDeviceRequest) Error() string {
+	return fmt.Sprintf("could not select device driver %q with capabilities: %v", i.driver, i.caps)
+}
+
+func (incompatibleDeviceRequest) InvalidParameter() {} // marker method, as on invalidIdentifier above; presumably classifies the error as an invalid parameter (confirm)
+
 type duplicateMountPointError string
 
 func (e duplicateMountPointError) Error() string {
diff --git a/daemon/nvidia_linux.go b/daemon/nvidia_linux.go
new file mode 100644
index 0000000000..727d1c89fc
--- /dev/null
+++ b/daemon/nvidia_linux.go
@@ -0,0 +1,107 @@
+package daemon
+
+import (
+	"os/exec"
+	"strconv"
+
+	"github.com/containerd/containerd/contrib/nvidia"
+	"github.com/docker/docker/pkg/capabilities"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+)
+
+// TODO: nvidia should not be hard-coded, and should be a device plugin instead on the daemon object.
+// TODO: add list of device capabilities in daemon/node info
+
+var errConflictCountDeviceIDs = errors.New("cannot set both Count and DeviceIDs on device request")
+
+// nvidiaCLI is the helper binary this driver depends on; name copied from github.com/containerd/containerd/contrib/nvidia
+const nvidiaCLI = "nvidia-container-cli"
+
+// allNvidiaCaps is the set of NVIDIA driver capabilities, copied from github.com/containerd/containerd/contrib/nvidia.allCaps
+var allNvidiaCaps = map[nvidia.Capability]struct{}{
+	nvidia.Compute:  {},
+	nvidia.Compat32: {},
+	nvidia.Graphics: {},
+	nvidia.Utility:  {},
+	nvidia.Video:    {},
+	nvidia.Display:  {},
+}
+
+func init() {
+	if _, err := exec.LookPath(nvidiaCLI); err != nil {
+		// do not register Nvidia driver if helper binary is not present.
+		return
+	}
+	capset := capabilities.Set{"gpu": struct{}{}, "nvidia": struct{}{}} // device-level capabilities
+	nvidiaDriver := &deviceDriver{
+		capset:     capset,
+		updateSpec: setNvidiaGPUs,
+	}
+	for c := range allNvidiaCaps {
+		nvidiaDriver.capset[string(c)] = struct{}{} // also advertise driver capabilities ("compute", ...) so AND lists naming them can match
+	}
+	registerDeviceDriver("nvidia", nvidiaDriver)
+}
+
+func setNvidiaGPUs(s *specs.Spec, dev *deviceInstance) error {
+	var opts []nvidia.Opts
+
+	req := dev.req
+	if req.Count != 0 && len(req.DeviceIDs) > 0 { // Count and DeviceIDs are mutually exclusive
+		return errConflictCountDeviceIDs
+	}
+
+	if len(req.DeviceIDs) > 0 {
+		var ids []int
+		var uuids []string
+		for _, devID := range req.DeviceIDs {
+			id, err := strconv.Atoi(devID)
+			if err == nil {
+				ids = append(ids, id)
+				continue
+			}
+			// if not an integer, then assume UUID.
+			uuids = append(uuids, devID)
+		}
+		if len(ids) > 0 {
+			opts = append(opts, nvidia.WithDevices(ids...))
+		}
+		if len(uuids) > 0 {
+			opts = append(opts, nvidia.WithDeviceUUIDs(uuids...))
+		}
+	}
+
+	if req.Count < 0 { // any negative count (-1 in the API) requests all devices
+		opts = append(opts, nvidia.WithAllDevices)
+	} else if req.Count > 0 { // otherwise request device indexes 0..Count-1
+		opts = append(opts, nvidia.WithDevices(countToDevices(req.Count)...))
+	}
+
+	var nvidiaCaps []nvidia.Capability
+	// dev.selectedCaps is the matched AND list; some but not all of its entries are NVIDIA driver capabilities.
+	for _, c := range dev.selectedCaps {
+		nvcap := nvidia.Capability(c)
+		if _, isNvidiaCap := allNvidiaCaps[nvcap]; isNvidiaCap {
+			nvidiaCaps = append(nvidiaCaps, nvcap)
+			continue
+		}
+		// TODO: nvidia.WithRequiredCUDAVersion
+		// for now we let the prestart hook verify cuda versions but errors are not pretty.
+	}
+
+	if nvidiaCaps != nil {
+		opts = append(opts, nvidia.WithCapabilities(nvidiaCaps...))
+	}
+
+	return nvidia.WithGPUs(opts...)(nil, nil, nil, s) // WithGPUs ignores its first three arguments and only edits the spec
+}
+
+// countToDevices returns the list 0, 1, ... count-1 of deviceIDs.
+func countToDevices(count int) []int {
+	devices := make([]int, count)
+	for i := range devices {
+		devices[i] = i
+	}
+	return devices
+}
diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go
index 12523a6a62..629ba0e3c2 100644
--- a/daemon/oci_linux.go
+++ b/daemon/oci_linux.go
@@ -85,7 +85,7 @@ func setResources(s *specs.Spec, r containertypes.Resources) error {
 	return nil
 }
 
-func setDevices(s *specs.Spec, c *container.Container) error {
+func (daemon *Daemon) setDevices(s *specs.Spec, c *container.Container) error {
 	// Build lists of devices allowed and created within the container.
 	var devs []specs.LinuxDevice
 	devPermissions := s.Linux.Resources.Devices
@@ -122,6 +122,13 @@ func setDevices(s *specs.Spec, c *container.Container) error {
 
 	s.Linux.Devices = append(s.Linux.Devices, devs...)
 	s.Linux.Resources.Devices = devPermissions
+
+	for _, req := range c.HostConfig.DeviceRequests {
+		if err := daemon.handleDevice(req, s); err != nil {
+			return err
+		}
+	}
+
 	return nil
 }
 
@@ -751,7 +758,7 @@ func (daemon *Daemon) createSpec(c *container.Container) (retSpec *specs.Spec, e
 	if err := daemon.initCgroupsPath(parentPath); err != nil {
 		return nil, fmt.Errorf("linux init cgroups path: %v", err)
 	}
-	if err := setDevices(&s, c); err != nil {
+	if err := daemon.setDevices(&s, c); err != nil {
 		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
 	}
 	if err := daemon.setRlimits(&s, c); err != nil {
@@ -818,15 +825,16 @@ func (daemon *Daemon) createSpec(c *container.Container) (retSpec *specs.Spec, e
 		return nil, fmt.Errorf("linux mounts: %v", err)
 	}
 
+	if s.Hooks == nil {
+		s.Hooks = &specs.Hooks{}
+	}
 	for _, ns := range s.Linux.Namespaces {
 		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
 			target := filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")
-			s.Hooks = &specs.Hooks{
-				Prestart: []specs.Hook{{
-					Path: target,
-					Args: []string{"libnetwork-setkey", "-exec-root=" + daemon.configStore.GetExecRoot(), c.ID, daemon.netController.ID()},
-				}},
-			}
+			s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
+				Path: target,
+				Args: []string{"libnetwork-setkey", "-exec-root=" + daemon.configStore.GetExecRoot(), c.ID, daemon.netController.ID()},
+			})
 		}
 	}
 
diff --git a/docs/api/version-history.md b/docs/api/version-history.md
index f6d6e54ad9..46d0cfc2dd 100644
--- a/docs/api/version-history.md
+++ b/docs/api/version-history.md
@@ -49,6 +49,8 @@ keywords: "API, Docker, rcli, REST, documentation"
 * `GET /info` now returns information about `DataPathPort` that is currently used in swarm
 * `GET /info` now returns `PidsLimit` boolean to indicate if the host kernel has
   PID limit support enabled.
+* `POST /containers/create` now accepts `DeviceRequests` as part of `HostConfig`.
+  Can be used to set Nvidia GPUs.
 * `GET /swarm` endpoint now returns DataPathPort info
 * `POST /containers/create` now takes `KernelMemoryTCP` field to set hard limit for kernel TCP buffer memory.
 * `GET /service` now returns `MaxReplicas` as part of the `Placement`.
diff --git a/pkg/capabilities/caps.go b/pkg/capabilities/caps.go
new file mode 100644
index 0000000000..c0deb13d00
--- /dev/null
+++ b/pkg/capabilities/caps.go
@@ -0,0 +1,23 @@
+// Package capabilities provides a generic set type for matching requested capabilities.
+package capabilities
+
+// Set represents a set of capabilities.
+type Set map[string]struct{}
+
+// Match returns the first AND list in caps (an OR list of AND lists) whose capabilities are all in set.
+// It returns nil if set is nil or no AND list is fully contained; an empty, non-nil AND list matches vacuously.
+func (set Set) Match(caps [][]string) []string {
+	if set == nil {
+		return nil
+	}
+anyof:
+	for _, andList := range caps {
+		for _, cap := range andList {
+			if _, ok := set[cap]; !ok {
+				continue anyof // one missing capability rejects this AND list; move on to the next
+			}
+		}
+		return andList
+	}
+	return nil
+}
diff --git a/pkg/capabilities/caps_test.go b/pkg/capabilities/caps_test.go
new file mode 100644
index 0000000000..ab80c57258
--- /dev/null
+++ b/pkg/capabilities/caps_test.go
@@ -0,0 +1,72 @@
+package capabilities
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestMatch(t *testing.T) {
+	set := Set{
+		"foo": struct{}{},
+		"bar": struct{}{},
+	}
+	type testcase struct {
+		caps     [][]string
+		expected []string
+	}
+	var testcases = []testcase{
+		// matches: the first fully contained AND list is returned
+		{
+			caps:     [][]string{{}},
+			expected: []string{},
+		},
+		{
+			caps:     [][]string{{"foo"}},
+			expected: []string{"foo"},
+		},
+		{
+			caps:     [][]string{{"bar"}, {"foo"}},
+			expected: []string{"bar"},
+		},
+		{
+			caps:     [][]string{{"foo", "bar"}},
+			expected: []string{"foo", "bar"},
+		},
+		{
+			caps:     [][]string{{"qux"}, {"foo"}},
+			expected: []string{"foo"},
+		},
+		{
+			caps:     [][]string{{"foo", "bar"}, {"baz"}, {"bar"}},
+			expected: []string{"foo", "bar"},
+		},
+
+		// non matches: expected is left nil
+		{caps: nil},
+		{caps: [][]string{}},
+		{caps: [][]string{{"qux"}}},
+		{caps: [][]string{{"foo", "bar", "qux"}}},
+		{caps: [][]string{{"qux"}, {"baz"}}},
+		{caps: [][]string{{"foo", "baz"}}},
+	}
+
+	for _, m := range testcases {
+		t.Run(fmt.Sprintf("%v", m.caps), func(t *testing.T) {
+			selected := set.Match(m.caps)
+			if m.expected == nil || selected == nil {
+				if m.expected == nil && selected == nil {
+					return
+				}
+				t.Fatalf("selected = %v, expected = %v", selected, m.expected)
+			}
+			if len(selected) != len(m.expected) {
+				t.Fatalf("len(selected) = %d, len(expected) = %d", len(selected), len(m.expected))
+			}
+			for i, s := range selected {
+				if m.expected[i] != s {
+					t.Fatalf("selected[%d] = %s, expected[%d] = %s", i, s, i, m.expected[i])
+				}
+			}
+		})
+	}
+}
diff --git a/vendor/github.com/containerd/containerd/contrib/nvidia/nvidia.go b/vendor/github.com/containerd/containerd/contrib/nvidia/nvidia.go
new file mode 100644
index 0000000000..5a1d3c7f1c
--- /dev/null
+++ b/vendor/github.com/containerd/containerd/contrib/nvidia/nvidia.go
@@ -0,0 +1,207 @@
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package nvidia
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+
+	"github.com/containerd/containerd/containers"
+	"github.com/containerd/containerd/oci"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+const nvidiaCLI = "nvidia-container-cli" // helper binary, resolved on PATH by WithGPUs
+
+// Capability specifies capabilities for the gpu inside the container
+// Detailed explanation of options can be found:
+// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
+type Capability string
+
+const (
+	// Compute capability
+	Compute Capability = "compute"
+	// Compat32 capability
+	Compat32 Capability = "compat32"
+	// Graphics capability
+	Graphics Capability = "graphics"
+	// Utility capability
+	Utility Capability = "utility"
+	// Video capability
+	Video Capability = "video"
+	// Display capability
+	Display Capability = "display"
+)
+
+var allCaps = []Capability{ // every Capability constant declared above; used by WithAllCapabilities
+	Compute,
+	Compat32,
+	Graphics,
+	Utility,
+	Video,
+	Display,
+}
+
+// WithGPUs adds NVIDIA gpu support to a container by appending a prestart hook to the spec
+func WithGPUs(opts ...Opts) oci.SpecOpts {
+	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
+		c := &config{}
+		for _, o := range opts {
+			if err := o(c); err != nil {
+				return err
+			}
+		}
+		if c.OCIHookPath == "" { // no hook path option given: fall back to "containerd" found on PATH
+			path, err := exec.LookPath("containerd")
+			if err != nil {
+				return err
+			}
+			c.OCIHookPath = path
+		}
+		nvidiaPath, err := exec.LookPath(nvidiaCLI)
+		if err != nil {
+			return err
+		}
+		if s.Hooks == nil {
+			s.Hooks = &specs.Hooks{}
+		}
+		s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
+			Path: c.OCIHookPath,
+			Args: append([]string{
+				"containerd",
+				"oci-hook",
+				"--",
+				nvidiaPath,
+				// ensures the required kernel modules are properly loaded
+				"--load-kmods",
+			}, c.args()...),
+			Env: os.Environ(), // the hook gets a copy of the calling process's environment
+		})
+		return nil
+	}
+}
+
+type config struct {
+	Devices      []string     // device indexes, UUIDs, or the single entry "all"
+	Capabilities []Capability // each is emitted as a "--<capability>" flag by args
+	LoadKmods    bool
+	LDCache      string
+	LDConfig     string
+	Requirements []string // each is emitted as "--require=<value>" by args
+	OCIHookPath  string   // hook binary path; WithGPUs looks up "containerd" when empty
+}
+
+func (c *config) args() []string {
+	var args []string
+
+	if c.LoadKmods { // NOTE(review): WithGPUs already passes --load-kmods unconditionally, so this would repeat the flag
+		args = append(args, "--load-kmods")
+	}
+	if c.LDCache != "" {
+		args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache))
+	}
+	args = append(args,
+		"configure",
+	)
+	if len(c.Devices) > 0 {
+		args = append(args, fmt.Sprintf("--device=%s", strings.Join(c.Devices, ",")))
+	}
+	for _, c := range c.Capabilities { // the loop variable c shadows the receiver inside this loop only
+		args = append(args, fmt.Sprintf("--%s", c))
+	}
+	if c.LDConfig != "" {
+		args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig))
+	}
+	for _, r := range c.Requirements {
+		args = append(args, fmt.Sprintf("--require=%s", r))
+	}
+	args = append(args, "--pid={{pid}}", "{{rootfs}}") // presumably placeholders filled in by the oci-hook command (confirm upstream)
+	return args
+}
+
+// Opts are options for configuring gpu support
+type Opts func(*config) error
+
+// WithDevices adds the provided device indexes to the container
+func WithDevices(ids ...int) Opts {
+	return func(c *config) error {
+		for _, i := range ids {
+			c.Devices = append(c.Devices, strconv.Itoa(i))
+		}
+		return nil
+	}
+}
+
+// WithDeviceUUIDs adds the specific device UUID to the container
+func WithDeviceUUIDs(uuids ...string) Opts {
+	return func(c *config) error {
+		c.Devices = append(c.Devices, uuids...)
+		return nil
+	}
+}
+
+// WithAllDevices adds all gpus to the container
+func WithAllDevices(c *config) error {
+	c.Devices = []string{"all"} // replaces, rather than extends, any devices set by earlier options
+	return nil
+}
+
+// WithAllCapabilities adds all capabilities to the container for the gpus
+func WithAllCapabilities(c *config) error {
+	c.Capabilities = allCaps // assigns the package-level slice itself (no copy), replacing earlier capabilities
+	return nil
+}
+
+// WithCapabilities adds the specified capabilities to the container for the gpus
+func WithCapabilities(caps ...Capability) Opts {
+	return func(c *config) error {
+		c.Capabilities = append(c.Capabilities, caps...)
+		return nil
+	}
+}
+
+// WithRequiredCUDAVersion sets the required cuda version
+func WithRequiredCUDAVersion(major, minor int) Opts {
+	return func(c *config) error {
+		c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor))
+		return nil
+	}
+}
+
+// WithOCIHookPath sets the hook path for the binary
+func WithOCIHookPath(path string) Opts {
+	return func(c *config) error {
+		c.OCIHookPath = path
+		return nil
+	}
+}
+
+// WithLookupOCIHookPath sets the hook path for the binary via a binary name
+func WithLookupOCIHookPath(name string) Opts {
+	return func(c *config) error {
+		path, err := exec.LookPath(name)
+		if err != nil {
+			return err
+		}
+		c.OCIHookPath = path
+		return nil
+	}
+}