|
@@ -0,0 +1,207 @@
|
|
|
+/*
|
|
|
+ Copyright The containerd Authors.
|
|
|
+
|
|
|
+ Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
+ you may not use this file except in compliance with the License.
|
|
|
+ You may obtain a copy of the License at
|
|
|
+
|
|
|
+ http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+
|
|
|
+ Unless required by applicable law or agreed to in writing, software
|
|
|
+ distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
+ See the License for the specific language governing permissions and
|
|
|
+ limitations under the License.
|
|
|
+*/
|
|
|
+
|
|
|
+package nvidia
|
|
|
+
|
|
|
+import (
|
|
|
+ "context"
|
|
|
+ "fmt"
|
|
|
+ "os"
|
|
|
+ "os/exec"
|
|
|
+ "strconv"
|
|
|
+ "strings"
|
|
|
+
|
|
|
+ "github.com/containerd/containerd/containers"
|
|
|
+ "github.com/containerd/containerd/oci"
|
|
|
+ specs "github.com/opencontainers/runtime-spec/specs-go"
|
|
|
+)
|
|
|
+
|
|
|
+const nvidiaCLI = "nvidia-container-cli"
|
|
|
+
|
|
|
+// Capability specifies capabilities for the gpu inside the container
|
|
|
+// Detailed explanation of options can be found:
|
|
|
+// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
|
|
|
+type Capability string
|
|
|
+
|
|
|
+const (
|
|
|
+ // Compute capability
|
|
|
+ Compute Capability = "compute"
|
|
|
+ // Compat32 capability
|
|
|
+ Compat32 Capability = "compat32"
|
|
|
+ // Graphics capability
|
|
|
+ Graphics Capability = "graphics"
|
|
|
+ // Utility capability
|
|
|
+ Utility Capability = "utility"
|
|
|
+ // Video capability
|
|
|
+ Video Capability = "video"
|
|
|
+ // Display capability
|
|
|
+ Display Capability = "display"
|
|
|
+)
|
|
|
+
|
|
|
+var allCaps = []Capability{
|
|
|
+ Compute,
|
|
|
+ Compat32,
|
|
|
+ Graphics,
|
|
|
+ Utility,
|
|
|
+ Video,
|
|
|
+ Display,
|
|
|
+}
|
|
|
+
|
|
|
+// WithGPUs adds NVIDIA gpu support to a container
|
|
|
+func WithGPUs(opts ...Opts) oci.SpecOpts {
|
|
|
+ return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
|
|
|
+ c := &config{}
|
|
|
+ for _, o := range opts {
|
|
|
+ if err := o(c); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if c.OCIHookPath == "" {
|
|
|
+ path, err := exec.LookPath("containerd")
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ c.OCIHookPath = path
|
|
|
+ }
|
|
|
+ nvidiaPath, err := exec.LookPath(nvidiaCLI)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ if s.Hooks == nil {
|
|
|
+ s.Hooks = &specs.Hooks{}
|
|
|
+ }
|
|
|
+ s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
|
|
|
+ Path: c.OCIHookPath,
|
|
|
+ Args: append([]string{
|
|
|
+ "containerd",
|
|
|
+ "oci-hook",
|
|
|
+ "--",
|
|
|
+ nvidiaPath,
|
|
|
+ // ensures the required kernel modules are properly loaded
|
|
|
+ "--load-kmods",
|
|
|
+ }, c.args()...),
|
|
|
+ Env: os.Environ(),
|
|
|
+ })
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+type config struct {
|
|
|
+ Devices []string
|
|
|
+ Capabilities []Capability
|
|
|
+ LoadKmods bool
|
|
|
+ LDCache string
|
|
|
+ LDConfig string
|
|
|
+ Requirements []string
|
|
|
+ OCIHookPath string
|
|
|
+}
|
|
|
+
|
|
|
+func (c *config) args() []string {
|
|
|
+ var args []string
|
|
|
+
|
|
|
+ if c.LoadKmods {
|
|
|
+ args = append(args, "--load-kmods")
|
|
|
+ }
|
|
|
+ if c.LDCache != "" {
|
|
|
+ args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache))
|
|
|
+ }
|
|
|
+ args = append(args,
|
|
|
+ "configure",
|
|
|
+ )
|
|
|
+ if len(c.Devices) > 0 {
|
|
|
+ args = append(args, fmt.Sprintf("--device=%s", strings.Join(c.Devices, ",")))
|
|
|
+ }
|
|
|
+ for _, c := range c.Capabilities {
|
|
|
+ args = append(args, fmt.Sprintf("--%s", c))
|
|
|
+ }
|
|
|
+ if c.LDConfig != "" {
|
|
|
+ args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig))
|
|
|
+ }
|
|
|
+ for _, r := range c.Requirements {
|
|
|
+ args = append(args, fmt.Sprintf("--require=%s", r))
|
|
|
+ }
|
|
|
+ args = append(args, "--pid={{pid}}", "{{rootfs}}")
|
|
|
+ return args
|
|
|
+}
|
|
|
+
|
|
|
+// Opts are options for configuring gpu support
|
|
|
+type Opts func(*config) error
|
|
|
+
|
|
|
+// WithDevices adds the provided device indexes to the container
|
|
|
+func WithDevices(ids ...int) Opts {
|
|
|
+ return func(c *config) error {
|
|
|
+ for _, i := range ids {
|
|
|
+ c.Devices = append(c.Devices, strconv.Itoa(i))
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// WithDeviceUUIDs adds the specific device UUID to the container
|
|
|
+func WithDeviceUUIDs(uuids ...string) Opts {
|
|
|
+ return func(c *config) error {
|
|
|
+ c.Devices = append(c.Devices, uuids...)
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// WithAllDevices adds all gpus to the container
|
|
|
+func WithAllDevices(c *config) error {
|
|
|
+ c.Devices = []string{"all"}
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// WithAllCapabilities adds all capabilities to the container for the gpus
|
|
|
+func WithAllCapabilities(c *config) error {
|
|
|
+ c.Capabilities = allCaps
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// WithCapabilities adds the specified capabilities to the container for the gpus
|
|
|
+func WithCapabilities(caps ...Capability) Opts {
|
|
|
+ return func(c *config) error {
|
|
|
+ c.Capabilities = append(c.Capabilities, caps...)
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// WithRequiredCUDAVersion sets the required cuda version
|
|
|
+func WithRequiredCUDAVersion(major, minor int) Opts {
|
|
|
+ return func(c *config) error {
|
|
|
+ c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor))
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// WithOCIHookPath sets the hook path for the binary
|
|
|
+func WithOCIHookPath(path string) Opts {
|
|
|
+ return func(c *config) error {
|
|
|
+ c.OCIHookPath = path
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// WithLookupOCIHookPath sets the hook path for the binary via a binary name
|
|
|
+func WithLookupOCIHookPath(name string) Opts {
|
|
|
+ return func(c *config) error {
|
|
|
+ path, err := exec.LookPath(name)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ c.OCIHookPath = path
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+}
|