123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491 |
- package daemon // import "github.com/docker/docker/daemon"
- import (
- "context"
- "encoding/json"
- "fmt"
- "os"
- "path/filepath"
- "strings"
- coci "github.com/containerd/containerd/oci"
- "github.com/containerd/log"
- containertypes "github.com/docker/docker/api/types/container"
- imagetypes "github.com/docker/docker/api/types/image"
- "github.com/docker/docker/container"
- "github.com/docker/docker/daemon/config"
- "github.com/docker/docker/errdefs"
- "github.com/docker/docker/image"
- "github.com/docker/docker/oci"
- "github.com/docker/docker/pkg/sysinfo"
- "github.com/docker/docker/pkg/system"
- specs "github.com/opencontainers/runtime-spec/specs-go"
- "github.com/pkg/errors"
- "golang.org/x/sys/windows/registry"
- )
- const (
- credentialSpecRegistryLocation = `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Virtualization\Containers\CredentialSpecs`
- credentialSpecFileLocation = "CredentialSpecs"
- )
- func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container) (*specs.Spec, error) {
- img, err := daemon.imageService.GetImage(ctx, string(c.ImageID), imagetypes.GetImageOpts{})
- if err != nil {
- return nil, err
- }
- if err := image.CheckOS(img.OperatingSystem()); err != nil {
- return nil, err
- }
- s := oci.DefaultSpec()
- if err := coci.WithAnnotations(c.HostConfig.Annotations)(ctx, nil, nil, &s); err != nil {
- return nil, err
- }
- linkedEnv, err := daemon.setupLinkedContainers(c)
- if err != nil {
- return nil, err
- }
- // Note, unlike Unix, we do NOT call into SetupWorkingDirectory as
- // this is done in VMCompute. Further, we couldn't do it for Hyper-V
- // containers anyway.
- if err := daemon.setupSecretDir(c); err != nil {
- return nil, err
- }
- if err := daemon.setupConfigDir(c); err != nil {
- return nil, err
- }
- // In s.Mounts
- mounts, err := daemon.setupMounts(c)
- if err != nil {
- return nil, err
- }
- var isHyperV bool
- if c.HostConfig.Isolation.IsDefault() {
- // Container using default isolation, so take the default from the daemon configuration
- isHyperV = daemon.defaultIsolation.IsHyperV()
- } else {
- // Container may be requesting an explicit isolation mode.
- isHyperV = c.HostConfig.Isolation.IsHyperV()
- }
- if isHyperV {
- s.Windows.HyperV = &specs.WindowsHyperV{}
- }
- // If the container has not been started, and has configs or secrets
- // secrets, create symlinks to each config and secret. If it has been
- // started before, the symlinks should have already been created. Also, it
- // is important to not mount a Hyper-V container that has been started
- // before, to protect the host from the container; for example, from
- // malicious mutation of NTFS data structures.
- if !c.HasBeenStartedBefore && (len(c.SecretReferences) > 0 || len(c.ConfigReferences) > 0) {
- // The container file system is mounted before this function is called,
- // except for Hyper-V containers, so mount it here in that case.
- if isHyperV {
- if err := daemon.Mount(c); err != nil {
- return nil, err
- }
- defer daemon.Unmount(c)
- }
- if err := c.CreateSecretSymlinks(); err != nil {
- return nil, err
- }
- if err := c.CreateConfigSymlinks(); err != nil {
- return nil, err
- }
- }
- secretMounts, err := c.SecretMounts()
- if err != nil {
- return nil, err
- }
- if secretMounts != nil {
- mounts = append(mounts, secretMounts...)
- }
- if configMounts := c.ConfigMounts(); configMounts != nil {
- mounts = append(mounts, configMounts...)
- }
- for _, mount := range mounts {
- m := specs.Mount{
- Source: mount.Source,
- Destination: mount.Destination,
- }
- if !mount.Writable {
- m.Options = append(m.Options, "ro")
- }
- s.Mounts = append(s.Mounts, m)
- }
- // In s.Process
- s.Process.Cwd = c.Config.WorkingDir
- s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
- s.Process.Terminal = c.Config.Tty
- if c.Config.Tty {
- s.Process.ConsoleSize = &specs.Box{
- Height: c.HostConfig.ConsoleSize[0],
- Width: c.HostConfig.ConsoleSize[1],
- }
- }
- s.Process.User.Username = c.Config.User
- s.Windows.LayerFolders, err = daemon.imageService.GetLayerFolders(img, c.RWLayer)
- if err != nil {
- return nil, errors.Wrapf(err, "container %s", c.ID)
- }
- // Get endpoints for the libnetwork allocated networks to the container
- var epList []string
- AllowUnqualifiedDNSQuery := false
- gwHNSID := ""
- if c.NetworkSettings != nil {
- for n := range c.NetworkSettings.Networks {
- sn, err := daemon.FindNetwork(n)
- if err != nil {
- continue
- }
- ep, err := getEndpointInNetwork(c.Name, sn)
- if err != nil {
- continue
- }
- data, err := ep.DriverInfo()
- if err != nil {
- continue
- }
- if data["GW_INFO"] != nil {
- gwInfo := data["GW_INFO"].(map[string]interface{})
- if gwInfo["hnsid"] != nil {
- gwHNSID = gwInfo["hnsid"].(string)
- }
- }
- if data["hnsid"] != nil {
- epList = append(epList, data["hnsid"].(string))
- }
- if data["AllowUnqualifiedDNSQuery"] != nil {
- AllowUnqualifiedDNSQuery = true
- }
- }
- }
- var networkSharedContainerID string
- if c.HostConfig.NetworkMode.IsContainer() {
- networkSharedContainerID = c.NetworkSharedContainerID
- for _, ep := range c.SharedEndpointList {
- epList = append(epList, ep)
- }
- }
- if gwHNSID != "" {
- epList = append(epList, gwHNSID)
- }
- var dnsSearch []string
- if len(c.HostConfig.DNSSearch) > 0 {
- dnsSearch = c.HostConfig.DNSSearch
- } else if len(daemonCfg.DNSSearch) > 0 {
- dnsSearch = daemonCfg.DNSSearch
- }
- s.Windows.Network = &specs.WindowsNetwork{
- AllowUnqualifiedDNSQuery: AllowUnqualifiedDNSQuery,
- DNSSearchList: dnsSearch,
- EndpointList: epList,
- NetworkSharedContainerName: networkSharedContainerID,
- }
- if err := daemon.createSpecWindowsFields(c, &s, isHyperV); err != nil {
- return nil, err
- }
- if log.G(ctx).Level >= log.DebugLevel {
- if b, err := json.Marshal(&s); err == nil {
- log.G(ctx).Debugf("Generated spec: %s", string(b))
- }
- }
- return &s, nil
- }
- // Sets the Windows-specific fields of the OCI spec
- func (daemon *Daemon) createSpecWindowsFields(c *container.Container, s *specs.Spec, isHyperV bool) error {
- s.Hostname = c.FullHostname()
- if len(s.Process.Cwd) == 0 {
- // We default to C:\ to workaround the oddity of the case that the
- // default directory for cmd running as LocalSystem (or
- // ContainerAdministrator) is c:\windows\system32. Hence docker run
- // <image> cmd will by default end in c:\windows\system32, rather
- // than 'root' (/) on Linux. The oddity is that if you have a dockerfile
- // which has no WORKDIR and has a COPY file ., . will be interpreted
- // as c:\. Hence, setting it to default of c:\ makes for consistency.
- s.Process.Cwd = `C:\`
- }
- if c.Config.ArgsEscaped {
- s.Process.CommandLine = c.Path
- if len(c.Args) > 0 {
- s.Process.CommandLine += " " + system.EscapeArgs(c.Args)
- }
- } else {
- s.Process.Args = append([]string{c.Path}, c.Args...)
- }
- s.Root.Readonly = false // Windows does not support a read-only root filesystem
- if !isHyperV {
- if c.BaseFS == "" {
- return errors.New("createSpecWindowsFields: BaseFS of container " + c.ID + " is unexpectedly empty")
- }
- s.Root.Path = c.BaseFS // This is not set for Hyper-V containers
- if !strings.HasSuffix(s.Root.Path, `\`) {
- s.Root.Path = s.Root.Path + `\` // Ensure a correctly formatted volume GUID path \\?\Volume{GUID}\
- }
- }
- // First boot optimization
- s.Windows.IgnoreFlushesDuringBoot = !c.HasBeenStartedBefore
- setResourcesInSpec(c, s, isHyperV)
- // Read and add credentials from the security options if a credential spec has been provided.
- if err := daemon.setWindowsCredentialSpec(c, s); err != nil {
- return err
- }
- devices, err := setupWindowsDevices(c.HostConfig.Devices)
- if err != nil {
- return err
- }
- s.Windows.Devices = append(s.Windows.Devices, devices...)
- return nil
- }
- var errInvalidCredentialSpecSecOpt = errdefs.InvalidParameter(fmt.Errorf("invalid credential spec security option - value must be prefixed by 'file://', 'registry://', or 'raw://' followed by a non-empty value"))
- // setWindowsCredentialSpec sets the spec's `Windows.CredentialSpec`
- // field if relevant
- func (daemon *Daemon) setWindowsCredentialSpec(c *container.Container, s *specs.Spec) error {
- if c.HostConfig == nil || c.HostConfig.SecurityOpt == nil {
- return nil
- }
- // TODO (jrouge/wk8): if provided with several security options, we silently ignore
- // all but the last one (provided they're all valid, otherwise we do return an error);
- // this doesn't seem like a great idea?
- credentialSpec := ""
- // TODO(thaJeztah): extract validating and parsing SecurityOpt to a reusable function.
- for _, secOpt := range c.HostConfig.SecurityOpt {
- k, v, ok := strings.Cut(secOpt, "=")
- if !ok {
- return errdefs.InvalidParameter(fmt.Errorf("invalid security option: no equals sign in supplied value %s", secOpt))
- }
- // FIXME(thaJeztah): options should not be case-insensitive
- if !strings.EqualFold(k, "credentialspec") {
- return errdefs.InvalidParameter(fmt.Errorf("security option not supported: %s", k))
- }
- scheme, value, ok := strings.Cut(v, "://")
- if !ok || value == "" {
- return errInvalidCredentialSpecSecOpt
- }
- var err error
- switch strings.ToLower(scheme) {
- case "file":
- credentialSpec, err = readCredentialSpecFile(c.ID, daemon.root, filepath.Clean(value))
- if err != nil {
- return errdefs.InvalidParameter(err)
- }
- case "registry":
- credentialSpec, err = readCredentialSpecRegistry(c.ID, value)
- if err != nil {
- return errdefs.InvalidParameter(err)
- }
- case "config":
- // if the container does not have a DependencyStore, then it
- // isn't swarmkit managed. In order to avoid creating any
- // impression that `config://` is a valid API, return the same
- // error as if you'd passed any other random word.
- if c.DependencyStore == nil {
- return errInvalidCredentialSpecSecOpt
- }
- csConfig, err := c.DependencyStore.Configs().Get(value)
- if err != nil {
- return errdefs.System(errors.Wrap(err, "error getting value from config store"))
- }
- // stuff the resulting secret data into a string to use as the
- // CredentialSpec
- credentialSpec = string(csConfig.Spec.Data)
- case "raw":
- credentialSpec = value
- default:
- return errInvalidCredentialSpecSecOpt
- }
- }
- if credentialSpec != "" {
- if s.Windows == nil {
- s.Windows = &specs.Windows{}
- }
- s.Windows.CredentialSpec = credentialSpec
- }
- return nil
- }
- func setResourcesInSpec(c *container.Container, s *specs.Spec, isHyperV bool) {
- // In s.Windows.Resources
- cpuShares := uint16(c.HostConfig.CPUShares)
- cpuMaximum := uint16(c.HostConfig.CPUPercent) * 100
- cpuCount := uint64(c.HostConfig.CPUCount)
- if c.HostConfig.NanoCPUs > 0 {
- if isHyperV {
- cpuCount = uint64(c.HostConfig.NanoCPUs / 1e9)
- leftoverNanoCPUs := c.HostConfig.NanoCPUs % 1e9
- if leftoverNanoCPUs != 0 {
- cpuCount++
- cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(cpuCount) / (1e9 / 10000))
- if cpuMaximum < 1 {
- // The requested NanoCPUs is so small that we rounded to 0, use 1 instead
- cpuMaximum = 1
- }
- }
- } else {
- cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(sysinfo.NumCPU()) / (1e9 / 10000))
- if cpuMaximum < 1 {
- // The requested NanoCPUs is so small that we rounded to 0, use 1 instead
- cpuMaximum = 1
- }
- }
- }
- if cpuMaximum != 0 || cpuShares != 0 || cpuCount != 0 {
- if s.Windows.Resources == nil {
- s.Windows.Resources = &specs.WindowsResources{}
- }
- s.Windows.Resources.CPU = &specs.WindowsCPUResources{
- Maximum: &cpuMaximum,
- Shares: &cpuShares,
- Count: &cpuCount,
- }
- }
- memoryLimit := uint64(c.HostConfig.Memory)
- if memoryLimit != 0 {
- if s.Windows.Resources == nil {
- s.Windows.Resources = &specs.WindowsResources{}
- }
- s.Windows.Resources.Memory = &specs.WindowsMemoryResources{
- Limit: &memoryLimit,
- }
- }
- if c.HostConfig.IOMaximumBandwidth != 0 || c.HostConfig.IOMaximumIOps != 0 {
- if s.Windows.Resources == nil {
- s.Windows.Resources = &specs.WindowsResources{}
- }
- s.Windows.Resources.Storage = &specs.WindowsStorageResources{
- Bps: &c.HostConfig.IOMaximumBandwidth,
- Iops: &c.HostConfig.IOMaximumIOps,
- }
- }
- }
- // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig
- // It will do nothing on non-Linux platform
- func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig, daemonCfg *config.Config) {
- return
- }
- // registryKey is an interface wrapper around `registry.Key`,
- // listing only the methods we care about here.
- // It's mainly useful to easily allow mocking the registry in tests.
- type registryKey interface {
- GetStringValue(name string) (val string, valtype uint32, err error)
- Close() error
- }
- var registryOpenKeyFunc = func(baseKey registry.Key, path string, access uint32) (registryKey, error) {
- return registry.OpenKey(baseKey, path, access)
- }
- // readCredentialSpecRegistry is a helper function to read a credential spec from
- // the registry. If not found, we return an empty string and warn in the log.
- // This allows for staging on machines which do not have the necessary components.
- func readCredentialSpecRegistry(id, name string) (string, error) {
- key, err := registryOpenKeyFunc(registry.LOCAL_MACHINE, credentialSpecRegistryLocation, registry.QUERY_VALUE)
- if err != nil {
- return "", errors.Wrapf(err, "failed handling spec %q for container %s - registry key %s could not be opened", name, id, credentialSpecRegistryLocation)
- }
- defer key.Close()
- value, _, err := key.GetStringValue(name)
- if err != nil {
- if err == registry.ErrNotExist {
- return "", fmt.Errorf("registry credential spec %q for container %s was not found", name, id)
- }
- return "", errors.Wrapf(err, "error reading credential spec %q from registry for container %s", name, id)
- }
- return value, nil
- }
- // readCredentialSpecFile is a helper function to read a credential spec from
- // a file. If not found, we return an empty string and warn in the log.
- // This allows for staging on machines which do not have the necessary components.
- func readCredentialSpecFile(id, root, location string) (string, error) {
- if filepath.IsAbs(location) {
- return "", fmt.Errorf("invalid credential spec: file:// path cannot be absolute")
- }
- base := filepath.Join(root, credentialSpecFileLocation)
- full := filepath.Join(base, location)
- if !strings.HasPrefix(full, base) {
- return "", fmt.Errorf("invalid credential spec: file:// path must be under %s", base)
- }
- bcontents, err := os.ReadFile(full)
- if err != nil {
- return "", errors.Wrapf(err, "failed to load credential spec for container %s", id)
- }
- return string(bcontents[:]), nil
- }
- func setupWindowsDevices(devices []containertypes.DeviceMapping) (specDevices []specs.WindowsDevice, err error) {
- for _, deviceMapping := range devices {
- if strings.HasPrefix(deviceMapping.PathOnHost, "class/") {
- specDevices = append(specDevices, specs.WindowsDevice{
- ID: strings.TrimPrefix(deviceMapping.PathOnHost, "class/"),
- IDType: "class",
- })
- } else {
- idType, id, ok := strings.Cut(deviceMapping.PathOnHost, "://")
- if !ok {
- return nil, errors.Errorf("invalid device assignment path: '%s', must be 'class/ID' or 'IDType://ID'", deviceMapping.PathOnHost)
- }
- if idType == "" {
- return nil, errors.Errorf("invalid device assignment path: '%s', IDType cannot be empty", deviceMapping.PathOnHost)
- }
- specDevices = append(specDevices, specs.WindowsDevice{
- ID: id,
- IDType: idType,
- })
- }
- }
- return specDevices, nil
- }
|