oci_windows.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. package daemon // import "github.com/docker/docker/daemon"
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "os"
  7. "path/filepath"
  8. "strings"
  9. coci "github.com/containerd/containerd/oci"
  10. "github.com/containerd/log"
  11. containertypes "github.com/docker/docker/api/types/container"
  12. imagetypes "github.com/docker/docker/api/types/image"
  13. "github.com/docker/docker/container"
  14. "github.com/docker/docker/daemon/config"
  15. "github.com/docker/docker/errdefs"
  16. "github.com/docker/docker/image"
  17. "github.com/docker/docker/oci"
  18. "github.com/docker/docker/pkg/sysinfo"
  19. "github.com/docker/docker/pkg/system"
  20. specs "github.com/opencontainers/runtime-spec/specs-go"
  21. "github.com/pkg/errors"
  22. "golang.org/x/sys/windows/registry"
  23. )
  // Locations from which credential specs named in a container's security
  // options are loaded.
  const (
  	// credentialSpecFileLocation is the directory name, joined onto the root
  	// passed to readCredentialSpecFile, under which "file://" credential specs
  	// are looked up.
  	credentialSpecFileLocation = "CredentialSpecs"

  	// credentialSpecRegistryLocation is the registry key, opened under
  	// HKEY_LOCAL_MACHINE, whose string values hold "registry://" credential
  	// specs.
  	credentialSpecRegistryLocation = `SOFTWARE\Microsoft\Windows NT\CurrentVersion\Virtualization\Containers\CredentialSpecs`
  )
  28. func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c *container.Container) (*specs.Spec, error) {
  29. img, err := daemon.imageService.GetImage(ctx, string(c.ImageID), imagetypes.GetImageOpts{})
  30. if err != nil {
  31. return nil, err
  32. }
  33. if err := image.CheckOS(img.OperatingSystem()); err != nil {
  34. return nil, err
  35. }
  36. s := oci.DefaultSpec()
  37. if err := coci.WithAnnotations(c.HostConfig.Annotations)(ctx, nil, nil, &s); err != nil {
  38. return nil, err
  39. }
  40. linkedEnv, err := daemon.setupLinkedContainers(c)
  41. if err != nil {
  42. return nil, err
  43. }
  44. // Note, unlike Unix, we do NOT call into SetupWorkingDirectory as
  45. // this is done in VMCompute. Further, we couldn't do it for Hyper-V
  46. // containers anyway.
  47. if err := daemon.setupSecretDir(c); err != nil {
  48. return nil, err
  49. }
  50. if err := daemon.setupConfigDir(c); err != nil {
  51. return nil, err
  52. }
  53. // In s.Mounts
  54. mounts, err := daemon.setupMounts(c)
  55. if err != nil {
  56. return nil, err
  57. }
  58. var isHyperV bool
  59. if c.HostConfig.Isolation.IsDefault() {
  60. // Container using default isolation, so take the default from the daemon configuration
  61. isHyperV = daemon.defaultIsolation.IsHyperV()
  62. } else {
  63. // Container may be requesting an explicit isolation mode.
  64. isHyperV = c.HostConfig.Isolation.IsHyperV()
  65. }
  66. if isHyperV {
  67. s.Windows.HyperV = &specs.WindowsHyperV{}
  68. }
  69. // If the container has not been started, and has configs or secrets
  70. // secrets, create symlinks to each config and secret. If it has been
  71. // started before, the symlinks should have already been created. Also, it
  72. // is important to not mount a Hyper-V container that has been started
  73. // before, to protect the host from the container; for example, from
  74. // malicious mutation of NTFS data structures.
  75. if !c.HasBeenStartedBefore && (len(c.SecretReferences) > 0 || len(c.ConfigReferences) > 0) {
  76. // The container file system is mounted before this function is called,
  77. // except for Hyper-V containers, so mount it here in that case.
  78. if isHyperV {
  79. if err := daemon.Mount(c); err != nil {
  80. return nil, err
  81. }
  82. defer daemon.Unmount(c)
  83. }
  84. if err := c.CreateSecretSymlinks(); err != nil {
  85. return nil, err
  86. }
  87. if err := c.CreateConfigSymlinks(); err != nil {
  88. return nil, err
  89. }
  90. }
  91. secretMounts, err := c.SecretMounts()
  92. if err != nil {
  93. return nil, err
  94. }
  95. if secretMounts != nil {
  96. mounts = append(mounts, secretMounts...)
  97. }
  98. if configMounts := c.ConfigMounts(); configMounts != nil {
  99. mounts = append(mounts, configMounts...)
  100. }
  101. for _, mount := range mounts {
  102. m := specs.Mount{
  103. Source: mount.Source,
  104. Destination: mount.Destination,
  105. }
  106. if !mount.Writable {
  107. m.Options = append(m.Options, "ro")
  108. }
  109. s.Mounts = append(s.Mounts, m)
  110. }
  111. // In s.Process
  112. s.Process.Cwd = c.Config.WorkingDir
  113. s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
  114. s.Process.Terminal = c.Config.Tty
  115. if c.Config.Tty {
  116. s.Process.ConsoleSize = &specs.Box{
  117. Height: c.HostConfig.ConsoleSize[0],
  118. Width: c.HostConfig.ConsoleSize[1],
  119. }
  120. }
  121. s.Process.User.Username = c.Config.User
  122. s.Windows.LayerFolders, err = daemon.imageService.GetLayerFolders(img, c.RWLayer)
  123. if err != nil {
  124. return nil, errors.Wrapf(err, "container %s", c.ID)
  125. }
  126. // Get endpoints for the libnetwork allocated networks to the container
  127. var epList []string
  128. AllowUnqualifiedDNSQuery := false
  129. gwHNSID := ""
  130. if c.NetworkSettings != nil {
  131. for n := range c.NetworkSettings.Networks {
  132. sn, err := daemon.FindNetwork(n)
  133. if err != nil {
  134. continue
  135. }
  136. ep, err := getEndpointInNetwork(c.Name, sn)
  137. if err != nil {
  138. continue
  139. }
  140. data, err := ep.DriverInfo()
  141. if err != nil {
  142. continue
  143. }
  144. if data["GW_INFO"] != nil {
  145. gwInfo := data["GW_INFO"].(map[string]interface{})
  146. if gwInfo["hnsid"] != nil {
  147. gwHNSID = gwInfo["hnsid"].(string)
  148. }
  149. }
  150. if data["hnsid"] != nil {
  151. epList = append(epList, data["hnsid"].(string))
  152. }
  153. if data["AllowUnqualifiedDNSQuery"] != nil {
  154. AllowUnqualifiedDNSQuery = true
  155. }
  156. }
  157. }
  158. var networkSharedContainerID string
  159. if c.HostConfig.NetworkMode.IsContainer() {
  160. networkSharedContainerID = c.NetworkSharedContainerID
  161. for _, ep := range c.SharedEndpointList {
  162. epList = append(epList, ep)
  163. }
  164. }
  165. if gwHNSID != "" {
  166. epList = append(epList, gwHNSID)
  167. }
  168. var dnsSearch []string
  169. if len(c.HostConfig.DNSSearch) > 0 {
  170. dnsSearch = c.HostConfig.DNSSearch
  171. } else if len(daemonCfg.DNSSearch) > 0 {
  172. dnsSearch = daemonCfg.DNSSearch
  173. }
  174. s.Windows.Network = &specs.WindowsNetwork{
  175. AllowUnqualifiedDNSQuery: AllowUnqualifiedDNSQuery,
  176. DNSSearchList: dnsSearch,
  177. EndpointList: epList,
  178. NetworkSharedContainerName: networkSharedContainerID,
  179. }
  180. if err := daemon.createSpecWindowsFields(c, &s, isHyperV); err != nil {
  181. return nil, err
  182. }
  183. if log.G(ctx).Level >= log.DebugLevel {
  184. if b, err := json.Marshal(&s); err == nil {
  185. log.G(ctx).Debugf("Generated spec: %s", string(b))
  186. }
  187. }
  188. return &s, nil
  189. }
  190. // Sets the Windows-specific fields of the OCI spec
  191. func (daemon *Daemon) createSpecWindowsFields(c *container.Container, s *specs.Spec, isHyperV bool) error {
  192. s.Hostname = c.FullHostname()
  193. if len(s.Process.Cwd) == 0 {
  194. // We default to C:\ to workaround the oddity of the case that the
  195. // default directory for cmd running as LocalSystem (or
  196. // ContainerAdministrator) is c:\windows\system32. Hence docker run
  197. // <image> cmd will by default end in c:\windows\system32, rather
  198. // than 'root' (/) on Linux. The oddity is that if you have a dockerfile
  199. // which has no WORKDIR and has a COPY file ., . will be interpreted
  200. // as c:\. Hence, setting it to default of c:\ makes for consistency.
  201. s.Process.Cwd = `C:\`
  202. }
  203. if c.Config.ArgsEscaped {
  204. s.Process.CommandLine = c.Path
  205. if len(c.Args) > 0 {
  206. s.Process.CommandLine += " " + system.EscapeArgs(c.Args)
  207. }
  208. } else {
  209. s.Process.Args = append([]string{c.Path}, c.Args...)
  210. }
  211. s.Root.Readonly = false // Windows does not support a read-only root filesystem
  212. if !isHyperV {
  213. if c.BaseFS == "" {
  214. return errors.New("createSpecWindowsFields: BaseFS of container " + c.ID + " is unexpectedly empty")
  215. }
  216. s.Root.Path = c.BaseFS // This is not set for Hyper-V containers
  217. if !strings.HasSuffix(s.Root.Path, `\`) {
  218. s.Root.Path = s.Root.Path + `\` // Ensure a correctly formatted volume GUID path \\?\Volume{GUID}\
  219. }
  220. }
  221. // First boot optimization
  222. s.Windows.IgnoreFlushesDuringBoot = !c.HasBeenStartedBefore
  223. setResourcesInSpec(c, s, isHyperV)
  224. // Read and add credentials from the security options if a credential spec has been provided.
  225. if err := daemon.setWindowsCredentialSpec(c, s); err != nil {
  226. return err
  227. }
  228. devices, err := setupWindowsDevices(c.HostConfig.Devices)
  229. if err != nil {
  230. return err
  231. }
  232. s.Windows.Devices = append(s.Windows.Devices, devices...)
  233. return nil
  234. }
  235. var errInvalidCredentialSpecSecOpt = errdefs.InvalidParameter(fmt.Errorf("invalid credential spec security option - value must be prefixed by 'file://', 'registry://', or 'raw://' followed by a non-empty value"))
  236. // setWindowsCredentialSpec sets the spec's `Windows.CredentialSpec`
  237. // field if relevant
  238. func (daemon *Daemon) setWindowsCredentialSpec(c *container.Container, s *specs.Spec) error {
  239. if c.HostConfig == nil || c.HostConfig.SecurityOpt == nil {
  240. return nil
  241. }
  242. // TODO (jrouge/wk8): if provided with several security options, we silently ignore
  243. // all but the last one (provided they're all valid, otherwise we do return an error);
  244. // this doesn't seem like a great idea?
  245. credentialSpec := ""
  246. // TODO(thaJeztah): extract validating and parsing SecurityOpt to a reusable function.
  247. for _, secOpt := range c.HostConfig.SecurityOpt {
  248. k, v, ok := strings.Cut(secOpt, "=")
  249. if !ok {
  250. return errdefs.InvalidParameter(fmt.Errorf("invalid security option: no equals sign in supplied value %s", secOpt))
  251. }
  252. // FIXME(thaJeztah): options should not be case-insensitive
  253. if !strings.EqualFold(k, "credentialspec") {
  254. return errdefs.InvalidParameter(fmt.Errorf("security option not supported: %s", k))
  255. }
  256. scheme, value, ok := strings.Cut(v, "://")
  257. if !ok || value == "" {
  258. return errInvalidCredentialSpecSecOpt
  259. }
  260. var err error
  261. switch strings.ToLower(scheme) {
  262. case "file":
  263. credentialSpec, err = readCredentialSpecFile(c.ID, daemon.root, filepath.Clean(value))
  264. if err != nil {
  265. return errdefs.InvalidParameter(err)
  266. }
  267. case "registry":
  268. credentialSpec, err = readCredentialSpecRegistry(c.ID, value)
  269. if err != nil {
  270. return errdefs.InvalidParameter(err)
  271. }
  272. case "config":
  273. // if the container does not have a DependencyStore, then it
  274. // isn't swarmkit managed. In order to avoid creating any
  275. // impression that `config://` is a valid API, return the same
  276. // error as if you'd passed any other random word.
  277. if c.DependencyStore == nil {
  278. return errInvalidCredentialSpecSecOpt
  279. }
  280. csConfig, err := c.DependencyStore.Configs().Get(value)
  281. if err != nil {
  282. return errdefs.System(errors.Wrap(err, "error getting value from config store"))
  283. }
  284. // stuff the resulting secret data into a string to use as the
  285. // CredentialSpec
  286. credentialSpec = string(csConfig.Spec.Data)
  287. case "raw":
  288. credentialSpec = value
  289. default:
  290. return errInvalidCredentialSpecSecOpt
  291. }
  292. }
  293. if credentialSpec != "" {
  294. if s.Windows == nil {
  295. s.Windows = &specs.Windows{}
  296. }
  297. s.Windows.CredentialSpec = credentialSpec
  298. }
  299. return nil
  300. }
  301. func setResourcesInSpec(c *container.Container, s *specs.Spec, isHyperV bool) {
  302. // In s.Windows.Resources
  303. cpuShares := uint16(c.HostConfig.CPUShares)
  304. cpuMaximum := uint16(c.HostConfig.CPUPercent) * 100
  305. cpuCount := uint64(c.HostConfig.CPUCount)
  306. if c.HostConfig.NanoCPUs > 0 {
  307. if isHyperV {
  308. cpuCount = uint64(c.HostConfig.NanoCPUs / 1e9)
  309. leftoverNanoCPUs := c.HostConfig.NanoCPUs % 1e9
  310. if leftoverNanoCPUs != 0 {
  311. cpuCount++
  312. cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(cpuCount) / (1e9 / 10000))
  313. if cpuMaximum < 1 {
  314. // The requested NanoCPUs is so small that we rounded to 0, use 1 instead
  315. cpuMaximum = 1
  316. }
  317. }
  318. } else {
  319. cpuMaximum = uint16(c.HostConfig.NanoCPUs / int64(sysinfo.NumCPU()) / (1e9 / 10000))
  320. if cpuMaximum < 1 {
  321. // The requested NanoCPUs is so small that we rounded to 0, use 1 instead
  322. cpuMaximum = 1
  323. }
  324. }
  325. }
  326. if cpuMaximum != 0 || cpuShares != 0 || cpuCount != 0 {
  327. if s.Windows.Resources == nil {
  328. s.Windows.Resources = &specs.WindowsResources{}
  329. }
  330. s.Windows.Resources.CPU = &specs.WindowsCPUResources{
  331. Maximum: &cpuMaximum,
  332. Shares: &cpuShares,
  333. Count: &cpuCount,
  334. }
  335. }
  336. memoryLimit := uint64(c.HostConfig.Memory)
  337. if memoryLimit != 0 {
  338. if s.Windows.Resources == nil {
  339. s.Windows.Resources = &specs.WindowsResources{}
  340. }
  341. s.Windows.Resources.Memory = &specs.WindowsMemoryResources{
  342. Limit: &memoryLimit,
  343. }
  344. }
  345. if c.HostConfig.IOMaximumBandwidth != 0 || c.HostConfig.IOMaximumIOps != 0 {
  346. if s.Windows.Resources == nil {
  347. s.Windows.Resources = &specs.WindowsResources{}
  348. }
  349. s.Windows.Resources.Storage = &specs.WindowsStorageResources{
  350. Bps: &c.HostConfig.IOMaximumBandwidth,
  351. Iops: &c.HostConfig.IOMaximumIOps,
  352. }
  353. }
  354. }
  355. // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig
  356. // It will do nothing on non-Linux platform
  357. func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig, daemonCfg *config.Config) {
  358. return
  359. }
  // registryKey is the subset of `registry.Key` that this file relies on.
  // Depending on the interface rather than the concrete type mainly makes it
  // easy to mock the registry in tests.
  type registryKey interface {
  	// Close closes the key.
  	Close() error
  	// GetStringValue returns the string value stored under name together
  	// with its registry value type.
  	GetStringValue(name string) (val string, valtype uint32, err error)
  }
  367. var registryOpenKeyFunc = func(baseKey registry.Key, path string, access uint32) (registryKey, error) {
  368. return registry.OpenKey(baseKey, path, access)
  369. }
  370. // readCredentialSpecRegistry is a helper function to read a credential spec from
  371. // the registry. If not found, we return an empty string and warn in the log.
  372. // This allows for staging on machines which do not have the necessary components.
  373. func readCredentialSpecRegistry(id, name string) (string, error) {
  374. key, err := registryOpenKeyFunc(registry.LOCAL_MACHINE, credentialSpecRegistryLocation, registry.QUERY_VALUE)
  375. if err != nil {
  376. return "", errors.Wrapf(err, "failed handling spec %q for container %s - registry key %s could not be opened", name, id, credentialSpecRegistryLocation)
  377. }
  378. defer key.Close()
  379. value, _, err := key.GetStringValue(name)
  380. if err != nil {
  381. if err == registry.ErrNotExist {
  382. return "", fmt.Errorf("registry credential spec %q for container %s was not found", name, id)
  383. }
  384. return "", errors.Wrapf(err, "error reading credential spec %q from registry for container %s", name, id)
  385. }
  386. return value, nil
  387. }
  388. // readCredentialSpecFile is a helper function to read a credential spec from
  389. // a file. If not found, we return an empty string and warn in the log.
  390. // This allows for staging on machines which do not have the necessary components.
  391. func readCredentialSpecFile(id, root, location string) (string, error) {
  392. if filepath.IsAbs(location) {
  393. return "", fmt.Errorf("invalid credential spec: file:// path cannot be absolute")
  394. }
  395. base := filepath.Join(root, credentialSpecFileLocation)
  396. full := filepath.Join(base, location)
  397. if !strings.HasPrefix(full, base) {
  398. return "", fmt.Errorf("invalid credential spec: file:// path must be under %s", base)
  399. }
  400. bcontents, err := os.ReadFile(full)
  401. if err != nil {
  402. return "", errors.Wrapf(err, "failed to load credential spec for container %s", id)
  403. }
  404. return string(bcontents[:]), nil
  405. }
  406. func setupWindowsDevices(devices []containertypes.DeviceMapping) (specDevices []specs.WindowsDevice, err error) {
  407. for _, deviceMapping := range devices {
  408. if strings.HasPrefix(deviceMapping.PathOnHost, "class/") {
  409. specDevices = append(specDevices, specs.WindowsDevice{
  410. ID: strings.TrimPrefix(deviceMapping.PathOnHost, "class/"),
  411. IDType: "class",
  412. })
  413. } else {
  414. idType, id, ok := strings.Cut(deviceMapping.PathOnHost, "://")
  415. if !ok {
  416. return nil, errors.Errorf("invalid device assignment path: '%s', must be 'class/ID' or 'IDType://ID'", deviceMapping.PathOnHost)
  417. }
  418. if idType == "" {
  419. return nil, errors.Errorf("invalid device assignment path: '%s', IDType cannot be empty", deviceMapping.PathOnHost)
  420. }
  421. specDevices = append(specDevices, specs.WindowsDevice{
  422. ID: id,
  423. IDType: idType,
  424. })
  425. }
  426. }
  427. return specDevices, nil
  428. }