// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
//go:build go1.19

// Package daemon exposes the functions that occur on the host server
// that the Docker daemon is running.
//
// In implementing the various functions of the daemon, there is often
// a method-specific struct for configuring the runtime behavior.
package daemon // import "github.com/docker/docker/daemon"

import (
	"context"
	"fmt"
	"net"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"sync"
	"sync/atomic"
	"time"

	"github.com/containerd/containerd"
	"github.com/containerd/containerd/defaults"
	"github.com/containerd/containerd/pkg/dialer"
	"github.com/containerd/containerd/pkg/userns"
	"github.com/containerd/containerd/remotes/docker"
	"github.com/containerd/log"
	"github.com/distribution/reference"
	dist "github.com/docker/distribution"
	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/backend"
	containertypes "github.com/docker/docker/api/types/container"
	imagetypes "github.com/docker/docker/api/types/image"
	registrytypes "github.com/docker/docker/api/types/registry"
	"github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/api/types/volume"
	"github.com/docker/docker/builder"
	"github.com/docker/docker/container"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/docker/daemon/config"
	ctrd "github.com/docker/docker/daemon/containerd"
	"github.com/docker/docker/daemon/events"
	_ "github.com/docker/docker/daemon/graphdriver/register" // register graph drivers
	"github.com/docker/docker/daemon/images"
	dlogger "github.com/docker/docker/daemon/logger"
	"github.com/docker/docker/daemon/logger/local"
	"github.com/docker/docker/daemon/network"
	"github.com/docker/docker/daemon/snapshotter"
	"github.com/docker/docker/daemon/stats"
	"github.com/docker/docker/distribution"
	dmetadata "github.com/docker/docker/distribution/metadata"
	"github.com/docker/docker/dockerversion"
	"github.com/docker/docker/errdefs"
	"github.com/docker/docker/image"
	"github.com/docker/docker/internal/compatcontext"
	"github.com/docker/docker/layer"
	libcontainerdtypes "github.com/docker/docker/libcontainerd/types"
	"github.com/docker/docker/libnetwork"
	"github.com/docker/docker/libnetwork/cluster"
	nwconfig "github.com/docker/docker/libnetwork/config"
	"github.com/docker/docker/pkg/authorization"
	"github.com/docker/docker/pkg/fileutils"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/plugingetter"
	"github.com/docker/docker/pkg/sysinfo"
	"github.com/docker/docker/pkg/system"
	"github.com/docker/docker/plugin"
	pluginexec "github.com/docker/docker/plugin/executor/containerd"
	refstore "github.com/docker/docker/reference"
	"github.com/docker/docker/registry"
	"github.com/docker/docker/runconfig"
	volumesservice "github.com/docker/docker/volume/service"
	"github.com/moby/buildkit/util/resolver"
	resolverconfig "github.com/moby/buildkit/util/resolver/config"
	"github.com/moby/locker"
	"github.com/pkg/errors"
	"go.etcd.io/bbolt"
	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
	"golang.org/x/sync/semaphore"
	"google.golang.org/grpc"
	"google.golang.org/grpc/backoff"
	"google.golang.org/grpc/credentials/insecure"
	"resenje.org/singleflight"
)

type configStore struct {
	config.Config

	Runtimes runtimes
}

// Daemon holds information about the Docker daemon.
type Daemon struct {
	id                    string
	repository            string
	containers            container.Store
	containersReplica     *container.ViewDB
	execCommands          *container.ExecStore
	imageService          ImageService
	configStore           atomic.Pointer[configStore]
	configReload          sync.Mutex
	statsCollector        *stats.Collector
	defaultLogConfig      containertypes.LogConfig
	registryService       *registry.Service
	EventsService         *events.Events
	netController         *libnetwork.Controller
	volumes               *volumesservice.VolumesService
	root                  string
	sysInfoOnce           sync.Once
	sysInfo               *sysinfo.SysInfo
	shutdown              bool
	idMapping             idtools.IdentityMapping
	PluginStore           *plugin.Store // TODO: remove
	pluginManager         *plugin.Manager
	linkIndex             *linkIndex
	containerdClient      *containerd.Client
	containerd            libcontainerdtypes.Client
	defaultIsolation      containertypes.Isolation // Default isolation mode on Windows
	clusterProvider       cluster.Provider
	cluster               Cluster
	genericResources      []swarm.GenericResource
	metricsPluginListener net.Listener
	ReferenceStore        refstore.Store

	machineMemory uint64

	seccompProfile     []byte
	seccompProfilePath string

	usageContainers singleflight.Group[struct{}, []*types.Container]
	usageImages     singleflight.Group[struct{}, []*imagetypes.Summary]
	usageVolumes    singleflight.Group[struct{}, []*volume.Volume]
	usageLayer      singleflight.Group[struct{}, int64]

	pruneRunning int32
	hosts        map[string]bool // hosts stores the addresses the daemon is listening on
	startupDone  chan struct{}

	attachmentStore       network.AttachmentStore
	attachableNetworkLock *locker.Locker

	// This is used for Windows which doesn't currently support running on containerd
	// It stores metadata for the content store (used for manifest caching)
	// This needs to be closed on daemon exit
	mdDB *bbolt.DB

	usesSnapshotter bool
}

// ID returns the daemon id
func (daemon *Daemon) ID() string {
	return daemon.id
}

// StoreHosts stores the addresses the daemon is listening on
func (daemon *Daemon) StoreHosts(hosts []string) {
	if daemon.hosts == nil {
		daemon.hosts = make(map[string]bool)
	}
	for _, h := range hosts {
		daemon.hosts[h] = true
	}
}

// config returns an immutable snapshot of the current daemon configuration.
// Multiple calls to this function will return the same pointer until the
// configuration is reloaded, so callers must take care not to modify the
// returned value.
//
// To ensure that the configuration used remains consistent throughout the
// lifetime of an operation, the configuration pointer should be passed down the
// call stack, like one would a [context.Context] value. Only the entrypoints
// for operations, the outermost functions, should call this function.
func (daemon *Daemon) config() *configStore {
	cfg := daemon.configStore.Load()
	if cfg == nil {
		return &configStore{}
	}
	return cfg
}
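
// The snippet below is a minimal sketch (the operation and helper names are
// hypothetical, not part of this file) of the calling convention described
// above: capture the snapshot once at the entrypoint and pass it down, so a
// concurrent configuration reload cannot change the config mid-operation.
//
//	func (daemon *Daemon) doSomething(ctx context.Context) error {
//		cfg := daemon.config() // capture one consistent snapshot
//		return daemon.doSomethingWith(ctx, cfg)
//	}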

// Config returns daemon's config.
func (daemon *Daemon) Config() config.Config {
	return daemon.config().Config
}

// HasExperimental returns whether the experimental features of the daemon are enabled or not
func (daemon *Daemon) HasExperimental() bool {
	return daemon.config().Experimental
}

// Features returns the features map from configStore
func (daemon *Daemon) Features() map[string]bool {
	return daemon.config().Features
}

// UsesSnapshotter returns true if the feature flag to use the containerd snapshotter is enabled
func (daemon *Daemon) UsesSnapshotter() bool {
	return daemon.usesSnapshotter
}
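
// For context: outside of tests, the snapshotter integration is enabled via
// the "features" section of the daemon configuration (daemon.json), which
// feeds the config.Features lookup in NewDaemon below:
//
//	{
//		"features": { "containerd-snapshotter": true }
//	}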

// RegistryHosts returns the registry hosts configuration for the host component
// of a distribution image reference.
func (daemon *Daemon) RegistryHosts(host string) ([]docker.RegistryHost, error) {
	m := map[string]resolverconfig.RegistryConfig{
		"docker.io": {Mirrors: daemon.registryService.ServiceConfig().Mirrors},
	}
	conf := daemon.registryService.ServiceConfig().IndexConfigs
	for k, v := range conf {
		c := m[k]
		if !v.Secure {
			t := true
			c.PlainHTTP = &t
			c.Insecure = &t
		}
		m[k] = c
	}
	if c, ok := m[host]; !ok && daemon.registryService.IsInsecureRegistry(host) {
		t := true
		c.PlainHTTP = &t
		c.Insecure = &t
		m[host] = c
	}
	for k, v := range m {
		v.TLSConfigDir = []string{registry.HostCertsDir(k)}
		m[k] = v
	}
	certsDir := registry.CertsDir()
	if fis, err := os.ReadDir(certsDir); err == nil {
		for _, fi := range fis {
			if _, ok := m[fi.Name()]; !ok {
				m[fi.Name()] = resolverconfig.RegistryConfig{
					TLSConfigDir: []string{filepath.Join(certsDir, fi.Name())},
				}
			}
		}
	}
	return resolver.NewRegistryConfig(m)(host)
}
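
// Usage sketch (illustrative only): NewDaemon below wires this method into the
// containerd-backed image service as its resolver callback; a caller resolves
// one host at a time, e.g.:
//
//	hosts, err := daemon.RegistryHosts("docker.io")
//	if err != nil {
//		// handle error
//	}
//	// hosts reflects configured mirrors, insecure registries, and
//	// per-host certificate directories.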

// layerAccessor may be implemented by ImageService
type layerAccessor interface {
	GetLayerByID(cid string) (layer.RWLayer, error)
}

func (daemon *Daemon) restore(cfg *configStore) error {
	var mapLock sync.Mutex
	containers := make(map[string]*container.Container)

	log.G(context.TODO()).Info("Loading containers: start.")

	dir, err := os.ReadDir(daemon.repository)
	if err != nil {
		return err
	}

	// parallelLimit is the maximum number of parallel startup jobs that we
	// allow (this is the limit used for all startup semaphores). The multiplier
	// (128) was chosen after some fairly significant benchmarking -- don't change
	// it unless you've tested it significantly (this value is adjusted if
	// RLIMIT_NOFILE is small to avoid EMFILE).
	parallelLimit := adjustParallelLimit(len(dir), 128*runtime.NumCPU())

	// Re-used for all parallel startup jobs.
	var group sync.WaitGroup
	sem := semaphore.NewWeighted(int64(parallelLimit))
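
	// Every startup loop below follows the same bounded-parallelism pattern:
	// one goroutine per container, gated by the weighted semaphore so that at
	// most parallelLimit jobs run at once. In outline:
	//
	//	group.Add(1)
	//	go func() {
	//		defer group.Done()
	//		_ = sem.Acquire(context.Background(), 1) // blocks; only fails on ctx cancellation
	//		defer sem.Release(1)
	//		// ... per-container work ...
	//	}()
	//	group.Wait()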

	for _, v := range dir {
		group.Add(1)
		go func(id string) {
			defer group.Done()
			_ = sem.Acquire(context.Background(), 1)
			defer sem.Release(1)

			logger := log.G(context.TODO()).WithField("container", id)

			c, err := daemon.load(id)
			if err != nil {
				logger.WithError(err).Error("failed to load container")
				return
			}
			if c.Driver != daemon.imageService.StorageDriver() {
				// Ignore the container if it wasn't created with the current storage-driver
				logger.Debugf("not restoring container because it was created with another storage driver (%s)", c.Driver)
				return
			}
			if accessor, ok := daemon.imageService.(layerAccessor); ok {
				rwlayer, err := accessor.GetLayerByID(c.ID)
				if err != nil {
					logger.WithError(err).Error("failed to load container mount")
					return
				}
				c.RWLayer = rwlayer
			}
			logger.WithFields(log.Fields{
				"running": c.IsRunning(),
				"paused":  c.IsPaused(),
			}).Debug("loaded container")

			mapLock.Lock()
			containers[c.ID] = c
			mapLock.Unlock()
		}(v.Name())
	}
	group.Wait()

	removeContainers := make(map[string]*container.Container)
	restartContainers := make(map[*container.Container]chan struct{})
	activeSandboxes := make(map[string]interface{})

	for _, c := range containers {
		group.Add(1)
		go func(c *container.Container) {
			defer group.Done()
			_ = sem.Acquire(context.Background(), 1)
			defer sem.Release(1)

			logger := log.G(context.TODO()).WithField("container", c.ID)

			if err := daemon.registerName(c); err != nil {
				logger.WithError(err).Errorf("failed to register container name: %s", c.Name)
				mapLock.Lock()
				delete(containers, c.ID)
				mapLock.Unlock()
				return
			}
			if err := daemon.Register(c); err != nil {
				logger.WithError(err).Error("failed to register container")
				mapLock.Lock()
				delete(containers, c.ID)
				mapLock.Unlock()
				return
			}
		}(c)
	}
	group.Wait()

	for _, c := range containers {
		group.Add(1)
		go func(c *container.Container) {
			defer group.Done()
			_ = sem.Acquire(context.Background(), 1)
			defer sem.Release(1)

			baseLogger := log.G(context.TODO()).WithField("container", c.ID)

			if c.HostConfig != nil {
				// Migrate containers that don't have the default ("no") restart-policy set.
				// The RestartPolicy.Name field may be empty for containers that were
				// created with versions before v25.0.0.
				//
				// We also need to set the MaximumRetryCount to 0, to prevent
				// validation from failing (MaximumRetryCount is not allowed if
				// no restart-policy ("none") is set).
				if c.HostConfig.RestartPolicy.Name == "" {
					baseLogger.Debug("migrated restart-policy")
					c.HostConfig.RestartPolicy.Name = containertypes.RestartPolicyDisabled
					c.HostConfig.RestartPolicy.MaximumRetryCount = 0
				}

				// Migrate containers that use the deprecated (and now non-functional)
				// logentries driver. Update them to use the "local" logging driver
				// instead.
				//
				// TODO(thaJeztah): remove logentries check and migration code in release v26.0.0.
				if c.HostConfig.LogConfig.Type == "logentries" {
					baseLogger.Warn("migrated deprecated logentries logging driver")
					c.HostConfig.LogConfig = containertypes.LogConfig{
						Type: local.Name,
					}
				}
			}

			if err := daemon.checkpointAndSave(c); err != nil {
				baseLogger.WithError(err).Error("failed to save migrated container config to disk")
			}

			daemon.setStateCounter(c)

			logger := func(c *container.Container) *log.Entry {
				return baseLogger.WithFields(log.Fields{
					"running":    c.IsRunning(),
					"paused":     c.IsPaused(),
					"restarting": c.IsRestarting(),
				})
			}

			logger(c).Debug("restoring container")

			var es *containerd.ExitStatus

			if err := c.RestoreTask(context.Background(), daemon.containerd); err != nil && !errdefs.IsNotFound(err) {
				logger(c).WithError(err).Error("failed to restore container with containerd")
				return
			}

			alive := false
			status := containerd.Unknown
			if tsk, ok := c.Task(); ok {
				s, err := tsk.Status(context.Background())
				if err != nil {
					logger(c).WithError(err).Error("failed to get task status")
				} else {
					status = s.Status
					alive = status != containerd.Stopped
					if !alive {
						logger(c).Debug("cleaning up dead container process")
						es, err = tsk.Delete(context.Background())
						if err != nil && !errdefs.IsNotFound(err) {
							logger(c).WithError(err).Error("failed to delete task from containerd")
							return
						}
					} else if !cfg.LiveRestoreEnabled {
						logger(c).Debug("shutting down container considered alive by containerd")
						if err := daemon.shutdownContainer(c); err != nil && !errdefs.IsNotFound(err) {
							baseLogger.WithError(err).Error("error shutting down container")
							return
						}
						status = containerd.Stopped
						alive = false
						c.ResetRestartManager(false)
					}
				}
			}

			// If the containerd task for the container was not found, docker's view of the
			// container state will be updated accordingly via SetStopped further down.
			if c.IsRunning() || c.IsPaused() {
				logger(c).Debug("syncing container on disk state with real state")

				c.RestartManager().Cancel() // manually start containers because some need to wait for swarm networking

				switch {
				case c.IsPaused() && alive:
					logger(c).WithField("state", status).Info("restored container paused")
					switch status {
					case containerd.Paused, containerd.Pausing:
						// nothing to do
					case containerd.Unknown, containerd.Stopped, "":
						baseLogger.WithField("status", status).Error("unexpected status for paused container during restore")
					default:
						// running
						c.Lock()
						c.Paused = false
						daemon.setStateCounter(c)
						daemon.initHealthMonitor(c)
						if err := c.CheckpointTo(daemon.containersReplica); err != nil {
							baseLogger.WithError(err).Error("failed to update paused container state")
						}
						c.Unlock()
					}
				case !c.IsPaused() && alive:
					logger(c).Debug("restoring healthcheck")
					c.Lock()
					daemon.initHealthMonitor(c)
					c.Unlock()
				}

				if !alive {
					logger(c).Debug("setting stopped state")
					c.Lock()
					var ces container.ExitStatus
					if es != nil {
						ces.ExitCode = int(es.ExitCode())
						ces.ExitedAt = es.ExitTime()
					} else {
						ces.ExitCode = 255
					}
					c.SetStopped(&ces)
					daemon.Cleanup(c)
					if err := c.CheckpointTo(daemon.containersReplica); err != nil {
						baseLogger.WithError(err).Error("failed to update stopped container state")
					}
					c.Unlock()
					logger(c).Debug("set stopped state")
				}

				// We call Mount and then Unmount to get the container's BaseFs.
				if err := daemon.Mount(c); err != nil {
					// The mount is unlikely to fail. However, in case mount fails
					// the container should be allowed to restore here. Some functionalities
					// (like "docker exec -u user") might be missing, but the container
					// can still be stopped/restarted/removed.
					// See #29365 for related information.
					// The error is only logged here.
					logger(c).WithError(err).Warn("failed to mount container to get BaseFs path")
				} else {
					if err := daemon.Unmount(c); err != nil {
						logger(c).WithError(err).Warn("failed to unmount container to get BaseFs path")
					}
				}

				c.ResetRestartManager(false)
				if !c.HostConfig.NetworkMode.IsContainer() && c.IsRunning() {
					options, err := daemon.buildSandboxOptions(&cfg.Config, c)
					if err != nil {
						logger(c).WithError(err).Warn("failed to build sandbox option to restore container")
					}
					mapLock.Lock()
					activeSandboxes[c.NetworkSettings.SandboxID] = options
					mapLock.Unlock()
				}
			}

			// Get the list of containers we need to restart.
			//
			// Do not autostart containers which have endpoints in a swarm-scope
			// network yet, since the cluster is not initialized at this point.
			// They are started after the cluster is initialized.
			if cfg.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint && c.HasBeenStartedBefore {
				mapLock.Lock()
				restartContainers[c] = make(chan struct{})
				mapLock.Unlock()
			} else if c.HostConfig != nil && c.HostConfig.AutoRemove {
				// Remove the container if live-restore is disabled or if the container has already exited.
				if !cfg.LiveRestoreEnabled || !alive {
					mapLock.Lock()
					removeContainers[c.ID] = c
					mapLock.Unlock()
				}
			}

			c.Lock()
			if c.RemovalInProgress {
				// We probably crashed in the middle of a removal, reset
				// the flag.
				//
				// We DO NOT remove the container here as we do not
				// know if the user had requested for either the
				// associated volumes, network links or both to also
				// be removed. So we put the container in the "dead"
				// state and leave further processing up to them.
				c.RemovalInProgress = false
				c.Dead = true
				if err := c.CheckpointTo(daemon.containersReplica); err != nil {
					baseLogger.WithError(err).Error("failed to update RemovalInProgress container state")
				} else {
					baseLogger.Debugf("reset RemovalInProgress state for container")
				}
			}
			c.Unlock()
			logger(c).Debug("done restoring container")
		}(c)
	}
	group.Wait()

	// Initialize the network controller and configure network settings.
	//
	// Note that we cannot initialize the network controller earlier, as it
	// needs to know if there are active sandboxes (running containers).
	if err = daemon.initNetworkController(&cfg.Config, activeSandboxes); err != nil {
		return fmt.Errorf("Error initializing network controller: %v", err)
	}

	// Now that all the containers are registered, register the links
	for _, c := range containers {
		group.Add(1)
		go func(c *container.Container) {
			_ = sem.Acquire(context.Background(), 1)

			if err := daemon.registerLinks(c, c.HostConfig); err != nil {
				log.G(context.TODO()).WithField("container", c.ID).WithError(err).Error("failed to register link for container")
			}

			sem.Release(1)
			group.Done()
		}(c)
	}
	group.Wait()

	for c, notifyChan := range restartContainers {
		group.Add(1)
		go func(c *container.Container, chNotify chan struct{}) {
			_ = sem.Acquire(context.Background(), 1)

			logger := log.G(context.TODO()).WithField("container", c.ID)

			logger.Debug("starting container")

			// ignore errors here as this is a best effort to wait for children to be
			// running before we try to start the container
			children := daemon.children(c)
			timeout := time.NewTimer(5 * time.Second)
			defer timeout.Stop()

			for _, child := range children {
				if notifier, exists := restartContainers[child]; exists {
					select {
					case <-notifier:
					case <-timeout.C:
					}
				}
			}

			if err := daemon.prepareMountPoints(c); err != nil {
				logger.WithError(err).Error("failed to prepare mount points for container")
			}
			if err := daemon.containerStart(context.Background(), cfg, c, "", "", true); err != nil {
				logger.WithError(err).Error("failed to start container")
			}
			close(chNotify)

			sem.Release(1)
			group.Done()
		}(c, notifyChan)
	}
	group.Wait()

	for id := range removeContainers {
		group.Add(1)
		go func(cid string) {
			_ = sem.Acquire(context.Background(), 1)

			if err := daemon.containerRm(&cfg.Config, cid, &backend.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
				log.G(context.TODO()).WithField("container", cid).WithError(err).Error("failed to remove container")
			}

			sem.Release(1)
			group.Done()
		}(id)
	}
	group.Wait()

	// Any containers that were started above already had their mount points
	// prepared; now prepare the mount points for the rest of the containers
	// as well. Running this again on containers that already had it done is
	// harmless.
	//
	// This must run after any containers with a restart policy so that
	// containerized plugins have a chance to be running before we try to
	// initialize them.
	for _, c := range containers {
		// If the container has a restart policy, do not prepare the mount
		// points, since that was already done when it was restarted. This
		// speeds up daemon startup when a restarting container has a volume
		// whose volume driver is not available.
		if _, ok := restartContainers[c]; ok {
			continue
		} else if _, ok := removeContainers[c.ID]; ok {
			// container is automatically removed, skip it.
			continue
		}

		group.Add(1)
		go func(c *container.Container) {
			_ = sem.Acquire(context.Background(), 1)

			if err := daemon.prepareMountPoints(c); err != nil {
				log.G(context.TODO()).WithField("container", c.ID).WithError(err).Error("failed to prepare mountpoints for container")
			}

			sem.Release(1)
			group.Done()
		}(c)
	}
	group.Wait()

	log.G(context.TODO()).Info("Loading containers: done.")

	return nil
}

// RestartSwarmContainers restarts any autostart container which has a
// swarm endpoint.
func (daemon *Daemon) RestartSwarmContainers() {
	daemon.restartSwarmContainers(context.Background(), daemon.config())
}

func (daemon *Daemon) restartSwarmContainers(ctx context.Context, cfg *configStore) {
	// parallelLimit is the maximum number of parallel startup jobs that we
	// allow (this is the limit used for all startup semaphores). The multiplier
	// (128) was chosen after some fairly significant benchmarking -- don't change
	// it unless you've tested it significantly (this value is adjusted if
	// RLIMIT_NOFILE is small to avoid EMFILE).
	parallelLimit := adjustParallelLimit(len(daemon.List()), 128*runtime.NumCPU())

	var group sync.WaitGroup
	sem := semaphore.NewWeighted(int64(parallelLimit))

	for _, c := range daemon.List() {
		if !c.IsRunning() && !c.IsPaused() {
			// Autostart all the containers which have a
			// swarm endpoint now that the cluster is
			// initialized.
			if cfg.AutoRestart && c.ShouldRestart() && c.NetworkSettings.HasSwarmEndpoint && c.HasBeenStartedBefore {
				group.Add(1)
				go func(c *container.Container) {
					if err := sem.Acquire(ctx, 1); err != nil {
						// ctx is done.
						group.Done()
						return
					}

					if err := daemon.containerStart(ctx, cfg, c, "", "", true); err != nil {
						log.G(ctx).WithField("container", c.ID).WithError(err).Error("failed to start swarm container")
					}

					sem.Release(1)
					group.Done()
				}(c)
			}
		}
	}
	group.Wait()
}

func (daemon *Daemon) children(c *container.Container) map[string]*container.Container {
	return daemon.linkIndex.children(c)
}

// parents returns the parent containers of the given container.
func (daemon *Daemon) parents(c *container.Container) map[string]*container.Container {
	return daemon.linkIndex.parents(c)
}

func (daemon *Daemon) registerLink(parent, child *container.Container, alias string) error {
	fullName := path.Join(parent.Name, alias)
	if err := daemon.containersReplica.ReserveName(fullName, child.ID); err != nil {
		if errors.Is(err, container.ErrNameReserved) {
			log.G(context.TODO()).Warnf("error registering link for %s, to %s, as alias %s, ignoring: %v", parent.ID, child.ID, alias, err)
			return nil
		}
		return err
	}
	daemon.linkIndex.link(parent, child, fullName)
	return nil
}
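
// For illustration: registering a link from parent "/web" to child "/db" under
// the alias "db" reserves the name path.Join("/web", "db") == "/web/db" for
// the child, so the alias resolves like any other container name.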

// DaemonJoinsCluster informs the daemon that it has joined the cluster and
// provides the handler to query the cluster component
func (daemon *Daemon) DaemonJoinsCluster(clusterProvider cluster.Provider) {
	daemon.setClusterProvider(clusterProvider)
}

// DaemonLeavesCluster informs the daemon that it has left the cluster
func (daemon *Daemon) DaemonLeavesCluster() {
	// Daemon is in charge of removing the attachable networks with
	// connected containers when the node leaves the swarm
	daemon.clearAttachableNetworks()
	// We no longer need the cluster provider, stop it now so that
	// the network agent will stop listening to cluster events.
	daemon.setClusterProvider(nil)
	// Wait for the networking cluster agent to stop
	daemon.netController.AgentStopWait()
	// Daemon is in charge of removing the ingress network when the
	// node leaves the swarm. Wait for the job to be done or timeout.
	// This is called also on graceful daemon shutdown. We need to
	// wait, because the ingress release has to happen before the
	// network controller is stopped.
	if done, err := daemon.ReleaseIngress(); err == nil {
		timeout := time.NewTimer(5 * time.Second)
		defer timeout.Stop()

		select {
		case <-done:
		case <-timeout.C:
			log.G(context.TODO()).Warn("timeout while waiting for ingress network removal")
		}
	} else {
		log.G(context.TODO()).Warnf("failed to initiate ingress network removal: %v", err)
	}

	daemon.attachmentStore.ClearAttachments()
}

// setClusterProvider sets a component for querying the current cluster state.
func (daemon *Daemon) setClusterProvider(clusterProvider cluster.Provider) {
	daemon.clusterProvider = clusterProvider
	daemon.netController.SetClusterProvider(clusterProvider)
	daemon.attachableNetworkLock = locker.New()
}

// IsSwarmCompatible verifies if the current daemon
// configuration is compatible with the swarm mode
func (daemon *Daemon) IsSwarmCompatible() error {
	return daemon.config().IsSwarmCompatible()
}

// NewDaemon sets up everything for the daemon to be able to service
// requests from the webserver.
func NewDaemon(ctx context.Context, config *config.Config, pluginStore *plugin.Store, authzMiddleware *authorization.Middleware) (daemon *Daemon, err error) {
	// Verify platform-specific requirements.
	// TODO(thaJeztah): this should be called before we try to create the daemon; perhaps together with the config validation.
	if err := checkSystem(); err != nil {
		return nil, err
	}

	registryService, err := registry.NewService(config.ServiceOptions)
	if err != nil {
		return nil, err
	}

	// Ensure that we have a correct root key limit for launching containers.
	if err := modifyRootKeyLimit(); err != nil {
		log.G(ctx).Warnf("unable to modify root key limit, number of containers could be limited by this quota: %v", err)
	}

	// Ensure we have compatible and valid configuration options
	if err := verifyDaemonSettings(config); err != nil {
		return nil, err
	}

	// Do we have a disabled network?
	config.DisableBridge = isBridgeNetworkDisabled(config)

	// Set up resolv.conf
	setupResolvConf(config)

	idMapping, err := setupRemappedRoot(config)
	if err != nil {
		return nil, err
	}
	rootIDs := idMapping.RootPair()
	if err := setMayDetachMounts(); err != nil {
		log.G(ctx).WithError(err).Warn("Could not set may_detach_mounts kernel parameter")
	}

	// set up the tmpDir to use a canonical path
	tmp, err := prepareTempDir(config.Root)
	if err != nil {
		return nil, fmt.Errorf("Unable to get the TempDir under %s: %s", config.Root, err)
	}
	realTmp, err := fileutils.ReadSymlinkedDirectory(tmp)
	if err != nil {
		return nil, fmt.Errorf("Unable to get the full path to the TempDir (%s): %s", tmp, err)
	}
	if isWindows {
		if err := system.MkdirAll(realTmp, 0); err != nil {
			return nil, fmt.Errorf("Unable to create the TempDir (%s): %s", realTmp, err)
		}
		os.Setenv("TEMP", realTmp)
		os.Setenv("TMP", realTmp)
	} else {
		os.Setenv("TMPDIR", realTmp)
	}

	if err := initRuntimesDir(config); err != nil {
		return nil, err
	}
	rts, err := setupRuntimes(config)
	if err != nil {
		return nil, err
	}

	d := &Daemon{
		PluginStore: pluginStore,
		startupDone: make(chan struct{}),
	}

	cfgStore := &configStore{
		Config:   *config,
		Runtimes: rts,
	}
	d.configStore.Store(cfgStore)

	// TEST_INTEGRATION_USE_SNAPSHOTTER is used for integration tests only.
	if os.Getenv("TEST_INTEGRATION_USE_SNAPSHOTTER") != "" {
		d.usesSnapshotter = true
	} else {
		d.usesSnapshotter = config.Features["containerd-snapshotter"]
	}

	// Ensure the daemon is properly shut down if there is a failure during
	// initialization.
	defer func() {
		if err != nil {
			// Use a fresh context here. Passed context could be cancelled.
			if err := d.Shutdown(context.Background()); err != nil {
				log.G(ctx).Error(err)
			}
		}
	}()

	if err := d.setGenericResources(&cfgStore.Config); err != nil {
		return nil, err
	}
	// set up SIGUSR1 handler on Unix-like systems, or a Win32 global event
	// on Windows to dump goroutine stacks
	stackDumpDir := cfgStore.Root
	if execRoot := cfgStore.GetExecRoot(); execRoot != "" {
		stackDumpDir = execRoot
	}
	d.setupDumpStackTrap(stackDumpDir)

	if err := d.setupSeccompProfile(&cfgStore.Config); err != nil {
		return nil, err
	}

	// Set the default isolation mode (only applicable on Windows)
	if err := d.setDefaultIsolation(&cfgStore.Config); err != nil {
		return nil, fmt.Errorf("error setting default isolation mode: %v", err)
	}

	if err := configureMaxThreads(&cfgStore.Config); err != nil {
		log.G(ctx).Warnf("Failed to configure golang's threads limit: %v", err)
	}

	// ensureDefaultAppArmorProfile does nothing if apparmor is disabled
	if err := ensureDefaultAppArmorProfile(); err != nil {
		log.G(ctx).Errorf(err.Error())
	}

	daemonRepo := filepath.Join(cfgStore.Root, "containers")
	if err := idtools.MkdirAllAndChown(daemonRepo, 0o710, idtools.Identity{
		UID: idtools.CurrentIdentity().UID,
		GID: rootIDs.GID,
	}); err != nil {
		return nil, err
	}

	if isWindows {
		// Note that permissions (0o700) are ignored on Windows; passing them to
		// show intent only. We could consider using idtools.MkdirAndChown here
		// to apply an ACL.
		if err = os.Mkdir(filepath.Join(cfgStore.Root, "credentialspecs"), 0o700); err != nil && !errors.Is(err, os.ErrExist) {
			return nil, err
		}
	}

	d.registryService = registryService
	dlogger.RegisterPluginGetter(d.PluginStore)

	metricsSockPath, err := d.listenMetricsSock(&cfgStore.Config)
	if err != nil {
		return nil, err
	}
	registerMetricsPluginCallback(d.PluginStore, metricsSockPath)

	backoffConfig := backoff.DefaultConfig
	backoffConfig.MaxDelay = 3 * time.Second
	connParams := grpc.ConnectParams{
		Backoff: backoffConfig,
	}

	gopts := []grpc.DialOption{
		// WithBlock makes sure that the following containerd request
		// is reliable.
		//
		// NOTE: In one edge case with high load pressure, the kernel OOM-kills
		// dockerd, containerd, and the containerd-shims. When both dockerd and
		// containerd restart, containerd needs time to recover all the
		// existing containers. Until containerd is serving, dockerd would fail
		// with a gRPC error. Worse, the restore action would still ignore any
		// non-NotFound errors and report a running state for containers that
		// have already stopped, which is unexpected behavior, and dockerd
		// would need to be restarted to recover.
		//
		// That is painful. Adding WithBlock prevents this edge case, and in
		// the common case containerd will be serving shortly, so it does no
		// harm to add WithBlock for the containerd connection.
		grpc.WithBlock(),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithConnectParams(connParams),
		grpc.WithContextDialer(dialer.ContextDialer),

		// TODO(stevvooe): We may need to allow configuration of this on the client.
		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize)),
		grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize)),
		grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor()),
		grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor()),
	}

	if cfgStore.ContainerdAddr != "" {
		d.containerdClient, err = containerd.New(
			cfgStore.ContainerdAddr,
			containerd.WithDefaultNamespace(cfgStore.ContainerdNamespace),
			containerd.WithDialOpts(gopts),
			containerd.WithTimeout(60*time.Second),
		)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to dial %q", cfgStore.ContainerdAddr)
		}
	}

	createPluginExec := func(m *plugin.Manager) (plugin.Executor, error) {
		var pluginCli *containerd.Client

		if cfgStore.ContainerdAddr != "" {
			pluginCli, err = containerd.New(
				cfgStore.ContainerdAddr,
				containerd.WithDefaultNamespace(cfgStore.ContainerdPluginNamespace),
				containerd.WithDialOpts(gopts),
				containerd.WithTimeout(60*time.Second),
			)
			if err != nil {
				return nil, errors.Wrapf(err, "failed to dial %q", cfgStore.ContainerdAddr)
			}
		}

		var (
			shim     string
			shimOpts interface{}
		)
		if runtime.GOOS != "windows" {
			shim, shimOpts, err = rts.Get("")
			if err != nil {
				return nil, err
			}
		}
		return pluginexec.New(ctx, getPluginExecRoot(&cfgStore.Config), pluginCli, cfgStore.ContainerdPluginNamespace, m, shim, shimOpts)
	}

	// Plugin system initialization should happen before restore. Do not change order.
	d.pluginManager, err = plugin.NewManager(plugin.ManagerConfig{
		Root:               filepath.Join(cfgStore.Root, "plugins"),
		ExecRoot:           getPluginExecRoot(&cfgStore.Config),
		Store:              d.PluginStore,
		CreateExecutor:     createPluginExec,
		RegistryService:    registryService,
		LiveRestoreEnabled: cfgStore.LiveRestoreEnabled,
		LogPluginEvent:     d.LogPluginEvent, // todo: make private
		AuthzMiddleware:    authzMiddleware,
	})
	if err != nil {
		return nil, errors.Wrap(err, "couldn't create plugin manager")
	}

	d.defaultLogConfig, err = defaultLogConfig(&cfgStore.Config)
	if err != nil {
		return nil, errors.Wrap(err, "failed to set log opts")
	}
	log.G(ctx).Debugf("Using default logging driver %s", d.defaultLogConfig.Type)

	d.volumes, err = volumesservice.NewVolumeService(cfgStore.Root, d.PluginStore, rootIDs, d)
	if err != nil {
		return nil, err
	}

	// Check if the Devices cgroup is mounted; on Linux, it is a hard
	// requirement for container security.
	//
	// Important: we call getSysInfo() directly here, without storing the results,
	// as networking has not yet been set up, so we only have partial system info
	// at this point.
	//
	// TODO(thaJeztah) add a utility to only collect the CgroupDevicesEnabled information
	if runtime.GOOS == "linux" && !userns.RunningInUserNS() && !getSysInfo(&cfgStore.Config).CgroupDevicesEnabled {
		return nil, errors.New("Devices cgroup isn't mounted")
	}

	d.id, err = LoadOrCreateID(cfgStore.Root)
	if err != nil {
		return nil, err
	}
	d.repository = daemonRepo
	d.containers = container.NewMemoryStore()
	if d.containersReplica, err = container.NewViewDB(); err != nil {
		return nil, err
	}
	d.execCommands = container.NewExecStore()
	d.statsCollector = d.newStatsCollector(1 * time.Second)

	d.EventsService = events.New()
	d.root = cfgStore.Root
	d.idMapping = idMapping

	d.linkIndex = newLinkIndex()

	// On Windows we don't support the environment variable, or a user-supplied
	// graphdriver. Unix platforms, however, run a single graphdriver for all
	// containers, and it can be set through an environment variable, a daemon
	// start parameter, or chosen through initialization of the layerstore
	// through driver priority order, for example.
	driverName := os.Getenv("DOCKER_DRIVER")
	if isWindows && d.UsesSnapshotter() {
		// Containerd WCOW snapshotter
		driverName = "windows"
	} else if isWindows {
		// Docker WCOW graphdriver
		driverName = "windowsfilter"
	} else if driverName != "" {
		log.G(ctx).Infof("Setting the storage driver from the $DOCKER_DRIVER environment variable (%s)", driverName)
	} else {
		driverName = cfgStore.GraphDriver
	}

	if d.UsesSnapshotter() {
		if os.Getenv("TEST_INTEGRATION_USE_SNAPSHOTTER") != "" {
			log.G(ctx).Warn("Enabling containerd snapshotter through the $TEST_INTEGRATION_USE_SNAPSHOTTER environment variable. This should only be used for testing.")
		}
		log.G(ctx).Info("Starting daemon with containerd snapshotter integration enabled")

		// FIXME(thaJeztah): implement automatic snapshotter-selection similar to graph-driver selection; see https://github.com/moby/moby/issues/44076
		if driverName == "" {
			driverName = containerd.DefaultSnapshotter
		}

		// Configure and validate the kernel's security support. Note this is a
		// Linux/FreeBSD operation only, so it is safe to pass *just* the runtime
		// OS graphdriver.
		if err := configureKernelSecuritySupport(&cfgStore.Config, driverName); err != nil {
			return nil, err
		}
		d.imageService = ctrd.NewService(ctrd.ImageServiceConfig{
			Client:          d.containerdClient,
			Containers:      d.containers,
			Snapshotter:     driverName,
			RegistryHosts:   d.RegistryHosts,
			Registry:        d.registryService,
			EventsService:   d.EventsService,
			IDMapping:       idMapping,
			RefCountMounter: snapshotter.NewMounter(config.Root, driverName, idMapping),
		})
	} else {
		layerStore, err := layer.NewStoreFromOptions(layer.StoreOptions{
			Root:                      cfgStore.Root,
			MetadataStorePathTemplate: filepath.Join(cfgStore.Root, "image", "%s", "layerdb"),
			GraphDriver:               driverName,
			GraphDriverOptions:        cfgStore.GraphOptions,
			IDMapping:                 idMapping,
			PluginGetter:              d.PluginStore,
			ExperimentalEnabled:       cfgStore.Experimental,
		})
		if err != nil {
			return nil, err
		}

		// Configure and validate the kernel's security support. Note this is a
		// Linux/FreeBSD operation only, so it is safe to pass *just* the runtime
		// OS graphdriver.
		if err := configureKernelSecuritySupport(&cfgStore.Config, layerStore.DriverName()); err != nil {
			return nil, err
		}

		imageRoot := filepath.Join(cfgStore.Root, "image", layerStore.DriverName())
		ifs, err := image.NewFSStoreBackend(filepath.Join(imageRoot, "imagedb"))
		if err != nil {
			return nil, err
		}

		// We have a single tag/reference store for the daemon globally. However, it's
		// stored under the graphdriver. On host platforms which only support a single
		// container OS, but multiple selectable graphdrivers, this means depending on which
		// graphdriver is chosen, the global reference store is under there. For
		// platforms which support multiple container operating systems, this is slightly
		// more problematic, as it is unclear where the global ref store should be
		// located. Fortunately, for Windows, which is currently the only daemon
		// supporting multiple container operating systems, the list of graphdrivers
		// available isn't user configurable. For backwards compatibility, we just put
		// it under the windowsfilter directory regardless.
		refStoreLocation := filepath.Join(imageRoot, `repositories.json`)
		rs, err := refstore.NewReferenceStore(refStoreLocation)
		if err != nil {
			return nil, fmt.Errorf("Couldn't create reference store repository: %s", err)
		}
		d.ReferenceStore = rs

		imageStore, err := image.NewImageStore(ifs, layerStore)
		if err != nil {
			return nil, err
		}

		distributionMetadataStore, err := dmetadata.NewFSMetadataStore(filepath.Join(imageRoot, "distribution"))
		if err != nil {
			return nil, err
		}

		imgSvcConfig := images.ImageServiceConfig{
			ContainerStore:            d.containers,
			DistributionMetadataStore: distributionMetadataStore,
			EventsService:             d.EventsService,
			ImageStore:                imageStore,
			LayerStore:                layerStore,
			MaxConcurrentDownloads:    config.MaxConcurrentDownloads,
			MaxConcurrentUploads:      config.MaxConcurrentUploads,
			MaxDownloadAttempts:       config.MaxDownloadAttempts,
			ReferenceStore:            rs,
			RegistryService:           registryService,
			ContentNamespace:          config.ContainerdNamespace,
		}

		// containerd is not currently supported with Windows, so in that case
		// d.containerdClient will be nil and we create a local content store;
		// otherwise we use containerd's content store.
		if d.containerdClient != nil {
			imgSvcConfig.Leases = d.containerdClient.LeasesService()
			imgSvcConfig.ContentStore = d.containerdClient.ContentStore()
		} else {
			imgSvcConfig.ContentStore, imgSvcConfig.Leases, err = d.configureLocalContentStore(config.ContainerdNamespace)
			if err != nil {
				return nil, err
			}
		}

		// TODO: imageStore, distributionMetadataStore, and ReferenceStore are only
		// used above to run migration. They could be initialized in ImageService
		// if migration is called from daemon/images. layerStore might move as well.
		d.imageService = images.NewImageService(imgSvcConfig)

		log.G(ctx).Debugf("Max Concurrent Downloads: %d", imgSvcConfig.MaxConcurrentDownloads)
		log.G(ctx).Debugf("Max Concurrent Uploads: %d", imgSvcConfig.MaxConcurrentUploads)
		log.G(ctx).Debugf("Max Download Attempts: %d", imgSvcConfig.MaxDownloadAttempts)
	}

	go d.execCommandGC()

	if err := d.initLibcontainerd(ctx, &cfgStore.Config); err != nil {
		return nil, err
	}

	if err := d.restore(cfgStore); err != nil {
		return nil, err
	}
	close(d.startupDone)

	info, err := d.SystemInfo(ctx)
	if err != nil {
		return nil, err
	}
	for _, w := range info.Warnings {
		log.G(ctx).Warn(w)
	}

	engineInfo.WithValues(
		dockerversion.Version,
		dockerversion.GitCommit,
		info.Architecture,
		info.Driver,
		info.KernelVersion,
		info.OperatingSystem,
		info.OSType,
		info.OSVersion,
		info.ID,
	).Set(1)
	engineCpus.Set(float64(info.NCPU))
	engineMemory.Set(float64(info.MemTotal))

	log.G(ctx).WithFields(log.Fields{
		"version":                dockerversion.Version,
		"commit":                 dockerversion.GitCommit,
		"storage-driver":         d.ImageService().StorageDriver(),
		"containerd-snapshotter": d.UsesSnapshotter(),
	}).Info("Docker daemon")

	return d, nil
}

// DistributionServices returns services controlling daemon storage
func (daemon *Daemon) DistributionServices() images.DistributionServices {
	return daemon.imageService.DistributionServices()
}

func (daemon *Daemon) waitForStartupDone() {
	<-daemon.startupDone
}

func (daemon *Daemon) shutdownContainer(c *container.Container) error {
	ctx := compatcontext.WithoutCancel(context.TODO())

	// If the container failed to exit within its stop timeout after receiving
	// SIGTERM, containerStop falls back to force-killing it.
	if err := daemon.containerStop(ctx, c, containertypes.StopOptions{}); err != nil {
		return fmt.Errorf("Failed to stop container %s with error: %v", c.ID, err)
	}

	// Wait without timeout for the container to exit.
	// Ignore the result.
	<-c.Wait(ctx, container.WaitConditionNotRunning)
	return nil
}

// ShutdownTimeout returns the timeout (in seconds) before containers are forcibly
// killed during shutdown. The default timeout can be configured both on the daemon
// and per container, and the longest timeout will be used. A grace-period of
// 5 seconds is added to the configured timeout.
//
// A negative (-1) timeout means "indefinitely", which means that containers
// are not forcibly killed, and the daemon shuts down after all containers exit.
func (daemon *Daemon) ShutdownTimeout() int {
	return daemon.shutdownTimeout(&daemon.config().Config)
}

func (daemon *Daemon) shutdownTimeout(cfg *config.Config) int {
	shutdownTimeout := cfg.ShutdownTimeout
	if shutdownTimeout < 0 {
		return -1
	}
	if daemon.containers == nil {
		return shutdownTimeout
	}

	graceTimeout := 5
	for _, c := range daemon.containers.List() {
		stopTimeout := c.StopTimeout()
		if stopTimeout < 0 {
			return -1
		}
		if stopTimeout+graceTimeout > shutdownTimeout {
			shutdownTimeout = stopTimeout + graceTimeout
		}
	}
	return shutdownTimeout
}
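
// Worked example (illustrative numbers): with a daemon-level ShutdownTimeout
// of 15 and two containers whose stop timeouts are 5 and 20, the loop above
// yields max(15, 5+5, 20+5) == 25 seconds. A negative daemon or container
// timeout short-circuits to -1 (wait indefinitely).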

// Shutdown stops the daemon.
func (daemon *Daemon) Shutdown(ctx context.Context) error {
	daemon.shutdown = true
	// Keep mounts and networking running on daemon shutdown if
	// we are to keep containers running and restore them.
	cfg := &daemon.config().Config
	if cfg.LiveRestoreEnabled && daemon.containers != nil {
		// check if there are any running containers; if none, we can do some cleanup
		if ls, err := daemon.Containers(ctx, &containertypes.ListOptions{}); len(ls) != 0 || err != nil {
			// metrics plugins still need some cleanup
			daemon.cleanupMetricsPlugins()
			return err
		}
	}

	if daemon.containers != nil {
		log.G(ctx).Debugf("daemon configured with a %d seconds minimum shutdown timeout", cfg.ShutdownTimeout)
		log.G(ctx).Debugf("start clean shutdown of all containers with a %d seconds timeout...", daemon.shutdownTimeout(cfg))
		daemon.containers.ApplyAll(func(c *container.Container) {
			if !c.IsRunning() {
				return
			}
			logger := log.G(ctx).WithField("container", c.ID)
			logger.Debug("shutting down container")
			if err := daemon.shutdownContainer(c); err != nil {
				logger.WithError(err).Error("failed to shut down container")
				return
			}
			if mountid, err := daemon.imageService.GetLayerMountID(c.ID); err == nil {
				daemon.cleanupMountsByID(mountid)
			}
			logger.Debug("shut down container")
		})
	}

	if daemon.volumes != nil {
		if err := daemon.volumes.Shutdown(); err != nil {
			log.G(ctx).Errorf("Error shutting down volume store: %v", err)
		}
	}

	if daemon.imageService != nil {
		if err := daemon.imageService.Cleanup(); err != nil {
			log.G(ctx).Error(err)
		}
	}

	// If we are part of a cluster, clean up cluster resources.
	if daemon.clusterProvider != nil {
		log.G(ctx).Debugf("start clean shutdown of cluster resources...")
		daemon.DaemonLeavesCluster()
	}

	daemon.cleanupMetricsPlugins()

	// Shutdown plugins after containers and layerstore. Don't change the order.
	daemon.pluginShutdown()

	// trigger libnetwork Stop only if it's initialized
	if daemon.netController != nil {
		daemon.netController.Stop()
	}

	if daemon.containerdClient != nil {
		daemon.containerdClient.Close()
	}

	if daemon.mdDB != nil {
		daemon.mdDB.Close()
	}

	return daemon.cleanupMounts(cfg)
}
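
// Usage sketch (hypothetical caller, not part of the original file): deriving
// a context deadline from ShutdownTimeout before invoking Shutdown. A negative
// timeout is treated as "wait indefinitely", so no deadline is set in that case.
//
//	ctx := context.Background()
//	if t := daemon.ShutdownTimeout(); t >= 0 {
//		var cancel context.CancelFunc
//		ctx, cancel = context.WithTimeout(ctx, time.Duration(t)*time.Second)
//		defer cancel()
//	}
//	if err := daemon.Shutdown(ctx); err != nil {
//		log.G(ctx).WithError(err).Error("daemon shutdown failed")
//	}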

// Mount sets container.BaseFS.
func (daemon *Daemon) Mount(container *container.Container) error {
	return daemon.imageService.Mount(context.Background(), container)
}

// Unmount unsets the container's base filesystem.
func (daemon *Daemon) Unmount(container *container.Container) error {
	return daemon.imageService.Unmount(context.Background(), container)
}

// Subnets returns the IPv4 and IPv6 subnets of networks that are managed by Docker.
func (daemon *Daemon) Subnets() ([]net.IPNet, []net.IPNet) {
	var v4Subnets []net.IPNet
	var v6Subnets []net.IPNet
	for _, managedNetwork := range daemon.netController.Networks(context.TODO()) {
		v4infos, v6infos := managedNetwork.IpamInfo()
		for _, info := range v4infos {
			if info.IPAMData.Pool != nil {
				v4Subnets = append(v4Subnets, *info.IPAMData.Pool)
			}
		}
		for _, info := range v6infos {
			if info.IPAMData.Pool != nil {
				v6Subnets = append(v6Subnets, *info.IPAMData.Pool)
			}
		}
	}
	return v4Subnets, v6Subnets
}

// prepareTempDir prepares and returns the default directory to use
// for temporary files.
// If it doesn't exist, it is created. If it exists, its content is removed.
func prepareTempDir(rootDir string) (string, error) {
	var tmpDir string
	if tmpDir = os.Getenv("DOCKER_TMPDIR"); tmpDir == "" {
		tmpDir = filepath.Join(rootDir, "tmp")
		newName := tmpDir + "-old"
		if err := os.Rename(tmpDir, newName); err == nil {
			go func() {
				if err := os.RemoveAll(newName); err != nil {
					log.G(context.TODO()).Warnf("failed to delete old tmp directory: %s", newName)
				}
			}()
		} else if !os.IsNotExist(err) {
			log.G(context.TODO()).Warnf("failed to rename %s for background deletion: %s. Deleting synchronously", tmpDir, err)
			if err := os.RemoveAll(tmpDir); err != nil {
				log.G(context.TODO()).Warnf("failed to delete old tmp directory: %s", tmpDir)
			}
		}
	}
	return tmpDir, idtools.MkdirAllAndChown(tmpDir, 0o700, idtools.CurrentIdentity())
}
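
// Behavior note (descriptive, based on the code above): with rootDir ==
// "/var/lib/docker" and DOCKER_TMPDIR unset, prepareTempDir yields
// "/var/lib/docker/tmp", renaming any pre-existing directory to
// "/var/lib/docker/tmp-old" and deleting it in the background. When
// DOCKER_TMPDIR is set, that directory is used as-is: it is created if
// missing, but any existing content is left in place.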

func (daemon *Daemon) setGenericResources(conf *config.Config) error {
	genericResources, err := config.ParseGenericResources(conf.NodeGenericResources)
	if err != nil {
		return err
	}

	daemon.genericResources = genericResources
	return nil
}

// IsShuttingDown tells whether the daemon is shutting down or not.
func (daemon *Daemon) IsShuttingDown() bool {
	return daemon.shutdown
}

func isBridgeNetworkDisabled(conf *config.Config) bool {
	return conf.BridgeConfig.Iface == config.DisableNetworkBridge
}

// networkOptions produces the set of libnetwork config options from the
// daemon's configuration, plugin getter, and any sandboxes that are kept
// active across a live-restore.
func (daemon *Daemon) networkOptions(conf *config.Config, pg plugingetter.PluginGetter, activeSandboxes map[string]interface{}) ([]nwconfig.Option, error) {
	dd := runconfig.DefaultDaemonNetworkMode()

	options := []nwconfig.Option{
		nwconfig.OptionDataDir(conf.Root),
		nwconfig.OptionExecRoot(conf.GetExecRoot()),
		nwconfig.OptionDefaultDriver(string(dd)),
		nwconfig.OptionDefaultNetwork(dd.NetworkName()),
		nwconfig.OptionLabels(conf.Labels),
		nwconfig.OptionNetworkControlPlaneMTU(conf.NetworkControlPlaneMTU),
		driverOptions(conf),
	}

	if len(conf.NetworkConfig.DefaultAddressPools.Value()) > 0 {
		options = append(options, nwconfig.OptionDefaultAddressPoolConfig(conf.NetworkConfig.DefaultAddressPools.Value()))
	}
	if conf.LiveRestoreEnabled && len(activeSandboxes) != 0 {
		options = append(options, nwconfig.OptionActiveSandboxes(activeSandboxes))
	}
	if pg != nil {
		options = append(options, nwconfig.OptionPluginGetter(pg))
	}

	return options, nil
}

// GetCluster returns the cluster.
func (daemon *Daemon) GetCluster() Cluster {
	return daemon.cluster
}

// SetCluster sets the cluster.
func (daemon *Daemon) SetCluster(cluster Cluster) {
	daemon.cluster = cluster
}

func (daemon *Daemon) pluginShutdown() {
	manager := daemon.pluginManager
	// Check for a valid manager object. In error conditions, daemon init can
	// fail, and Shutdown may be called before the plugin manager is initialized.
	if manager != nil {
		manager.Shutdown()
	}
}

// PluginManager returns the current pluginManager associated with the daemon.
func (daemon *Daemon) PluginManager() *plugin.Manager { // set up before daemon to avoid this method
	return daemon.pluginManager
}

// PluginGetter returns the current pluginStore associated with the daemon.
func (daemon *Daemon) PluginGetter() *plugin.Store {
	return daemon.PluginStore
}

// CreateDaemonRoot creates the root for the daemon.
func CreateDaemonRoot(config *config.Config) error {
	// get the canonical path to the Docker root directory
	var realRoot string
	if _, err := os.Stat(config.Root); err != nil && os.IsNotExist(err) {
		realRoot = config.Root
	} else {
		realRoot, err = fileutils.ReadSymlinkedDirectory(config.Root)
		if err != nil {
			return fmt.Errorf("Unable to get the full path to root (%s): %s", config.Root, err)
		}
	}

	idMapping, err := setupRemappedRoot(config)
	if err != nil {
		return err
	}
	return setupDaemonRoot(config, realRoot, idMapping.RootPair())
}

// checkpointAndSave grabs a container lock to safely call container.CheckpointTo.
func (daemon *Daemon) checkpointAndSave(container *container.Container) error {
	container.Lock()
	defer container.Unlock()
	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
		return fmt.Errorf("Error saving container state: %v", err)
	}
	return nil
}

// fixMemorySwappiness clears the memory-swappiness value on the server side,
// because the CLI sends -1 when it wants to unset the value.
func fixMemorySwappiness(resources *containertypes.Resources) {
	if resources.MemorySwappiness != nil && *resources.MemorySwappiness == -1 {
		resources.MemorySwappiness = nil
	}
}
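
// Example (illustrative only, not part of the original file): a Resources
// value arriving from the CLI with MemorySwappiness set to -1 leaves this
// function with a nil pointer, i.e. "use the default" rather than a literal -1.
//
//	v := int64(-1)
//	res := &containertypes.Resources{MemorySwappiness: &v}
//	fixMemorySwappiness(res) // res.MemorySwappiness == nil afterwards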

// GetAttachmentStore returns the current attachment store associated with the daemon.
func (daemon *Daemon) GetAttachmentStore() *network.AttachmentStore {
	return &daemon.attachmentStore
}

// IdentityMapping returns the uid/gid mapping or a SID (in the case of Windows) for the builder.
func (daemon *Daemon) IdentityMapping() idtools.IdentityMapping {
	return daemon.idMapping
}

// ImageService returns the Daemon's ImageService.
func (daemon *Daemon) ImageService() ImageService {
	return daemon.imageService
}

// ImageBackend returns an image-backend for Swarm and the distribution router.
func (daemon *Daemon) ImageBackend() executorpkg.ImageBackend {
	return &imageBackend{
		ImageService:    daemon.imageService,
		registryService: daemon.registryService,
	}
}

// RegistryService returns the Daemon's RegistryService.
func (daemon *Daemon) RegistryService() *registry.Service {
	return daemon.registryService
}

// BuilderBackend returns the backend used by the builder.
func (daemon *Daemon) BuilderBackend() builder.Backend {
	return struct {
		*Daemon
		ImageService
	}{daemon, daemon.imageService}
}
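
// Design note (descriptive, based on the code above): the anonymous struct
// embeds *Daemon and ImageService, so the returned value satisfies
// builder.Backend with the promoted method sets of both, without needing a
// named wrapper type.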

// RawSysInfo returns *sysinfo.SysInfo.
func (daemon *Daemon) RawSysInfo() *sysinfo.SysInfo {
	daemon.sysInfoOnce.Do(func() {
		// We check if sysInfo is not set here to allow some tests to
		// override the actual sysInfo.
		if daemon.sysInfo == nil {
			daemon.sysInfo = getSysInfo(&daemon.config().Config)
		}
	})

	return daemon.sysInfo
}

// imageBackend is used to satisfy the [executorpkg.ImageBackend] and
// [github.com/docker/docker/api/server/router/distribution.Backend]
// interfaces.
type imageBackend struct {
	ImageService
	registryService *registry.Service
}

// GetRepositories returns a list of repositories configured for the given
// reference. Multiple repositories can be returned if the reference is for
// the default (Docker Hub) registry and a mirror is configured, but it omits
// registries that were not reachable (pinging the /v2/ endpoint failed).
//
// It returns an error if it was unable to reach any of the registries for
// the given reference, or if the provided reference is invalid.
func (i *imageBackend) GetRepositories(ctx context.Context, ref reference.Named, authConfig *registrytypes.AuthConfig) ([]dist.Repository, error) {
	return distribution.GetRepositories(ctx, ref, &distribution.ImagePullConfig{
		Config: distribution.Config{
			AuthConfig:      authConfig,
			RegistryService: i.registryService,
		},
	})
}
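
// Usage sketch (hypothetical caller, not part of the original file): resolving
// the repositories for an image reference; the variables here are for
// illustration only.
//
//	ref, err := reference.ParseNormalizedNamed("docker.io/library/alpine:latest")
//	if err != nil {
//		return err
//	}
//	repos, err := backend.GetRepositories(ctx, ref, &registrytypes.AuthConfig{})
//	if err != nil {
//		return err // none of the registries/mirrors for ref were reachable
//	}
//	for _, repo := range repos {
//		_ = repo // try each repository in order, falling back on failure
//	}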