client_daemon.go

// +build !windows

package libcontainerd // import "github.com/docker/docker/libcontainerd"

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"reflect"
	"runtime"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/containerd/containerd"
	apievents "github.com/containerd/containerd/api/events"
	"github.com/containerd/containerd/api/types"
	"github.com/containerd/containerd/archive"
	"github.com/containerd/containerd/cio"
	"github.com/containerd/containerd/content"
	containerderrors "github.com/containerd/containerd/errdefs"
	"github.com/containerd/containerd/events"
	"github.com/containerd/containerd/images"
	"github.com/containerd/containerd/runtime/linux/runctypes"
	"github.com/containerd/typeurl"
	"github.com/docker/docker/errdefs"
	"github.com/docker/docker/pkg/ioutils"
	"github.com/opencontainers/image-spec/specs-go/v1"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// InitProcessName is the name given to the first process of a
// container
const InitProcessName = "init"

type container struct {
	mu sync.Mutex

	bundleDir string
	ctr       containerd.Container
	task      containerd.Task
	execs     map[string]containerd.Process
	oomKilled bool
}

func (c *container) setTask(t containerd.Task) {
	c.mu.Lock()
	c.task = t
	c.mu.Unlock()
}

func (c *container) getTask() containerd.Task {
	c.mu.Lock()
	t := c.task
	c.mu.Unlock()
	return t
}

func (c *container) addProcess(id string, p containerd.Process) {
	c.mu.Lock()
	if c.execs == nil {
		c.execs = make(map[string]containerd.Process)
	}
	c.execs[id] = p
	c.mu.Unlock()
}

func (c *container) deleteProcess(id string) {
	c.mu.Lock()
	delete(c.execs, id)
	c.mu.Unlock()
}

func (c *container) getProcess(id string) containerd.Process {
	c.mu.Lock()
	p := c.execs[id]
	c.mu.Unlock()
	return p
}

func (c *container) setOOMKilled(killed bool) {
	c.mu.Lock()
	c.oomKilled = killed
	c.mu.Unlock()
}

func (c *container) getOOMKilled() bool {
	c.mu.Lock()
	killed := c.oomKilled
	c.mu.Unlock()
	return killed
}

type client struct {
	sync.RWMutex // protects containers map

	remote    *containerd.Client
	stateDir  string
	logger    *logrus.Entry
	namespace string

	backend    Backend
	eventQ     queue
	containers map[string]*container
}

func (c *client) reconnect() error {
	c.Lock()
	err := c.remote.Reconnect()
	c.Unlock()
	return err
}

func (c *client) setRemote(remote *containerd.Client) {
	c.Lock()
	c.remote = remote
	c.Unlock()
}

func (c *client) getRemote() *containerd.Client {
	c.RLock()
	remote := c.remote
	c.RUnlock()
	return remote
}

func (c *client) Version(ctx context.Context) (containerd.Version, error) {
	return c.getRemote().Version(ctx)
}

// Restore loads the containerd container.
// It should not be called concurrently with any other operation for the given ID.
func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallback) (alive bool, pid int, err error) {
	c.Lock()
	_, ok := c.containers[id]
	if ok {
		c.Unlock()
		return false, 0, errors.WithStack(newConflictError("id already in use"))
	}

	cntr := &container{}
	c.containers[id] = cntr
	cntr.mu.Lock()
	defer cntr.mu.Unlock()

	c.Unlock()

	defer func() {
		if err != nil {
			c.Lock()
			delete(c.containers, id)
			c.Unlock()
		}
	}()

	var dio *cio.DirectIO
	defer func() {
		if err != nil && dio != nil {
			dio.Cancel()
			dio.Close()
		}
		err = wrapError(err)
	}()

	ctr, err := c.getRemote().LoadContainer(ctx, id)
	if err != nil {
		return false, -1, errors.WithStack(wrapError(err))
	}

	attachIO := func(fifos *cio.FIFOSet) (cio.IO, error) {
		// dio must be assigned to the previously defined dio for the defer above
		// to handle cleanup
		dio, err = cio.NewDirectIO(ctx, fifos)
		if err != nil {
			return nil, err
		}
		return attachStdio(dio)
	}
	t, err := ctr.Task(ctx, attachIO)
	if err != nil && !containerderrors.IsNotFound(err) {
		return false, -1, errors.Wrap(wrapError(err), "error getting containerd task for container")
	}

	if t != nil {
		s, err := t.Status(ctx)
		if err != nil {
			return false, -1, errors.Wrap(wrapError(err), "error getting task status")
		}

		alive = s.Status != containerd.Stopped
		pid = int(t.Pid())
	}

	cntr.bundleDir = filepath.Join(c.stateDir, id)
	cntr.ctr = ctr
	cntr.task = t
	// TODO(mlaventure): load execs

	c.logger.WithFields(logrus.Fields{
		"container": id,
		"alive":     alive,
		"pid":       pid,
	}).Debug("restored container")

	return alive, pid, nil
}
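
// Create creates a new containerd container for the given id from the provided
// OCI spec; the container's task is created separately by Start.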
func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, runtimeOptions interface{}) error {
	if ctr := c.getContainer(id); ctr != nil {
		return errors.WithStack(newConflictError("id already in use"))
	}

	bdir, err := prepareBundleDir(filepath.Join(c.stateDir, id), ociSpec)
	if err != nil {
		return errdefs.System(errors.Wrap(err, "prepare bundle dir failed"))
	}

	c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")

	cdCtr, err := c.getRemote().NewContainer(ctx, id,
		containerd.WithSpec(ociSpec),
		// TODO(mlaventure): when containerd supports lcow, revisit runtime value
		containerd.WithRuntime(fmt.Sprintf("io.containerd.runtime.v1.%s", runtime.GOOS), runtimeOptions))
	if err != nil {
		return wrapError(err)
	}

	c.Lock()
	c.containers[id] = &container{
		bundleDir: bdir,
		ctr:       cdCtr,
	}
	c.Unlock()

	return nil
}

// Start creates and starts a task for the specified containerd id
func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio StdioCallback) (int, error) {
	ctr := c.getContainer(id)
	if ctr == nil {
		return -1, errors.WithStack(newNotFoundError("no such container"))
	}
	if t := ctr.getTask(); t != nil {
		return -1, errors.WithStack(newConflictError("container already started"))
	}

	var (
		cp             *types.Descriptor
		t              containerd.Task
		rio            cio.IO
		err            error
		stdinCloseSync = make(chan struct{})
	)

	if checkpointDir != "" {
		// write checkpoint to the content store
		tar := archive.Diff(ctx, "", checkpointDir)
		cp, err = c.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar)
		// remove the checkpoint when we're done
		defer func() {
			if cp != nil {
				err := c.getRemote().ContentStore().Delete(context.Background(), cp.Digest)
				if err != nil {
					c.logger.WithError(err).WithFields(logrus.Fields{
						"ref":    checkpointDir,
						"digest": cp.Digest,
					}).Warnf("failed to delete temporary checkpoint entry")
				}
			}
		}()
		if err := tar.Close(); err != nil {
			return -1, errors.Wrap(err, "failed to close checkpoint tar stream")
		}
		if err != nil {
			return -1, errors.Wrapf(err, "failed to upload checkpoint to containerd")
		}
	}

	spec, err := ctr.ctr.Spec(ctx)
	if err != nil {
		return -1, errors.Wrap(err, "failed to retrieve spec")
	}
	uid, gid := getSpecUser(spec)
	t, err = ctr.ctr.NewTask(ctx,
		func(id string) (cio.IO, error) {
			fifos := newFIFOSet(ctr.bundleDir, InitProcessName, withStdin, spec.Process.Terminal)
			rio, err = c.createIO(fifos, id, InitProcessName, stdinCloseSync, attachStdio)
			return rio, err
		},
		func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
			info.Checkpoint = cp
			info.Options = &runctypes.CreateOptions{
				IoUid:       uint32(uid),
				IoGid:       uint32(gid),
				NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
			}
			return nil
		})
	if err != nil {
		close(stdinCloseSync)
		if rio != nil {
			rio.Cancel()
			rio.Close()
		}
		return -1, wrapError(err)
	}

	ctr.setTask(t)

	// Signal c.createIO that it can call CloseIO
	close(stdinCloseSync)

	if err := t.Start(ctx); err != nil {
		if _, err := t.Delete(ctx); err != nil {
			c.logger.WithError(err).WithField("container", id).
				Error("failed to delete task after fail start")
		}
		ctr.setTask(nil)
		return -1, wrapError(err)
	}

	return int(t.Pid()), nil
}
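
// Exec creates and starts an additional process with the given spec inside an
// already running container.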
func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) {
	ctr := c.getContainer(containerID)
	if ctr == nil {
		return -1, errors.WithStack(newNotFoundError("no such container"))
	}
	t := ctr.getTask()
	if t == nil {
		return -1, errors.WithStack(newInvalidParameterError("container is not running"))
	}

	if p := ctr.getProcess(processID); p != nil {
		return -1, errors.WithStack(newConflictError("id already in use"))
	}

	var (
		p              containerd.Process
		rio            cio.IO
		err            error
		stdinCloseSync = make(chan struct{})
	)

	fifos := newFIFOSet(ctr.bundleDir, processID, withStdin, spec.Terminal)

	defer func() {
		if err != nil {
			if rio != nil {
				rio.Cancel()
				rio.Close()
			}
		}
	}()

	p, err = t.Exec(ctx, processID, spec, func(id string) (cio.IO, error) {
		rio, err = c.createIO(fifos, containerID, processID, stdinCloseSync, attachStdio)
		return rio, err
	})
	if err != nil {
		close(stdinCloseSync)
		return -1, wrapError(err)
	}

	ctr.addProcess(processID, p)

	// Signal c.createIO that it can call CloseIO
	close(stdinCloseSync)

	if err = p.Start(ctx); err != nil {
		p.Delete(context.Background())
		ctr.deleteProcess(processID)
		return -1, wrapError(err)
	}

	return int(p.Pid()), nil
}
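
// SignalProcess sends the given signal to the container's init process or to
// the exec'd process identified by processID.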
func (c *client) SignalProcess(ctx context.Context, containerID, processID string, signal int) error {
	p, err := c.getProcess(containerID, processID)
	if err != nil {
		return err
	}
	return wrapError(p.Kill(ctx, syscall.Signal(signal)))
}

func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
	p, err := c.getProcess(containerID, processID)
	if err != nil {
		return err
	}

	return p.Resize(ctx, uint32(width), uint32(height))
}

func (c *client) CloseStdin(ctx context.Context, containerID, processID string) error {
	p, err := c.getProcess(containerID, processID)
	if err != nil {
		return err
	}

	return p.CloseIO(ctx, containerd.WithStdinCloser)
}

func (c *client) Pause(ctx context.Context, containerID string) error {
	p, err := c.getProcess(containerID, InitProcessName)
	if err != nil {
		return err
	}

	return wrapError(p.(containerd.Task).Pause(ctx))
}

func (c *client) Resume(ctx context.Context, containerID string) error {
	p, err := c.getProcess(containerID, InitProcessName)
	if err != nil {
		return err
	}

	return p.(containerd.Task).Resume(ctx)
}
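
// Stats returns a point-in-time metrics sample for the container's task.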
func (c *client) Stats(ctx context.Context, containerID string) (*Stats, error) {
	p, err := c.getProcess(containerID, InitProcessName)
	if err != nil {
		return nil, err
	}

	m, err := p.(containerd.Task).Metrics(ctx)
	if err != nil {
		return nil, err
	}

	v, err := typeurl.UnmarshalAny(m.Data)
	if err != nil {
		return nil, err
	}
	return interfaceToStats(m.Timestamp, v), nil
}

func (c *client) ListPids(ctx context.Context, containerID string) ([]uint32, error) {
	p, err := c.getProcess(containerID, InitProcessName)
	if err != nil {
		return nil, err
	}

	pis, err := p.(containerd.Task).Pids(ctx)
	if err != nil {
		return nil, err
	}

	var pids []uint32
	for _, i := range pis {
		pids = append(pids, i.Pid)
	}

	return pids, nil
}
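
// Summary returns details for every process running in the container's task.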
func (c *client) Summary(ctx context.Context, containerID string) ([]Summary, error) {
	p, err := c.getProcess(containerID, InitProcessName)
	if err != nil {
		return nil, err
	}

	pis, err := p.(containerd.Task).Pids(ctx)
	if err != nil {
		return nil, err
	}

	var infos []Summary
	for _, pi := range pis {
		i, err := typeurl.UnmarshalAny(pi.Info)
		if err != nil {
			return nil, errors.Wrap(err, "unable to decode process details")
		}
		s, err := summaryFromInterface(i)
		if err != nil {
			return nil, err
		}
		infos = append(infos, *s)
	}

	return infos, nil
}
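
// DeleteTask removes the container's task from containerd and returns its exit
// code and exit time; if the task cannot be found or deleted it reports exit
// code 255 with the current time.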
func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
	p, err := c.getProcess(containerID, InitProcessName)
	if err != nil {
		return 255, time.Now(), nil
	}

	status, err := p.(containerd.Task).Delete(ctx)
	if err != nil {
		return 255, time.Now(), nil
	}

	if ctr := c.getContainer(containerID); ctr != nil {
		ctr.setTask(nil)
	}
	return status.ExitCode(), status.ExitTime(), nil
}
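
// Delete removes the containerd container and, unless LIBCONTAINERD_NOCLEAN is
// set to "1", its bundle directory on disk.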
func (c *client) Delete(ctx context.Context, containerID string) error {
	ctr := c.getContainer(containerID)
	if ctr == nil {
		return errors.WithStack(newNotFoundError("no such container"))
	}

	if err := ctr.ctr.Delete(ctx); err != nil {
		return wrapError(err)
	}

	if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
		if err := os.RemoveAll(ctr.bundleDir); err != nil {
			c.logger.WithError(err).WithFields(logrus.Fields{
				"container": containerID,
				"bundle":    ctr.bundleDir,
			}).Error("failed to remove state dir")
		}
	}

	c.removeContainer(containerID)

	return nil
}
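
// Status returns the status of the container's task, or StatusUnknown if the
// container or its task cannot be found.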
func (c *client) Status(ctx context.Context, containerID string) (Status, error) {
	ctr := c.getContainer(containerID)
	if ctr == nil {
		return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
	}

	t := ctr.getTask()
	if t == nil {
		return StatusUnknown, errors.WithStack(newNotFoundError("no such task"))
	}

	s, err := t.Status(ctx)
	if err != nil {
		return StatusUnknown, wrapError(err)
	}

	return Status(s.Status), nil
}
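
// CreateCheckpoint checkpoints the container's task and unpacks the resulting
// checkpoint data into checkpointDir.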
func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
	p, err := c.getProcess(containerID, InitProcessName)
	if err != nil {
		return err
	}

	img, err := p.(containerd.Task).Checkpoint(ctx)
	if err != nil {
		return wrapError(err)
	}
	// Whatever happens, delete the checkpoint from containerd
	defer func() {
		err := c.getRemote().ImageService().Delete(context.Background(), img.Name())
		if err != nil {
			c.logger.WithError(err).WithField("digest", img.Target().Digest).
				Warnf("failed to delete checkpoint image")
		}
	}()

	b, err := content.ReadBlob(ctx, c.getRemote().ContentStore(), img.Target())
	if err != nil {
		return errdefs.System(errors.Wrapf(err, "failed to retrieve checkpoint data"))
	}
	var index v1.Index
	if err := json.Unmarshal(b, &index); err != nil {
		return errdefs.System(errors.Wrapf(err, "failed to decode checkpoint data"))
	}

	var cpDesc *v1.Descriptor
	for _, m := range index.Manifests {
		if m.MediaType == images.MediaTypeContainerd1Checkpoint {
			cpDesc = &m
			break
		}
	}
	if cpDesc == nil {
		// err is nil here; report the missing checkpoint manifest directly
		// instead of wrapping a nil error.
		return errdefs.System(errors.New("invalid checkpoint"))
	}

	rat, err := c.getRemote().ContentStore().ReaderAt(ctx, *cpDesc)
	if err != nil {
		return errdefs.System(errors.Wrapf(err, "failed to get checkpoint reader"))
	}
	defer rat.Close()
	_, err = archive.Apply(ctx, checkpointDir, content.NewReader(rat))
	if err != nil {
		return errdefs.System(errors.Wrapf(err, "failed to read checkpoint reader"))
	}

	return err
}

func (c *client) getContainer(id string) *container {
	c.RLock()
	ctr := c.containers[id]
	c.RUnlock()

	return ctr
}

func (c *client) removeContainer(id string) {
	c.Lock()
	delete(c.containers, id)
	c.Unlock()
}

func (c *client) getProcess(containerID, processID string) (containerd.Process, error) {
	ctr := c.getContainer(containerID)
	if ctr == nil {
		return nil, errors.WithStack(newNotFoundError("no such container"))
	}

	t := ctr.getTask()
	if t == nil {
		return nil, errors.WithStack(newNotFoundError("container is not running"))
	}
	if processID == InitProcessName {
		return t, nil
	}

	p := ctr.getProcess(processID)
	if p == nil {
		return nil, errors.WithStack(newNotFoundError("no such exec"))
	}
	return p, nil
}

// createIO creates the io to be used by a process
// This needs to get a pointer to interface as upon closure the process may not have yet been registered
func (c *client) createIO(fifos *cio.FIFOSet, containerID, processID string, stdinCloseSync chan struct{}, attachStdio StdioCallback) (cio.IO, error) {
	var (
		io  *cio.DirectIO
		err error
	)
	io, err = cio.NewDirectIO(context.Background(), fifos)
	if err != nil {
		return nil, err
	}

	if io.Stdin != nil {
		var (
			err       error
			stdinOnce sync.Once
		)
		pipe := io.Stdin
		io.Stdin = ioutils.NewWriteCloserWrapper(pipe, func() error {
			stdinOnce.Do(func() {
				err = pipe.Close()
				// Do the rest in a new routine to avoid a deadlock if the
				// Exec/Start call failed.
				go func() {
					<-stdinCloseSync
					p, err := c.getProcess(containerID, processID)
					if err == nil {
						err = p.CloseIO(context.Background(), containerd.WithStdinCloser)
						if err != nil && strings.Contains(err.Error(), "transport is closing") {
							err = nil
						}
					}
				}()
			})
			return err
		})
	}

	rio, err := attachStdio(io)
	if err != nil {
		io.Cancel()
		io.Close()
	}
	return rio, err
}

func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
	c.eventQ.append(ei.ContainerID, func() {
		err := c.backend.ProcessEvent(ei.ContainerID, et, ei)
		if err != nil {
			c.logger.WithError(err).WithFields(logrus.Fields{
				"container":  ei.ContainerID,
				"event":      et,
				"event-info": ei,
			}).Error("failed to process event")
		}

		if et == EventExit && ei.ProcessID != ei.ContainerID {
			p := ctr.getProcess(ei.ProcessID)
			if p == nil {
				c.logger.WithError(errors.New("no such process")).
					WithFields(logrus.Fields{
						"container": ei.ContainerID,
						"process":   ei.ProcessID,
					}).Error("exit event")
				return
			}
			_, err = p.Delete(context.Background())
			if err != nil {
				c.logger.WithError(err).WithFields(logrus.Fields{
					"container": ei.ContainerID,
					"process":   ei.ProcessID,
				}).Warn("failed to delete process")
			}
			ctr.deleteProcess(ei.ProcessID)

			ctr := c.getContainer(ei.ContainerID)
			if ctr == nil {
				c.logger.WithFields(logrus.Fields{
					"container": ei.ContainerID,
				}).Error("failed to find container")
			} else {
				newFIFOSet(ctr.bundleDir, ei.ProcessID, true, false).Close()
			}
		}
	})
}
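
// processEventStream subscribes to the containerd task event stream for this
// client's namespace and dispatches each event to the backend via processEvent.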
func (c *client) processEventStream(ctx context.Context) {
	var (
		err error
		ev  *events.Envelope
		et  EventType
		ei  EventInfo
		ctr *container
	)

	// Filter on both namespace *and* topic. To create an "and" filter,
	// this must be a single, comma-separated string
	eventStream, errC := c.getRemote().EventService().Subscribe(ctx, "namespace=="+c.namespace+",topic~=|^/tasks/|")

	c.logger.WithField("namespace", c.namespace).Debug("processing event stream")

	var oomKilled bool
	for {
		select {
		case err = <-errC:
			if err != nil {
				errStatus, ok := status.FromError(err)
				if !ok || errStatus.Code() != codes.Canceled {
					c.logger.WithError(err).Error("failed to get event")
					go c.processEventStream(ctx)
				} else {
					c.logger.WithError(ctx.Err()).Info("stopping event stream following graceful shutdown")
				}
			}
			return
		case ev = <-eventStream:
			if ev.Event == nil {
				c.logger.WithField("event", ev).Warn("invalid event")
				continue
			}

			v, err := typeurl.UnmarshalAny(ev.Event)
			if err != nil {
				c.logger.WithError(err).WithField("event", ev).Warn("failed to unmarshal event")
				continue
			}

			c.logger.WithField("topic", ev.Topic).Debug("event")

			switch t := v.(type) {
			case *apievents.TaskCreate:
				et = EventCreate
				ei = EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ContainerID,
					Pid:         t.Pid,
				}
			case *apievents.TaskStart:
				et = EventStart
				ei = EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ContainerID,
					Pid:         t.Pid,
				}
			case *apievents.TaskExit:
				et = EventExit
				ei = EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ID,
					Pid:         t.Pid,
					ExitCode:    t.ExitStatus,
					ExitedAt:    t.ExitedAt,
				}
			case *apievents.TaskOOM:
				et = EventOOM
				ei = EventInfo{
					ContainerID: t.ContainerID,
					OOMKilled:   true,
				}
				oomKilled = true
			case *apievents.TaskExecAdded:
				et = EventExecAdded
				ei = EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ExecID,
				}
			case *apievents.TaskExecStarted:
				et = EventExecStarted
				ei = EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ExecID,
					Pid:         t.Pid,
				}
			case *apievents.TaskPaused:
				et = EventPaused
				ei = EventInfo{
					ContainerID: t.ContainerID,
				}
			case *apievents.TaskResumed:
				et = EventResumed
				ei = EventInfo{
					ContainerID: t.ContainerID,
				}
			default:
				c.logger.WithFields(logrus.Fields{
					"topic": ev.Topic,
					"type":  reflect.TypeOf(t)},
				).Info("ignoring event")
				continue
			}

			ctr = c.getContainer(ei.ContainerID)
			if ctr == nil {
				c.logger.WithField("container", ei.ContainerID).Warn("unknown container")
				continue
			}

			if oomKilled {
				ctr.setOOMKilled(true)
				oomKilled = false
			}
			ei.OOMKilled = ctr.getOOMKilled()

			c.processEvent(ctr, et, ei)
		}
	}
}
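
// writeContent streams r into the containerd content store under ref and
// returns a descriptor for the written blob.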
func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.Reader) (*types.Descriptor, error) {
	writer, err := c.getRemote().ContentStore().Writer(ctx, content.WithRef(ref))
	if err != nil {
		return nil, err
	}
	defer writer.Close()
	size, err := io.Copy(writer, r)
	if err != nil {
		return nil, err
	}
	labels := map[string]string{
		"containerd.io/gc.root": time.Now().UTC().Format(time.RFC3339),
	}
	if err := writer.Commit(ctx, 0, "", content.WithLabels(labels)); err != nil {
		return nil, err
	}
	return &types.Descriptor{
		MediaType: mediaType,
		Digest:    writer.Digest(),
		Size_:     size,
	}, nil
}
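
// wrapError translates containerd "not found" errors, and a few well-known
// error message fragments, into docker errdefs not-found errors.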
func wrapError(err error) error {
	switch {
	case err == nil:
		return nil
	case containerderrors.IsNotFound(err):
		return errdefs.NotFound(err)
	}

	msg := err.Error()
	for _, s := range []string{"container does not exist", "not found", "no such container"} {
		if strings.Contains(msg, s) {
			return errdefs.NotFound(err)
		}
	}
	return err
}