client_daemon.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904
  1. // +build !windows
  2. package libcontainerd // import "github.com/docker/docker/libcontainerd"
  3. import (
  4. "context"
  5. "encoding/json"
  6. "fmt"
  7. "io"
  8. "os"
  9. "path/filepath"
  10. "reflect"
  11. "runtime"
  12. "strings"
  13. "sync"
  14. "syscall"
  15. "time"
  16. "github.com/containerd/containerd"
  17. apievents "github.com/containerd/containerd/api/events"
  18. "github.com/containerd/containerd/api/types"
  19. "github.com/containerd/containerd/archive"
  20. "github.com/containerd/containerd/cio"
  21. "github.com/containerd/containerd/content"
  22. containerderrors "github.com/containerd/containerd/errdefs"
  23. "github.com/containerd/containerd/events"
  24. "github.com/containerd/containerd/images"
  25. "github.com/containerd/containerd/runtime/linux/runctypes"
  26. "github.com/containerd/typeurl"
  27. "github.com/docker/docker/errdefs"
  28. "github.com/docker/docker/pkg/ioutils"
  29. "github.com/opencontainers/image-spec/specs-go/v1"
  30. specs "github.com/opencontainers/runtime-spec/specs-go"
  31. "github.com/pkg/errors"
  32. "github.com/sirupsen/logrus"
  33. "google.golang.org/grpc/codes"
  34. "google.golang.org/grpc/status"
  35. )
// InitProcessName is the name given to the first process of a
// container. It is also the process ID used to address the init task
// through getProcess.
const InitProcessName = "init"
// container bundles the containerd handles and bookkeeping for one
// container tracked by this client.
type container struct {
	mu sync.Mutex // guards all fields below

	bundleDir string                        // OCI bundle directory under the client's stateDir
	ctr       containerd.Container          // containerd container handle
	task      containerd.Task               // init task; nil when not running
	execs     map[string]containerd.Process // live exec processes keyed by process ID; lazily allocated
	oomKilled bool                          // latched when a TaskOOM event was seen for this container
}
  47. func (c *container) setTask(t containerd.Task) {
  48. c.mu.Lock()
  49. c.task = t
  50. c.mu.Unlock()
  51. }
  52. func (c *container) getTask() containerd.Task {
  53. c.mu.Lock()
  54. t := c.task
  55. c.mu.Unlock()
  56. return t
  57. }
  58. func (c *container) addProcess(id string, p containerd.Process) {
  59. c.mu.Lock()
  60. if c.execs == nil {
  61. c.execs = make(map[string]containerd.Process)
  62. }
  63. c.execs[id] = p
  64. c.mu.Unlock()
  65. }
  66. func (c *container) deleteProcess(id string) {
  67. c.mu.Lock()
  68. delete(c.execs, id)
  69. c.mu.Unlock()
  70. }
  71. func (c *container) getProcess(id string) containerd.Process {
  72. c.mu.Lock()
  73. p := c.execs[id]
  74. c.mu.Unlock()
  75. return p
  76. }
  77. func (c *container) setOOMKilled(killed bool) {
  78. c.mu.Lock()
  79. c.oomKilled = killed
  80. c.mu.Unlock()
  81. }
  82. func (c *container) getOOMKilled() bool {
  83. c.mu.Lock()
  84. killed := c.oomKilled
  85. c.mu.Unlock()
  86. return killed
  87. }
// client implements the libcontainerd Client interface on top of a
// containerd daemon connection.
type client struct {
	sync.RWMutex // protects containers map

	client     *containerd.Client
	stateDir   string        // root directory for per-container bundle dirs
	logger     *logrus.Entry // pre-tagged with module and namespace fields
	ns         string        // containerd namespace this client operates in
	backend    Backend       // receives processed container events
	eventQ     queue         // event dispatch queue, appended to per container ID (see processEvent)
	containers map[string]*container // tracked containers keyed by container ID
}
// NewClient creates a new libcontainerd client from a containerd client.
// It also starts a goroutine consuming the containerd event stream for
// namespace ns, forwarding events to backend b; see processEventStream
// for its lifetime.
func NewClient(ctx context.Context, cli *containerd.Client, stateDir, ns string, b Backend) (Client, error) {
	c := &client{
		client:     cli,
		stateDir:   stateDir,
		logger:     logrus.WithField("module", "libcontainerd").WithField("namespace", ns),
		ns:         ns,
		backend:    b,
		containers: make(map[string]*container),
	}

	go c.processEventStream(ctx, ns)

	return c, nil
}
// Version returns the version information of the containerd daemon
// this client is connected to.
func (c *client) Version(ctx context.Context) (containerd.Version, error) {
	return c.client.Version(ctx)
}
// Restore loads the containerd container.
// It should not be called concurrently with any other operation for the given ID.
// It returns whether the container's task is still alive, the pid of
// the init process (0 or -1 when unknown), and any error encountered.
func (c *client) Restore(ctx context.Context, id string, attachStdio StdioCallback) (alive bool, pid int, err error) {
	c.Lock()
	_, ok := c.containers[id]
	if ok {
		c.Unlock()
		return false, 0, errors.WithStack(newConflictError("id already in use"))
	}

	// Reserve the ID with a placeholder entry; its lock is held for the
	// whole restore so concurrent per-container operations wait.
	cntr := &container{}
	c.containers[id] = cntr
	cntr.mu.Lock()
	defer cntr.mu.Unlock()

	c.Unlock()

	// Drop the reservation again if the restore fails.
	defer func() {
		if err != nil {
			c.Lock()
			delete(c.containers, id)
			c.Unlock()
		}
	}()

	// Clean up any direct IO created below on failure, and normalize the
	// returned error in all cases.
	var dio *cio.DirectIO
	defer func() {
		if err != nil && dio != nil {
			dio.Cancel()
			dio.Close()
		}
		err = wrapError(err)
	}()

	ctr, err := c.client.LoadContainer(ctx, id)
	if err != nil {
		return false, -1, errors.WithStack(wrapError(err))
	}

	attachIO := func(fifos *cio.FIFOSet) (cio.IO, error) {
		// dio must be assigned to the previously defined dio for the defer above
		// to handle cleanup
		dio, err = cio.NewDirectIO(ctx, fifos)
		if err != nil {
			return nil, err
		}
		return attachStdio(dio)
	}
	// A missing task simply means the container is not running.
	t, err := ctr.Task(ctx, attachIO)
	if err != nil && !containerderrors.IsNotFound(err) {
		return false, -1, errors.Wrap(wrapError(err), "error getting containerd task for container")
	}

	if t != nil {
		s, err := t.Status(ctx)
		if err != nil {
			return false, -1, errors.Wrap(wrapError(err), "error getting task status")
		}

		alive = s.Status != containerd.Stopped
		pid = int(t.Pid())
	}

	cntr.bundleDir = filepath.Join(c.stateDir, id)
	cntr.ctr = ctr
	cntr.task = t
	// TODO(mlaventure): load execs

	c.logger.WithFields(logrus.Fields{
		"container": id,
		"alive":     alive,
		"pid":       pid,
	}).Debug("restored container")

	return alive, pid, nil
}
// Create registers a new container with containerd: it prepares the
// OCI bundle directory under the client's state dir and creates the
// containerd container object from ociSpec. No task is started; that
// is done by Start.
func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, runtimeOptions interface{}) error {
	if ctr := c.getContainer(id); ctr != nil {
		return errors.WithStack(newConflictError("id already in use"))
	}

	bdir, err := prepareBundleDir(filepath.Join(c.stateDir, id), ociSpec)
	if err != nil {
		return errdefs.System(errors.Wrap(err, "prepare bundle dir failed"))
	}

	c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")

	cdCtr, err := c.client.NewContainer(ctx, id,
		containerd.WithSpec(ociSpec),
		// TODO(mlaventure): when containerd support lcow, revisit runtime value
		containerd.WithRuntime(fmt.Sprintf("io.containerd.runtime.v1.%s", runtime.GOOS), runtimeOptions))
	if err != nil {
		return wrapError(err)
	}

	c.Lock()
	c.containers[id] = &container{
		bundleDir: bdir,
		ctr:       cdCtr,
	}
	c.Unlock()

	return nil
}
// Start creates and starts a task for the container with the given id,
// optionally restoring it from the checkpoint in checkpointDir, and
// returns the pid of the task's init process. The container must have
// been created via Create and must not already have a task.
func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio StdioCallback) (int, error) {
	ctr := c.getContainer(id)
	if ctr == nil {
		return -1, errors.WithStack(newNotFoundError("no such container"))
	}
	if t := ctr.getTask(); t != nil {
		return -1, errors.WithStack(newConflictError("container already started"))
	}

	var (
		cp             *types.Descriptor
		t              containerd.Task
		rio            cio.IO
		err            error
		stdinCloseSync = make(chan struct{})
	)

	if checkpointDir != "" {
		// write checkpoint to the content store
		tar := archive.Diff(ctx, "", checkpointDir)
		cp, err = c.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar)
		// remove the checkpoint when we're done
		defer func() {
			if cp != nil {
				err := c.client.ContentStore().Delete(context.Background(), cp.Digest)
				if err != nil {
					c.logger.WithError(err).WithFields(logrus.Fields{
						"ref":    checkpointDir,
						"digest": cp.Digest,
					}).Warnf("failed to delete temporary checkpoint entry")
				}
			}
		}()
		// Close the checkpoint tar stream before surfacing any
		// writeContent error.
		if err := tar.Close(); err != nil {
			return -1, errors.Wrap(err, "failed to close checkpoint tar stream")
		}
		if err != nil {
			return -1, errors.Wrapf(err, "failed to upload checkpoint to containerd")
		}
	}

	spec, err := ctr.ctr.Spec(ctx)
	if err != nil {
		return -1, errors.Wrap(err, "failed to retrieve spec")
	}
	uid, gid := getSpecUser(spec)
	t, err = ctr.ctr.NewTask(ctx,
		func(id string) (cio.IO, error) {
			fifos := newFIFOSet(ctr.bundleDir, InitProcessName, withStdin, spec.Process.Terminal)
			rio, err = c.createIO(fifos, id, InitProcessName, stdinCloseSync, attachStdio)
			return rio, err
		},
		func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
			info.Checkpoint = cp
			info.Options = &runctypes.CreateOptions{
				IoUid:       uint32(uid),
				IoGid:       uint32(gid),
				NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
			}
			return nil
		})
	if err != nil {
		// Unblock the createIO goroutine and tear down any IO it made.
		close(stdinCloseSync)
		if rio != nil {
			rio.Cancel()
			rio.Close()
		}
		return -1, wrapError(err)
	}

	ctr.setTask(t)

	// Signal c.createIO that it can call CloseIO
	close(stdinCloseSync)

	if err := t.Start(ctx); err != nil {
		if _, err := t.Delete(ctx); err != nil {
			c.logger.WithError(err).WithField("container", id).
				Error("failed to delete task after fail start")
		}
		ctr.setTask(nil)
		return -1, wrapError(err)
	}

	return int(t.Pid()), nil
}
// Exec creates and starts an additional process (processID) inside an
// already running container, returning its pid. processID must be
// unique within the container.
func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) {
	ctr := c.getContainer(containerID)
	if ctr == nil {
		return -1, errors.WithStack(newNotFoundError("no such container"))
	}
	t := ctr.getTask()
	if t == nil {
		return -1, errors.WithStack(newInvalidParameterError("container is not running"))
	}

	if p := ctr.getProcess(processID); p != nil {
		return -1, errors.WithStack(newConflictError("id already in use"))
	}

	var (
		p              containerd.Process
		rio            cio.IO
		err            error
		stdinCloseSync = make(chan struct{})
	)

	fifos := newFIFOSet(ctr.bundleDir, processID, withStdin, spec.Terminal)

	// Tear down any IO created below if the exec fails at any point.
	defer func() {
		if err != nil {
			if rio != nil {
				rio.Cancel()
				rio.Close()
			}
		}
	}()

	p, err = t.Exec(ctx, processID, spec, func(id string) (cio.IO, error) {
		rio, err = c.createIO(fifos, containerID, processID, stdinCloseSync, attachStdio)
		return rio, err
	})
	if err != nil {
		// Unblock the createIO goroutine before bailing out.
		close(stdinCloseSync)
		return -1, wrapError(err)
	}

	ctr.addProcess(processID, p)

	// Signal c.createIO that it can call CloseIO
	close(stdinCloseSync)

	if err = p.Start(ctx); err != nil {
		p.Delete(context.Background())
		ctr.deleteProcess(processID)
		return -1, wrapError(err)
	}

	return int(p.Pid()), nil
}
  328. func (c *client) SignalProcess(ctx context.Context, containerID, processID string, signal int) error {
  329. p, err := c.getProcess(containerID, processID)
  330. if err != nil {
  331. return err
  332. }
  333. return wrapError(p.Kill(ctx, syscall.Signal(signal)))
  334. }
  335. func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
  336. p, err := c.getProcess(containerID, processID)
  337. if err != nil {
  338. return err
  339. }
  340. return p.Resize(ctx, uint32(width), uint32(height))
  341. }
  342. func (c *client) CloseStdin(ctx context.Context, containerID, processID string) error {
  343. p, err := c.getProcess(containerID, processID)
  344. if err != nil {
  345. return err
  346. }
  347. return p.CloseIO(ctx, containerd.WithStdinCloser)
  348. }
  349. func (c *client) Pause(ctx context.Context, containerID string) error {
  350. p, err := c.getProcess(containerID, InitProcessName)
  351. if err != nil {
  352. return err
  353. }
  354. return wrapError(p.(containerd.Task).Pause(ctx))
  355. }
  356. func (c *client) Resume(ctx context.Context, containerID string) error {
  357. p, err := c.getProcess(containerID, InitProcessName)
  358. if err != nil {
  359. return err
  360. }
  361. return p.(containerd.Task).Resume(ctx)
  362. }
  363. func (c *client) Stats(ctx context.Context, containerID string) (*Stats, error) {
  364. p, err := c.getProcess(containerID, InitProcessName)
  365. if err != nil {
  366. return nil, err
  367. }
  368. m, err := p.(containerd.Task).Metrics(ctx)
  369. if err != nil {
  370. return nil, err
  371. }
  372. v, err := typeurl.UnmarshalAny(m.Data)
  373. if err != nil {
  374. return nil, err
  375. }
  376. return interfaceToStats(m.Timestamp, v), nil
  377. }
  378. func (c *client) ListPids(ctx context.Context, containerID string) ([]uint32, error) {
  379. p, err := c.getProcess(containerID, InitProcessName)
  380. if err != nil {
  381. return nil, err
  382. }
  383. pis, err := p.(containerd.Task).Pids(ctx)
  384. if err != nil {
  385. return nil, err
  386. }
  387. var pids []uint32
  388. for _, i := range pis {
  389. pids = append(pids, i.Pid)
  390. }
  391. return pids, nil
  392. }
  393. func (c *client) Summary(ctx context.Context, containerID string) ([]Summary, error) {
  394. p, err := c.getProcess(containerID, InitProcessName)
  395. if err != nil {
  396. return nil, err
  397. }
  398. pis, err := p.(containerd.Task).Pids(ctx)
  399. if err != nil {
  400. return nil, err
  401. }
  402. var infos []Summary
  403. for _, pi := range pis {
  404. i, err := typeurl.UnmarshalAny(pi.Info)
  405. if err != nil {
  406. return nil, errors.Wrap(err, "unable to decode process details")
  407. }
  408. s, err := summaryFromInterface(i)
  409. if err != nil {
  410. return nil, err
  411. }
  412. infos = append(infos, *s)
  413. }
  414. return infos, nil
  415. }
// DeleteTask deletes the container's init task from containerd, clears
// it from the tracked container, and returns the task's exit code and
// exit time.
//
// NOTE(review): lookup and delete failures are deliberately swallowed
// here — the error return is always nil and a sentinel exit code of
// 255 with the current time is reported instead. Confirm callers rely
// on this before changing it.
func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error) {
	p, err := c.getProcess(containerID, InitProcessName)
	if err != nil {
		return 255, time.Now(), nil
	}

	status, err := p.(containerd.Task).Delete(ctx)
	if err != nil {
		return 255, time.Now(), nil
	}

	if ctr := c.getContainer(containerID); ctr != nil {
		ctr.setTask(nil)
	}
	return status.ExitCode(), status.ExitTime(), nil
}
  430. func (c *client) Delete(ctx context.Context, containerID string) error {
  431. ctr := c.getContainer(containerID)
  432. if ctr == nil {
  433. return errors.WithStack(newNotFoundError("no such container"))
  434. }
  435. if err := ctr.ctr.Delete(ctx); err != nil {
  436. return wrapError(err)
  437. }
  438. if os.Getenv("LIBCONTAINERD_NOCLEAN") != "1" {
  439. if err := os.RemoveAll(ctr.bundleDir); err != nil {
  440. c.logger.WithError(err).WithFields(logrus.Fields{
  441. "container": containerID,
  442. "bundle": ctr.bundleDir,
  443. }).Error("failed to remove state dir")
  444. }
  445. }
  446. c.removeContainer(containerID)
  447. return nil
  448. }
  449. func (c *client) Status(ctx context.Context, containerID string) (Status, error) {
  450. ctr := c.getContainer(containerID)
  451. if ctr == nil {
  452. return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
  453. }
  454. t := ctr.getTask()
  455. if t == nil {
  456. return StatusUnknown, errors.WithStack(newNotFoundError("no such task"))
  457. }
  458. s, err := t.Status(ctx)
  459. if err != nil {
  460. return StatusUnknown, wrapError(err)
  461. }
  462. return Status(s.Status), nil
  463. }
  464. func (c *client) CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error {
  465. p, err := c.getProcess(containerID, InitProcessName)
  466. if err != nil {
  467. return err
  468. }
  469. opts := []containerd.CheckpointTaskOpts{}
  470. if exit {
  471. opts = append(opts, func(r *containerd.CheckpointTaskInfo) error {
  472. if r.Options == nil {
  473. r.Options = &runctypes.CheckpointOptions{
  474. Exit: true,
  475. }
  476. } else {
  477. opts, _ := r.Options.(*runctypes.CheckpointOptions)
  478. opts.Exit = true
  479. }
  480. return nil
  481. })
  482. }
  483. img, err := p.(containerd.Task).Checkpoint(ctx, opts...)
  484. if err != nil {
  485. return wrapError(err)
  486. }
  487. // Whatever happens, delete the checkpoint from containerd
  488. defer func() {
  489. err := c.client.ImageService().Delete(context.Background(), img.Name())
  490. if err != nil {
  491. c.logger.WithError(err).WithField("digest", img.Target().Digest).
  492. Warnf("failed to delete checkpoint image")
  493. }
  494. }()
  495. b, err := content.ReadBlob(ctx, c.client.ContentStore(), img.Target())
  496. if err != nil {
  497. return errdefs.System(errors.Wrapf(err, "failed to retrieve checkpoint data"))
  498. }
  499. var index v1.Index
  500. if err := json.Unmarshal(b, &index); err != nil {
  501. return errdefs.System(errors.Wrapf(err, "failed to decode checkpoint data"))
  502. }
  503. var cpDesc *v1.Descriptor
  504. for _, m := range index.Manifests {
  505. if m.MediaType == images.MediaTypeContainerd1Checkpoint {
  506. cpDesc = &m
  507. break
  508. }
  509. }
  510. if cpDesc == nil {
  511. return errdefs.System(errors.Wrapf(err, "invalid checkpoint"))
  512. }
  513. rat, err := c.client.ContentStore().ReaderAt(ctx, *cpDesc)
  514. if err != nil {
  515. return errdefs.System(errors.Wrapf(err, "failed to get checkpoint reader"))
  516. }
  517. defer rat.Close()
  518. _, err = archive.Apply(ctx, checkpointDir, content.NewReader(rat))
  519. if err != nil {
  520. return errdefs.System(errors.Wrapf(err, "failed to read checkpoint reader"))
  521. }
  522. return err
  523. }
  524. func (c *client) getContainer(id string) *container {
  525. c.RLock()
  526. ctr := c.containers[id]
  527. c.RUnlock()
  528. return ctr
  529. }
  530. func (c *client) removeContainer(id string) {
  531. c.Lock()
  532. delete(c.containers, id)
  533. c.Unlock()
  534. }
  535. func (c *client) getProcess(containerID, processID string) (containerd.Process, error) {
  536. ctr := c.getContainer(containerID)
  537. if ctr == nil {
  538. return nil, errors.WithStack(newNotFoundError("no such container"))
  539. }
  540. t := ctr.getTask()
  541. if t == nil {
  542. return nil, errors.WithStack(newNotFoundError("container is not running"))
  543. }
  544. if processID == InitProcessName {
  545. return t, nil
  546. }
  547. p := ctr.getProcess(processID)
  548. if p == nil {
  549. return nil, errors.WithStack(newNotFoundError("no such exec"))
  550. }
  551. return p, nil
  552. }
// createIO creates the io to be used by a process.
// This needs to get a pointer to interface as upon closure the process may not have yet been registered.
// The returned cio.IO wraps the process FIFOs; when stdin is present,
// its Close is intercepted so that CloseIO is only called on the
// process after stdinCloseSync has been closed by the caller (i.e.
// after the process is registered or its start has failed).
func (c *client) createIO(fifos *cio.FIFOSet, containerID, processID string, stdinCloseSync chan struct{}, attachStdio StdioCallback) (cio.IO, error) {
	var (
		io  *cio.DirectIO
		err error
	)
	io, err = cio.NewDirectIO(context.Background(), fifos)
	if err != nil {
		return nil, err
	}

	if io.Stdin != nil {
		var (
			// Deliberately shadows the outer err: holds the pipe-close
			// result that the wrapper's Close returns.
			err       error
			stdinOnce sync.Once
		)
		pipe := io.Stdin
		io.Stdin = ioutils.NewWriteCloserWrapper(pipe, func() error {
			stdinOnce.Do(func() {
				err = pipe.Close()
				// Do the rest in a new routine to avoid a deadlock if the
				// Exec/Start call failed.
				go func() {
					// Wait until the caller signals the process has been
					// registered (or start has failed).
					<-stdinCloseSync
					p, err := c.getProcess(containerID, processID)
					if err == nil {
						err = p.CloseIO(context.Background(), containerd.WithStdinCloser)
						if err != nil && strings.Contains(err.Error(), "transport is closing") {
							// The grpc transport is shutting down; treat as benign.
							err = nil
						}
					}
				}()
			})
			return err
		})
	}

	rio, err := attachStdio(io)
	if err != nil {
		// Attaching failed: release the FIFOs we created.
		io.Cancel()
		io.Close()
	}
	return rio, err
}
// processEvent hands a single containerd event to the backend via the
// client's event queue, appended under the event's container ID. For
// exit events of exec processes (process ID differs from the container
// ID) the exec is additionally deleted from containerd, removed from
// the tracked container, and its FIFOs are closed.
func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
	c.eventQ.append(ei.ContainerID, func() {
		err := c.backend.ProcessEvent(ei.ContainerID, et, ei)
		if err != nil {
			c.logger.WithError(err).WithFields(logrus.Fields{
				"container":  ei.ContainerID,
				"event":      et,
				"event-info": ei,
			}).Error("failed to process event")
		}

		if et == EventExit && ei.ProcessID != ei.ContainerID {
			p := ctr.getProcess(ei.ProcessID)
			if p == nil {
				c.logger.WithError(errors.New("no such process")).
					WithFields(logrus.Fields{
						"container": ei.ContainerID,
						"process":   ei.ProcessID,
					}).Error("exit event")
				return
			}
			_, err = p.Delete(context.Background())
			if err != nil {
				c.logger.WithError(err).WithFields(logrus.Fields{
					"container": ei.ContainerID,
					"process":   ei.ProcessID,
				}).Warn("failed to delete process")
			}
			ctr.deleteProcess(ei.ProcessID)

			// Deliberate shadow: re-fetch the container, which may have
			// been removed while this queued handler was waiting to run.
			ctr := c.getContainer(ei.ContainerID)
			if ctr == nil {
				c.logger.WithFields(logrus.Fields{
					"container": ei.ContainerID,
				}).Error("failed to find container")
			} else {
				newFIFOSet(ctr.bundleDir, ei.ProcessID, true, false).Close()
			}
		}
	})
}
// processEventStream subscribes to containerd's task events for
// namespace ns and dispatches them to processEvent until ctx is
// cancelled. On a stream error other than grpc Canceled it logs the
// error and restarts itself in a fresh goroutine before returning.
func (c *client) processEventStream(ctx context.Context, ns string) {
	var (
		err error
		ev  *events.Envelope
		et  EventType
		ei  EventInfo
		ctr *container
	)

	// Filter on both namespace *and* topic. To create an "and" filter,
	// this must be a single, comma-separated string
	eventStream, errC := c.client.EventService().Subscribe(ctx, "namespace=="+ns+",topic~=|^/tasks/|")

	c.logger.Debug("processing event stream")

	var oomKilled bool
	for {
		select {
		case err = <-errC:
			if err != nil {
				errStatus, ok := status.FromError(err)
				if !ok || errStatus.Code() != codes.Canceled {
					c.logger.WithError(err).Error("failed to get event")
					// Non-graceful failure: resubscribe from scratch.
					go c.processEventStream(ctx, ns)
				} else {
					c.logger.WithError(ctx.Err()).Info("stopping event stream following graceful shutdown")
				}
			}
			return
		case ev = <-eventStream:
			if ev.Event == nil {
				c.logger.WithField("event", ev).Warn("invalid event")
				continue
			}

			v, err := typeurl.UnmarshalAny(ev.Event)
			if err != nil {
				c.logger.WithError(err).WithField("event", ev).Warn("failed to unmarshal event")
				continue
			}

			c.logger.WithField("topic", ev.Topic).Debug("event")

			// Map the containerd event payload onto our EventType/EventInfo.
			switch t := v.(type) {
			case *apievents.TaskCreate:
				et = EventCreate
				ei = EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ContainerID,
					Pid:         t.Pid,
				}
			case *apievents.TaskStart:
				et = EventStart
				ei = EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ContainerID,
					Pid:         t.Pid,
				}
			case *apievents.TaskExit:
				et = EventExit
				ei = EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ID,
					Pid:         t.Pid,
					ExitCode:    t.ExitStatus,
					ExitedAt:    t.ExitedAt,
				}
			case *apievents.TaskOOM:
				et = EventOOM
				ei = EventInfo{
					ContainerID: t.ContainerID,
					OOMKilled:   true,
				}
				oomKilled = true
			case *apievents.TaskExecAdded:
				et = EventExecAdded
				ei = EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ExecID,
				}
			case *apievents.TaskExecStarted:
				et = EventExecStarted
				ei = EventInfo{
					ContainerID: t.ContainerID,
					ProcessID:   t.ExecID,
					Pid:         t.Pid,
				}
			case *apievents.TaskPaused:
				et = EventPaused
				ei = EventInfo{
					ContainerID: t.ContainerID,
				}
			case *apievents.TaskResumed:
				et = EventResumed
				ei = EventInfo{
					ContainerID: t.ContainerID,
				}
			default:
				c.logger.WithFields(logrus.Fields{
					"topic": ev.Topic,
					"type":  reflect.TypeOf(t)},
				).Info("ignoring event")
				continue
			}

			ctr = c.getContainer(ei.ContainerID)
			if ctr == nil {
				c.logger.WithField("container", ei.ContainerID).Warn("unknown container")
				continue
			}

			// Latch an observed OOM kill on the container so this and all
			// subsequent events for it report OOMKilled=true.
			if oomKilled {
				ctr.setOOMKilled(true)
				oomKilled = false
			}
			ei.OOMKilled = ctr.getOOMKilled()

			c.processEvent(ctr, et, ei)
		}
	}
}
  747. func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.Reader) (*types.Descriptor, error) {
  748. writer, err := c.client.ContentStore().Writer(ctx, content.WithRef(ref))
  749. if err != nil {
  750. return nil, err
  751. }
  752. defer writer.Close()
  753. size, err := io.Copy(writer, r)
  754. if err != nil {
  755. return nil, err
  756. }
  757. labels := map[string]string{
  758. "containerd.io/gc.root": time.Now().UTC().Format(time.RFC3339),
  759. }
  760. if err := writer.Commit(ctx, 0, "", content.WithLabels(labels)); err != nil {
  761. return nil, err
  762. }
  763. return &types.Descriptor{
  764. MediaType: mediaType,
  765. Digest: writer.Digest(),
  766. Size_: size,
  767. }, nil
  768. }
  769. func wrapError(err error) error {
  770. switch {
  771. case err == nil:
  772. return nil
  773. case containerderrors.IsNotFound(err):
  774. return errdefs.NotFound(err)
  775. }
  776. msg := err.Error()
  777. for _, s := range []string{"container does not exist", "not found", "no such container"} {
  778. if strings.Contains(msg, s) {
  779. return errdefs.NotFound(err)
  780. }
  781. }
  782. return err
  783. }