runtime.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. // +build linux
  2. /*
  3. Copyright The containerd Authors.
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. */
  14. package linux
  15. import (
  16. "context"
  17. "fmt"
  18. "io"
  19. "io/ioutil"
  20. "os"
  21. "path/filepath"
  22. "time"
  23. eventstypes "github.com/containerd/containerd/api/events"
  24. "github.com/containerd/containerd/api/types"
  25. "github.com/containerd/containerd/containers"
  26. "github.com/containerd/containerd/errdefs"
  27. "github.com/containerd/containerd/events/exchange"
  28. "github.com/containerd/containerd/identifiers"
  29. "github.com/containerd/containerd/log"
  30. "github.com/containerd/containerd/metadata"
  31. "github.com/containerd/containerd/mount"
  32. "github.com/containerd/containerd/namespaces"
  33. "github.com/containerd/containerd/pkg/process"
  34. "github.com/containerd/containerd/platforms"
  35. "github.com/containerd/containerd/plugin"
  36. "github.com/containerd/containerd/runtime"
  37. "github.com/containerd/containerd/runtime/linux/runctypes"
  38. v1 "github.com/containerd/containerd/runtime/v1"
  39. shim "github.com/containerd/containerd/runtime/v1/shim/v1"
  40. runc "github.com/containerd/go-runc"
  41. "github.com/containerd/typeurl"
  42. ptypes "github.com/gogo/protobuf/types"
  43. ocispec "github.com/opencontainers/image-spec/specs-go/v1"
  44. "github.com/pkg/errors"
  45. "github.com/sirupsen/logrus"
  46. "golang.org/x/sys/unix"
  47. )
var (
	// pluginID is the identifier reported by Runtime.ID. It is built from the
	// runtime plugin type and the "linux" plugin name, joined by a dot.
	pluginID = fmt.Sprintf("%s.%s", plugin.RuntimePlugin, "linux")
	// empty is a pre-allocated protobuf Empty message. It is not referenced in
	// this part of the file; presumably other files in the package reuse it.
	empty = &ptypes.Empty{}
)
const (
	// configFilename is not referenced in this part of the file; presumably it
	// is the name of the OCI spec file inside a bundle — confirm at its users.
	configFilename = "config.json"
	// defaultRuntime is the Runtime value registered as the plugin's default config.
	defaultRuntime = "runc"
	// defaultShim is the Shim value registered as the plugin's default config.
	defaultShim = "containerd-shim"
	// cleanupTimeout is default timeout for cleanup operations
	cleanupTimeout = 1 * time.Minute
)
  59. func init() {
  60. plugin.Register(&plugin.Registration{
  61. Type: plugin.RuntimePlugin,
  62. ID: "linux",
  63. InitFn: New,
  64. Requires: []plugin.Type{
  65. plugin.MetadataPlugin,
  66. },
  67. Config: &Config{
  68. Shim: defaultShim,
  69. Runtime: defaultRuntime,
  70. },
  71. })
  72. }
  73. var _ = (runtime.PlatformRuntime)(&Runtime{})
// Config options for the runtime
//
// The values are populated from TOML using the keys given in the struct tags.
type Config struct {
	// Shim is a path or name of binary implementing the Shim GRPC API
	Shim string `toml:"shim"`
	// Runtime is a path or name of an OCI runtime used by the shim.
	// It is used unless the container's own runtime options name a runtime.
	Runtime string `toml:"runtime"`
	// RuntimeRoot is the path that shall be used by the OCI runtime for its data
	RuntimeRoot string `toml:"runtime_root"`
	// NoShim calls runc directly from within the pkg.
	// When false, Create talks to a remote shim instead of a local one.
	NoShim bool `toml:"no_shim"`
	// ShimDebug enables debug on the shim. In this file it also forwards
	// restored shims' stdout/stderr logs to the daemon's own stdout/stderr,
	// turns on Debug for the runc client, and keeps the bundle in place when
	// terminating a task fails during dead-shim cleanup.
	ShimDebug bool `toml:"shim_debug"`
}
  87. // New returns a configured runtime
  88. func New(ic *plugin.InitContext) (interface{}, error) {
  89. ic.Meta.Platforms = []ocispec.Platform{platforms.DefaultSpec()}
  90. if err := os.MkdirAll(ic.Root, 0711); err != nil {
  91. return nil, err
  92. }
  93. if err := os.MkdirAll(ic.State, 0711); err != nil {
  94. return nil, err
  95. }
  96. m, err := ic.Get(plugin.MetadataPlugin)
  97. if err != nil {
  98. return nil, err
  99. }
  100. cfg := ic.Config.(*Config)
  101. r := &Runtime{
  102. root: ic.Root,
  103. state: ic.State,
  104. tasks: runtime.NewTaskList(),
  105. containers: metadata.NewContainerStore(m.(*metadata.DB)),
  106. address: ic.Address,
  107. events: ic.Events,
  108. config: cfg,
  109. }
  110. tasks, err := r.restoreTasks(ic.Context)
  111. if err != nil {
  112. return nil, err
  113. }
  114. for _, t := range tasks {
  115. if err := r.tasks.AddWithNamespace(t.namespace, t); err != nil {
  116. return nil, err
  117. }
  118. }
  119. return r, nil
  120. }
// Runtime for a linux based system
type Runtime struct {
	// root is the plugin root directory; per-namespace sub-directories of it
	// are handed to bundles and used as the shim log directory.
	root string
	// state is the plugin state directory; it holds one directory per
	// namespace, each containing one bundle directory per task.
	state string
	// address is taken from the plugin init context and passed to remote shims.
	address string
	// tasks is the list of tasks currently known to the runtime.
	tasks *runtime.TaskList
	// containers is the metadata-backed store used to look up a container's
	// runtime options.
	containers containers.Store
	// events is the exchange on which task events are published.
	events *exchange.Exchange
	// config is the plugin configuration.
	config *Config
}
// ID of the runtime
//
// It returns the package-level pluginID.
func (r *Runtime) ID() string {
	return pluginID
}
  135. // Create a new task
  136. func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts) (_ runtime.Task, err error) {
  137. namespace, err := namespaces.NamespaceRequired(ctx)
  138. if err != nil {
  139. return nil, err
  140. }
  141. if err := identifiers.Validate(id); err != nil {
  142. return nil, errors.Wrapf(err, "invalid task id")
  143. }
  144. ropts, err := r.getRuncOptions(ctx, id)
  145. if err != nil {
  146. return nil, err
  147. }
  148. bundle, err := newBundle(id,
  149. filepath.Join(r.state, namespace),
  150. filepath.Join(r.root, namespace),
  151. opts.Spec.Value)
  152. if err != nil {
  153. return nil, err
  154. }
  155. defer func() {
  156. if err != nil {
  157. bundle.Delete()
  158. }
  159. }()
  160. shimopt := ShimLocal(r.config, r.events)
  161. if !r.config.NoShim {
  162. var cgroup string
  163. if opts.TaskOptions != nil {
  164. v, err := typeurl.UnmarshalAny(opts.TaskOptions)
  165. if err != nil {
  166. return nil, err
  167. }
  168. cgroup = v.(*runctypes.CreateOptions).ShimCgroup
  169. }
  170. exitHandler := func() {
  171. log.G(ctx).WithField("id", id).Info("shim reaped")
  172. if _, err := r.tasks.Get(ctx, id); err != nil {
  173. // Task was never started or was already successfully deleted
  174. return
  175. }
  176. if err = r.cleanupAfterDeadShim(context.Background(), bundle, namespace, id); err != nil {
  177. log.G(ctx).WithError(err).WithFields(logrus.Fields{
  178. "id": id,
  179. "namespace": namespace,
  180. }).Warn("failed to clean up after killed shim")
  181. }
  182. }
  183. shimopt = ShimRemote(r.config, r.address, cgroup, exitHandler)
  184. }
  185. s, err := bundle.NewShimClient(ctx, namespace, shimopt, ropts)
  186. if err != nil {
  187. return nil, err
  188. }
  189. defer func() {
  190. if err != nil {
  191. deferCtx, deferCancel := context.WithTimeout(
  192. namespaces.WithNamespace(context.TODO(), namespace), cleanupTimeout)
  193. defer deferCancel()
  194. if kerr := s.KillShim(deferCtx); kerr != nil {
  195. log.G(ctx).WithError(err).Error("failed to kill shim")
  196. }
  197. }
  198. }()
  199. rt := r.config.Runtime
  200. if ropts != nil && ropts.Runtime != "" {
  201. rt = ropts.Runtime
  202. }
  203. sopts := &shim.CreateTaskRequest{
  204. ID: id,
  205. Bundle: bundle.path,
  206. Runtime: rt,
  207. Stdin: opts.IO.Stdin,
  208. Stdout: opts.IO.Stdout,
  209. Stderr: opts.IO.Stderr,
  210. Terminal: opts.IO.Terminal,
  211. Checkpoint: opts.Checkpoint,
  212. Options: opts.TaskOptions,
  213. }
  214. for _, m := range opts.Rootfs {
  215. sopts.Rootfs = append(sopts.Rootfs, &types.Mount{
  216. Type: m.Type,
  217. Source: m.Source,
  218. Options: m.Options,
  219. })
  220. }
  221. cr, err := s.Create(ctx, sopts)
  222. if err != nil {
  223. return nil, errdefs.FromGRPC(err)
  224. }
  225. t, err := newTask(id, namespace, int(cr.Pid), s, r.events, r.tasks, bundle)
  226. if err != nil {
  227. return nil, err
  228. }
  229. if err := r.tasks.Add(ctx, t); err != nil {
  230. return nil, err
  231. }
  232. r.events.Publish(ctx, runtime.TaskCreateEventTopic, &eventstypes.TaskCreate{
  233. ContainerID: sopts.ID,
  234. Bundle: sopts.Bundle,
  235. Rootfs: sopts.Rootfs,
  236. IO: &eventstypes.TaskIO{
  237. Stdin: sopts.Stdin,
  238. Stdout: sopts.Stdout,
  239. Stderr: sopts.Stderr,
  240. Terminal: sopts.Terminal,
  241. },
  242. Checkpoint: sopts.Checkpoint,
  243. Pid: uint32(t.pid),
  244. })
  245. return t, nil
  246. }
// Tasks returns all tasks known to the runtime
//
// The all flag is forwarded unchanged to the task list's GetAll; presumably it
// selects tasks across every namespace rather than only ctx's — confirm
// against runtime.TaskList.
func (r *Runtime) Tasks(ctx context.Context, all bool) ([]runtime.Task, error) {
	return r.tasks.GetAll(ctx, all)
}
  251. func (r *Runtime) restoreTasks(ctx context.Context) ([]*Task, error) {
  252. dir, err := ioutil.ReadDir(r.state)
  253. if err != nil {
  254. return nil, err
  255. }
  256. var o []*Task
  257. for _, namespace := range dir {
  258. if !namespace.IsDir() {
  259. continue
  260. }
  261. name := namespace.Name()
  262. // skip hidden directories
  263. if len(name) > 0 && name[0] == '.' {
  264. continue
  265. }
  266. log.G(ctx).WithField("namespace", name).Debug("loading tasks in namespace")
  267. tasks, err := r.loadTasks(ctx, name)
  268. if err != nil {
  269. return nil, err
  270. }
  271. o = append(o, tasks...)
  272. }
  273. return o, nil
  274. }
// Get a specific task by task id
//
// The lookup is delegated to the runtime's task list using ctx and id.
func (r *Runtime) Get(ctx context.Context, id string) (runtime.Task, error) {
	return r.tasks.Get(ctx, id)
}
// Add a runtime task
//
// The task is handed to the runtime's task list, whose error is returned
// unchanged.
func (r *Runtime) Add(ctx context.Context, task runtime.Task) error {
	return r.tasks.Add(ctx, task)
}
// Delete a runtime task
//
// This only removes the entry from the runtime's task list; nothing in this
// method stops a process or removes a bundle.
func (r *Runtime) Delete(ctx context.Context, id string) {
	r.tasks.Delete(ctx, id)
}
  287. func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) {
  288. dir, err := ioutil.ReadDir(filepath.Join(r.state, ns))
  289. if err != nil {
  290. return nil, err
  291. }
  292. var o []*Task
  293. for _, path := range dir {
  294. if !path.IsDir() {
  295. continue
  296. }
  297. id := path.Name()
  298. // skip hidden directories
  299. if len(id) > 0 && id[0] == '.' {
  300. continue
  301. }
  302. bundle := loadBundle(
  303. id,
  304. filepath.Join(r.state, ns, id),
  305. filepath.Join(r.root, ns, id),
  306. )
  307. ctx = namespaces.WithNamespace(ctx, ns)
  308. pid, _ := runc.ReadPidFile(filepath.Join(bundle.path, process.InitPidFile))
  309. shimExit := make(chan struct{})
  310. s, err := bundle.NewShimClient(ctx, ns, ShimConnect(r.config, func() {
  311. defer close(shimExit)
  312. if _, err := r.tasks.Get(ctx, id); err != nil {
  313. // Task was never started or was already successfully deleted
  314. return
  315. }
  316. if err := r.cleanupAfterDeadShim(ctx, bundle, ns, id); err != nil {
  317. log.G(ctx).WithError(err).WithField("bundle", bundle.path).
  318. Error("cleaning up after dead shim")
  319. }
  320. }), nil)
  321. if err != nil {
  322. log.G(ctx).WithError(err).WithFields(logrus.Fields{
  323. "id": id,
  324. "namespace": ns,
  325. }).Error("connecting to shim")
  326. err := r.cleanupAfterDeadShim(ctx, bundle, ns, id)
  327. if err != nil {
  328. log.G(ctx).WithError(err).WithField("bundle", bundle.path).
  329. Error("cleaning up after dead shim")
  330. }
  331. continue
  332. }
  333. logDirPath := filepath.Join(r.root, ns, id)
  334. copyAndClose := func(dst io.Writer, src io.ReadWriteCloser) {
  335. copyDone := make(chan struct{})
  336. go func() {
  337. io.Copy(dst, src)
  338. close(copyDone)
  339. }()
  340. select {
  341. case <-shimExit:
  342. case <-copyDone:
  343. }
  344. src.Close()
  345. }
  346. shimStdoutLog, err := v1.OpenShimStdoutLog(ctx, logDirPath)
  347. if err != nil {
  348. log.G(ctx).WithError(err).WithFields(logrus.Fields{
  349. "id": id,
  350. "namespace": ns,
  351. "logDirPath": logDirPath,
  352. }).Error("opening shim stdout log pipe")
  353. continue
  354. }
  355. if r.config.ShimDebug {
  356. go copyAndClose(os.Stdout, shimStdoutLog)
  357. } else {
  358. go copyAndClose(ioutil.Discard, shimStdoutLog)
  359. }
  360. shimStderrLog, err := v1.OpenShimStderrLog(ctx, logDirPath)
  361. if err != nil {
  362. log.G(ctx).WithError(err).WithFields(logrus.Fields{
  363. "id": id,
  364. "namespace": ns,
  365. "logDirPath": logDirPath,
  366. }).Error("opening shim stderr log pipe")
  367. continue
  368. }
  369. if r.config.ShimDebug {
  370. go copyAndClose(os.Stderr, shimStderrLog)
  371. } else {
  372. go copyAndClose(ioutil.Discard, shimStderrLog)
  373. }
  374. t, err := newTask(id, ns, pid, s, r.events, r.tasks, bundle)
  375. if err != nil {
  376. log.G(ctx).WithError(err).Error("loading task type")
  377. continue
  378. }
  379. o = append(o, t)
  380. }
  381. return o, nil
  382. }
  383. func (r *Runtime) cleanupAfterDeadShim(ctx context.Context, bundle *bundle, ns, id string) error {
  384. log.G(ctx).WithFields(logrus.Fields{
  385. "id": id,
  386. "namespace": ns,
  387. }).Warn("cleaning up after shim dead")
  388. pid, _ := runc.ReadPidFile(filepath.Join(bundle.path, process.InitPidFile))
  389. ctx = namespaces.WithNamespace(ctx, ns)
  390. if err := r.terminate(ctx, bundle, ns, id); err != nil {
  391. if r.config.ShimDebug {
  392. return errors.Wrap(err, "failed to terminate task, leaving bundle for debugging")
  393. }
  394. log.G(ctx).WithError(err).Warn("failed to terminate task")
  395. }
  396. // Notify Client
  397. exitedAt := time.Now().UTC()
  398. r.events.Publish(ctx, runtime.TaskExitEventTopic, &eventstypes.TaskExit{
  399. ContainerID: id,
  400. ID: id,
  401. Pid: uint32(pid),
  402. ExitStatus: 128 + uint32(unix.SIGKILL),
  403. ExitedAt: exitedAt,
  404. })
  405. r.tasks.Delete(ctx, id)
  406. if err := bundle.Delete(); err != nil {
  407. log.G(ctx).WithError(err).Error("delete bundle")
  408. }
  409. // kill shim
  410. if shimPid, err := runc.ReadPidFile(filepath.Join(bundle.path, "shim.pid")); err == nil && shimPid > 0 {
  411. unix.Kill(shimPid, unix.SIGKILL)
  412. }
  413. r.events.Publish(ctx, runtime.TaskDeleteEventTopic, &eventstypes.TaskDelete{
  414. ContainerID: id,
  415. Pid: uint32(pid),
  416. ExitStatus: 128 + uint32(unix.SIGKILL),
  417. ExitedAt: exitedAt,
  418. })
  419. return nil
  420. }
  421. func (r *Runtime) terminate(ctx context.Context, bundle *bundle, ns, id string) error {
  422. rt, err := r.getRuntime(ctx, ns, id)
  423. if err != nil {
  424. return err
  425. }
  426. if err := rt.Delete(ctx, id, &runc.DeleteOpts{
  427. Force: true,
  428. }); err != nil {
  429. log.G(ctx).WithError(err).Warnf("delete runtime state %s", id)
  430. }
  431. if err := mount.Unmount(filepath.Join(bundle.path, "rootfs"), 0); err != nil {
  432. log.G(ctx).WithError(err).WithFields(logrus.Fields{
  433. "path": bundle.path,
  434. "id": id,
  435. }).Warnf("unmount task rootfs")
  436. }
  437. return nil
  438. }
  439. func (r *Runtime) getRuntime(ctx context.Context, ns, id string) (*runc.Runc, error) {
  440. ropts, err := r.getRuncOptions(ctx, id)
  441. if err != nil {
  442. return nil, err
  443. }
  444. var (
  445. cmd = r.config.Runtime
  446. root = process.RuncRoot
  447. )
  448. if ropts != nil {
  449. if ropts.Runtime != "" {
  450. cmd = ropts.Runtime
  451. }
  452. if ropts.RuntimeRoot != "" {
  453. root = ropts.RuntimeRoot
  454. }
  455. }
  456. return &runc.Runc{
  457. Command: cmd,
  458. LogFormat: runc.JSON,
  459. PdeathSignal: unix.SIGKILL,
  460. Root: filepath.Join(root, ns),
  461. Debug: r.config.ShimDebug,
  462. }, nil
  463. }
  464. func (r *Runtime) getRuncOptions(ctx context.Context, id string) (*runctypes.RuncOptions, error) {
  465. container, err := r.containers.Get(ctx, id)
  466. if err != nil {
  467. return nil, err
  468. }
  469. if container.Runtime.Options != nil {
  470. v, err := typeurl.UnmarshalAny(container.Runtime.Options)
  471. if err != nil {
  472. return nil, err
  473. }
  474. ropts, ok := v.(*runctypes.RuncOptions)
  475. if !ok {
  476. return nil, errors.New("invalid runtime options format")
  477. }
  478. return ropts, nil
  479. }
  480. return &runctypes.RuncOptions{}, nil
  481. }