runtime.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. // +build linux
  2. /*
  3. Copyright The containerd Authors.
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. */
  14. package linux
  15. import (
  16. "context"
  17. "fmt"
  18. "io/ioutil"
  19. "os"
  20. "path/filepath"
  21. "time"
  22. "github.com/boltdb/bolt"
  23. eventstypes "github.com/containerd/containerd/api/events"
  24. "github.com/containerd/containerd/api/types"
  25. "github.com/containerd/containerd/containers"
  26. "github.com/containerd/containerd/errdefs"
  27. "github.com/containerd/containerd/events/exchange"
  28. "github.com/containerd/containerd/identifiers"
  29. "github.com/containerd/containerd/log"
  30. "github.com/containerd/containerd/metadata"
  31. "github.com/containerd/containerd/mount"
  32. "github.com/containerd/containerd/namespaces"
  33. "github.com/containerd/containerd/platforms"
  34. "github.com/containerd/containerd/plugin"
  35. "github.com/containerd/containerd/runtime"
  36. "github.com/containerd/containerd/runtime/linux/runctypes"
  37. "github.com/containerd/containerd/runtime/v1/linux/proc"
  38. shim "github.com/containerd/containerd/runtime/v1/shim/v1"
  39. runc "github.com/containerd/go-runc"
  40. "github.com/containerd/typeurl"
  41. ptypes "github.com/gogo/protobuf/types"
  42. ocispec "github.com/opencontainers/image-spec/specs-go/v1"
  43. "github.com/pkg/errors"
  44. "github.com/sirupsen/logrus"
  45. "golang.org/x/sys/unix"
  46. )
var (
	// pluginID is the identifier reported by Runtime.ID: the runtime plugin
	// type joined with "linux" by a dot.
	pluginID = fmt.Sprintf("%s.%s", plugin.RuntimePlugin, "linux")
	// empty is a shared protobuf Empty message instance.
	empty = &ptypes.Empty{}
)

const (
	// configFilename is the file name "config.json".
	// NOTE(review): presumably the OCI spec file stored in a bundle; it is not
	// referenced in this part of the file, confirm against the bundle code.
	configFilename = "config.json"
	// defaultRuntime is the Runtime value of the Config registered in init.
	defaultRuntime = "runc"
	// defaultShim is the Shim value of the Config registered in init.
	defaultShim = "containerd-shim"
)
  56. func init() {
  57. plugin.Register(&plugin.Registration{
  58. Type: plugin.RuntimePlugin,
  59. ID: "linux",
  60. InitFn: New,
  61. Requires: []plugin.Type{
  62. plugin.MetadataPlugin,
  63. },
  64. Config: &Config{
  65. Shim: defaultShim,
  66. Runtime: defaultRuntime,
  67. },
  68. })
  69. }
  70. var _ = (runtime.PlatformRuntime)(&Runtime{})
// Config holds the options for the linux runtime plugin. The struct tags
// give the TOML key of each field.
type Config struct {
	// Shim is a path or name of binary implementing the Shim GRPC API
	Shim string `toml:"shim"`
	// Runtime is a path or name of an OCI runtime used by the shim
	Runtime string `toml:"runtime"`
	// RuntimeRoot is the path that shall be used by the OCI runtime for its data
	RuntimeRoot string `toml:"runtime_root"`
	// NoShim calls runc directly from within the pkg; when it is false,
	// Create talks to a remote shim instead of a local one.
	NoShim bool `toml:"no_shim"`
	// ShimDebug enables debug on the shim. Within this file it also sets the
	// Debug flag of the runc client built by getRuntime and makes
	// cleanupAfterDeadShim leave the bundle in place when termination fails.
	ShimDebug bool `toml:"shim_debug"`
}
  84. // New returns a configured runtime
  85. func New(ic *plugin.InitContext) (interface{}, error) {
  86. ic.Meta.Platforms = []ocispec.Platform{platforms.DefaultSpec()}
  87. if err := os.MkdirAll(ic.Root, 0711); err != nil {
  88. return nil, err
  89. }
  90. if err := os.MkdirAll(ic.State, 0711); err != nil {
  91. return nil, err
  92. }
  93. m, err := ic.Get(plugin.MetadataPlugin)
  94. if err != nil {
  95. return nil, err
  96. }
  97. cfg := ic.Config.(*Config)
  98. r := &Runtime{
  99. root: ic.Root,
  100. state: ic.State,
  101. tasks: runtime.NewTaskList(),
  102. db: m.(*metadata.DB),
  103. address: ic.Address,
  104. events: ic.Events,
  105. config: cfg,
  106. }
  107. tasks, err := r.restoreTasks(ic.Context)
  108. if err != nil {
  109. return nil, err
  110. }
  111. for _, t := range tasks {
  112. if err := r.tasks.AddWithNamespace(t.namespace, t); err != nil {
  113. return nil, err
  114. }
  115. }
  116. return r, nil
  117. }
// Runtime for a linux based system
type Runtime struct {
	// root is the plugin root directory; per-namespace sub-directories of it
	// are passed to the bundle helpers.
	root string
	// state is the plugin state directory; its sub-directories are treated
	// as namespaces when tasks are restored.
	state string
	// address is handed to ShimRemote when a remote shim is used.
	address string
	// tasks is the list of tasks known to this runtime.
	tasks *runtime.TaskList
	// db is the metadata database used to look up container runtime options.
	db *metadata.DB
	// events is the exchange on which task events are published.
	events *exchange.Exchange
	// config is the plugin configuration the runtime was created with.
	config *Config
}
// ID returns the identifier of the runtime, which is the package-level
// pluginID (the runtime plugin type joined with "linux").
func (r *Runtime) ID() string {
	return pluginID
}
  132. // Create a new task
  133. func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts) (_ runtime.Task, err error) {
  134. namespace, err := namespaces.NamespaceRequired(ctx)
  135. if err != nil {
  136. return nil, err
  137. }
  138. if err := identifiers.Validate(id); err != nil {
  139. return nil, errors.Wrapf(err, "invalid task id")
  140. }
  141. ropts, err := r.getRuncOptions(ctx, id)
  142. if err != nil {
  143. return nil, err
  144. }
  145. bundle, err := newBundle(id,
  146. filepath.Join(r.state, namespace),
  147. filepath.Join(r.root, namespace),
  148. opts.Spec.Value)
  149. if err != nil {
  150. return nil, err
  151. }
  152. defer func() {
  153. if err != nil {
  154. bundle.Delete()
  155. }
  156. }()
  157. shimopt := ShimLocal(r.config, r.events)
  158. if !r.config.NoShim {
  159. var cgroup string
  160. if opts.TaskOptions != nil {
  161. v, err := typeurl.UnmarshalAny(opts.TaskOptions)
  162. if err != nil {
  163. return nil, err
  164. }
  165. cgroup = v.(*runctypes.CreateOptions).ShimCgroup
  166. }
  167. exitHandler := func() {
  168. log.G(ctx).WithField("id", id).Info("shim reaped")
  169. t, err := r.tasks.Get(ctx, id)
  170. if err != nil {
  171. // Task was never started or was already successfully deleted
  172. return
  173. }
  174. lc := t.(*Task)
  175. log.G(ctx).WithFields(logrus.Fields{
  176. "id": id,
  177. "namespace": namespace,
  178. }).Warn("cleaning up after killed shim")
  179. if err = r.cleanupAfterDeadShim(context.Background(), bundle, namespace, id, lc.pid); err != nil {
  180. log.G(ctx).WithError(err).WithFields(logrus.Fields{
  181. "id": id,
  182. "namespace": namespace,
  183. }).Warn("failed to clen up after killed shim")
  184. }
  185. }
  186. shimopt = ShimRemote(r.config, r.address, cgroup, exitHandler)
  187. }
  188. s, err := bundle.NewShimClient(ctx, namespace, shimopt, ropts)
  189. if err != nil {
  190. return nil, err
  191. }
  192. defer func() {
  193. if err != nil {
  194. if kerr := s.KillShim(ctx); kerr != nil {
  195. log.G(ctx).WithError(err).Error("failed to kill shim")
  196. }
  197. }
  198. }()
  199. rt := r.config.Runtime
  200. if ropts != nil && ropts.Runtime != "" {
  201. rt = ropts.Runtime
  202. }
  203. sopts := &shim.CreateTaskRequest{
  204. ID: id,
  205. Bundle: bundle.path,
  206. Runtime: rt,
  207. Stdin: opts.IO.Stdin,
  208. Stdout: opts.IO.Stdout,
  209. Stderr: opts.IO.Stderr,
  210. Terminal: opts.IO.Terminal,
  211. Checkpoint: opts.Checkpoint,
  212. Options: opts.TaskOptions,
  213. }
  214. for _, m := range opts.Rootfs {
  215. sopts.Rootfs = append(sopts.Rootfs, &types.Mount{
  216. Type: m.Type,
  217. Source: m.Source,
  218. Options: m.Options,
  219. })
  220. }
  221. cr, err := s.Create(ctx, sopts)
  222. if err != nil {
  223. return nil, errdefs.FromGRPC(err)
  224. }
  225. t, err := newTask(id, namespace, int(cr.Pid), s, r.events,
  226. proc.NewRunc(ropts.RuntimeRoot, sopts.Bundle, namespace, rt, ropts.CriuPath, ropts.SystemdCgroup), r.tasks, bundle)
  227. if err != nil {
  228. return nil, err
  229. }
  230. if err := r.tasks.Add(ctx, t); err != nil {
  231. return nil, err
  232. }
  233. r.events.Publish(ctx, runtime.TaskCreateEventTopic, &eventstypes.TaskCreate{
  234. ContainerID: sopts.ID,
  235. Bundle: sopts.Bundle,
  236. Rootfs: sopts.Rootfs,
  237. IO: &eventstypes.TaskIO{
  238. Stdin: sopts.Stdin,
  239. Stdout: sopts.Stdout,
  240. Stderr: sopts.Stderr,
  241. Terminal: sopts.Terminal,
  242. },
  243. Checkpoint: sopts.Checkpoint,
  244. Pid: uint32(t.pid),
  245. })
  246. return t, nil
  247. }
// Tasks returns all tasks known to the runtime. The all flag is forwarded
// unchanged to the task list's GetAll.
func (r *Runtime) Tasks(ctx context.Context, all bool) ([]runtime.Task, error) {
	return r.tasks.GetAll(ctx, all)
}
  252. func (r *Runtime) restoreTasks(ctx context.Context) ([]*Task, error) {
  253. dir, err := ioutil.ReadDir(r.state)
  254. if err != nil {
  255. return nil, err
  256. }
  257. var o []*Task
  258. for _, namespace := range dir {
  259. if !namespace.IsDir() {
  260. continue
  261. }
  262. name := namespace.Name()
  263. log.G(ctx).WithField("namespace", name).Debug("loading tasks in namespace")
  264. tasks, err := r.loadTasks(ctx, name)
  265. if err != nil {
  266. return nil, err
  267. }
  268. o = append(o, tasks...)
  269. }
  270. return o, nil
  271. }
// Get returns the task with the given id by delegating to the runtime's
// task list, using ctx for the lookup.
func (r *Runtime) Get(ctx context.Context, id string) (runtime.Task, error) {
	return r.tasks.Get(ctx, id)
}
  276. func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) {
  277. dir, err := ioutil.ReadDir(filepath.Join(r.state, ns))
  278. if err != nil {
  279. return nil, err
  280. }
  281. var o []*Task
  282. for _, path := range dir {
  283. if !path.IsDir() {
  284. continue
  285. }
  286. id := path.Name()
  287. bundle := loadBundle(
  288. id,
  289. filepath.Join(r.state, ns, id),
  290. filepath.Join(r.root, ns, id),
  291. )
  292. ctx = namespaces.WithNamespace(ctx, ns)
  293. pid, _ := runc.ReadPidFile(filepath.Join(bundle.path, proc.InitPidFile))
  294. s, err := bundle.NewShimClient(ctx, ns, ShimConnect(r.config, func() {
  295. err := r.cleanupAfterDeadShim(ctx, bundle, ns, id, pid)
  296. if err != nil {
  297. log.G(ctx).WithError(err).WithField("bundle", bundle.path).
  298. Error("cleaning up after dead shim")
  299. }
  300. }), nil)
  301. if err != nil {
  302. log.G(ctx).WithError(err).WithFields(logrus.Fields{
  303. "id": id,
  304. "namespace": ns,
  305. }).Error("connecting to shim")
  306. err := r.cleanupAfterDeadShim(ctx, bundle, ns, id, pid)
  307. if err != nil {
  308. log.G(ctx).WithError(err).WithField("bundle", bundle.path).
  309. Error("cleaning up after dead shim")
  310. }
  311. continue
  312. }
  313. ropts, err := r.getRuncOptions(ctx, id)
  314. if err != nil {
  315. log.G(ctx).WithError(err).WithField("id", id).
  316. Error("get runtime options")
  317. continue
  318. }
  319. t, err := newTask(id, ns, pid, s, r.events,
  320. proc.NewRunc(ropts.RuntimeRoot, bundle.path, ns, ropts.Runtime, ropts.CriuPath, ropts.SystemdCgroup), r.tasks, bundle)
  321. if err != nil {
  322. log.G(ctx).WithError(err).Error("loading task type")
  323. continue
  324. }
  325. o = append(o, t)
  326. }
  327. return o, nil
  328. }
  329. func (r *Runtime) cleanupAfterDeadShim(ctx context.Context, bundle *bundle, ns, id string, pid int) error {
  330. ctx = namespaces.WithNamespace(ctx, ns)
  331. if err := r.terminate(ctx, bundle, ns, id); err != nil {
  332. if r.config.ShimDebug {
  333. return errors.Wrap(err, "failed to terminate task, leaving bundle for debugging")
  334. }
  335. log.G(ctx).WithError(err).Warn("failed to terminate task")
  336. }
  337. // Notify Client
  338. exitedAt := time.Now().UTC()
  339. r.events.Publish(ctx, runtime.TaskExitEventTopic, &eventstypes.TaskExit{
  340. ContainerID: id,
  341. ID: id,
  342. Pid: uint32(pid),
  343. ExitStatus: 128 + uint32(unix.SIGKILL),
  344. ExitedAt: exitedAt,
  345. })
  346. r.tasks.Delete(ctx, id)
  347. if err := bundle.Delete(); err != nil {
  348. log.G(ctx).WithError(err).Error("delete bundle")
  349. }
  350. r.events.Publish(ctx, runtime.TaskDeleteEventTopic, &eventstypes.TaskDelete{
  351. ContainerID: id,
  352. Pid: uint32(pid),
  353. ExitStatus: 128 + uint32(unix.SIGKILL),
  354. ExitedAt: exitedAt,
  355. })
  356. return nil
  357. }
  358. func (r *Runtime) terminate(ctx context.Context, bundle *bundle, ns, id string) error {
  359. rt, err := r.getRuntime(ctx, ns, id)
  360. if err != nil {
  361. return err
  362. }
  363. if err := rt.Delete(ctx, id, &runc.DeleteOpts{
  364. Force: true,
  365. }); err != nil {
  366. log.G(ctx).WithError(err).Warnf("delete runtime state %s", id)
  367. }
  368. if err := mount.Unmount(filepath.Join(bundle.path, "rootfs"), 0); err != nil {
  369. log.G(ctx).WithError(err).WithFields(logrus.Fields{
  370. "path": bundle.path,
  371. "id": id,
  372. }).Warnf("unmount task rootfs")
  373. }
  374. return nil
  375. }
  376. func (r *Runtime) getRuntime(ctx context.Context, ns, id string) (*runc.Runc, error) {
  377. ropts, err := r.getRuncOptions(ctx, id)
  378. if err != nil {
  379. return nil, err
  380. }
  381. var (
  382. cmd = r.config.Runtime
  383. root = proc.RuncRoot
  384. )
  385. if ropts != nil {
  386. if ropts.Runtime != "" {
  387. cmd = ropts.Runtime
  388. }
  389. if ropts.RuntimeRoot != "" {
  390. root = ropts.RuntimeRoot
  391. }
  392. }
  393. return &runc.Runc{
  394. Command: cmd,
  395. LogFormat: runc.JSON,
  396. PdeathSignal: unix.SIGKILL,
  397. Root: filepath.Join(root, ns),
  398. Debug: r.config.ShimDebug,
  399. }, nil
  400. }
  401. func (r *Runtime) getRuncOptions(ctx context.Context, id string) (*runctypes.RuncOptions, error) {
  402. var container containers.Container
  403. if err := r.db.View(func(tx *bolt.Tx) error {
  404. store := metadata.NewContainerStore(tx)
  405. var err error
  406. container, err = store.Get(ctx, id)
  407. return err
  408. }); err != nil {
  409. return nil, err
  410. }
  411. if container.Runtime.Options != nil {
  412. v, err := typeurl.UnmarshalAny(container.Runtime.Options)
  413. if err != nil {
  414. return nil, err
  415. }
  416. ropts, ok := v.(*runctypes.RuncOptions)
  417. if !ok {
  418. return nil, errors.New("invalid runtime options format")
  419. }
  420. return ropts, nil
  421. }
  422. return &runctypes.RuncOptions{}, nil
  423. }