init.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. // +build !windows
  2. /*
  3. Copyright The containerd Authors.
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. */
  14. package proc
  15. import (
  16. "context"
  17. "encoding/json"
  18. "fmt"
  19. "io"
  20. "os"
  21. "path/filepath"
  22. "strings"
  23. "sync"
  24. "syscall"
  25. "time"
  26. "github.com/containerd/console"
  27. "github.com/containerd/containerd/log"
  28. "github.com/containerd/containerd/mount"
  29. "github.com/containerd/containerd/runtime/proc"
  30. "github.com/containerd/fifo"
  31. runc "github.com/containerd/go-runc"
  32. google_protobuf "github.com/gogo/protobuf/types"
  33. specs "github.com/opencontainers/runtime-spec/specs-go"
  34. "github.com/pkg/errors"
  35. )
// InitPidFile is the name of the file that contains the init pid.
// Create joins it onto the bundle directory to build the pid file path
// handed to the OCI runtime.
const InitPidFile = "init.pid"
// Init represents an initial process for a container
type Init struct {
	// wg is handed to the IO copy helpers in Create and is waited on by
	// delete before the runtime delete is issued.
	wg sync.WaitGroup
	// initState is the current state of the process; the exported
	// lifecycle methods delegate to it while holding mu.
	initState initState

	// mu is used to ensure that `Start()` and `Exited()` calls return in
	// the right order when invoked in separate go routines.
	// This is the case within the shim implementation as it makes use of
	// the reaper interface.
	mu sync.Mutex

	// waitBlock is closed by setExited; Wait blocks until that happens.
	waitBlock chan struct{}

	// WorkDir is the parent of the default "criu-work" directory used by
	// checkpoint when the request does not name a work directory.
	WorkDir string

	// id is the identifier passed to the OCI runtime for this process.
	id string
	// Bundle is the directory the pid file and "criu-dump.log" are
	// placed in.
	Bundle string
	// console is set by Create when a terminal was requested; nil otherwise.
	console console.Console
	// Platform provides the console copy and shutdown operations.
	Platform proc.Platform
	// io is the runc IO set created by Create; it stays nil when a
	// terminal is used.
	io runc.IO
	// runtime is the OCI runtime client used for every runtime call.
	runtime *runc.Runc
	// status and exited are recorded by setExited.
	status int
	exited time.Time
	// pid is read from the pid file at the end of Create.
	pid int
	// closers are closed by delete (currently holds the stdin fifo).
	closers []io.Closer
	// stdin is the write side of the stdin fifo opened by Create.
	stdin io.Closer
	// stdio is the stdio configuration supplied to New.
	stdio proc.Stdio
	// Rootfs is the path unmounted by delete.
	Rootfs string
	// IoUID and IoGID are passed to runc.NewPipeIO when pipes are created.
	IoUID int
	IoGID int
	// NoPivotRoot and NoNewKeyring are forwarded to the runtime
	// create/restore options.
	NoPivotRoot  bool
	NoNewKeyring bool
	// CriuWorkPath is used as the work directory when restoring from a
	// checkpoint.
	CriuWorkPath string
}
  68. // NewRunc returns a new runc instance for a process
  69. func NewRunc(root, path, namespace, runtime, criu string, systemd bool) *runc.Runc {
  70. if root == "" {
  71. root = RuncRoot
  72. }
  73. return &runc.Runc{
  74. Command: runtime,
  75. Log: filepath.Join(path, "log.json"),
  76. LogFormat: runc.JSON,
  77. PdeathSignal: syscall.SIGKILL,
  78. Root: filepath.Join(root, namespace),
  79. Criu: criu,
  80. SystemdCgroup: systemd,
  81. }
  82. }
  83. // New returns a new process
  84. func New(id string, runtime *runc.Runc, stdio proc.Stdio) *Init {
  85. p := &Init{
  86. id: id,
  87. runtime: runtime,
  88. stdio: stdio,
  89. status: 0,
  90. waitBlock: make(chan struct{}),
  91. }
  92. p.initState = &createdState{p: p}
  93. return p
  94. }
  95. // Create the process with the provided config
  96. func (p *Init) Create(ctx context.Context, r *CreateConfig) error {
  97. var (
  98. err error
  99. socket *runc.Socket
  100. )
  101. if r.Terminal {
  102. if socket, err = runc.NewTempConsoleSocket(); err != nil {
  103. return errors.Wrap(err, "failed to create OCI runtime console socket")
  104. }
  105. defer socket.Close()
  106. } else if hasNoIO(r) {
  107. if p.io, err = runc.NewNullIO(); err != nil {
  108. return errors.Wrap(err, "creating new NULL IO")
  109. }
  110. } else {
  111. if p.io, err = runc.NewPipeIO(p.IoUID, p.IoGID, withConditionalIO(p.stdio)); err != nil {
  112. return errors.Wrap(err, "failed to create OCI runtime io pipes")
  113. }
  114. }
  115. pidFile := filepath.Join(p.Bundle, InitPidFile)
  116. if r.Checkpoint != "" {
  117. opts := &runc.RestoreOpts{
  118. CheckpointOpts: runc.CheckpointOpts{
  119. ImagePath: r.Checkpoint,
  120. WorkDir: p.CriuWorkPath,
  121. ParentPath: r.ParentCheckpoint,
  122. },
  123. PidFile: pidFile,
  124. IO: p.io,
  125. NoPivot: p.NoPivotRoot,
  126. Detach: true,
  127. NoSubreaper: true,
  128. }
  129. p.initState = &createdCheckpointState{
  130. p: p,
  131. opts: opts,
  132. }
  133. return nil
  134. }
  135. opts := &runc.CreateOpts{
  136. PidFile: pidFile,
  137. IO: p.io,
  138. NoPivot: p.NoPivotRoot,
  139. NoNewKeyring: p.NoNewKeyring,
  140. }
  141. if socket != nil {
  142. opts.ConsoleSocket = socket
  143. }
  144. if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil {
  145. return p.runtimeError(err, "OCI runtime create failed")
  146. }
  147. if r.Stdin != "" {
  148. sc, err := fifo.OpenFifo(context.Background(), r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0)
  149. if err != nil {
  150. return errors.Wrapf(err, "failed to open stdin fifo %s", r.Stdin)
  151. }
  152. p.stdin = sc
  153. p.closers = append(p.closers, sc)
  154. }
  155. var copyWaitGroup sync.WaitGroup
  156. ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
  157. defer cancel()
  158. if socket != nil {
  159. console, err := socket.ReceiveMaster()
  160. if err != nil {
  161. return errors.Wrap(err, "failed to retrieve console master")
  162. }
  163. console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg, &copyWaitGroup)
  164. if err != nil {
  165. return errors.Wrap(err, "failed to start console copy")
  166. }
  167. p.console = console
  168. } else if !hasNoIO(r) {
  169. if err := copyPipes(ctx, p.io, r.Stdin, r.Stdout, r.Stderr, &p.wg, &copyWaitGroup); err != nil {
  170. return errors.Wrap(err, "failed to start io pipe copy")
  171. }
  172. }
  173. copyWaitGroup.Wait()
  174. pid, err := runc.ReadPidFile(pidFile)
  175. if err != nil {
  176. return errors.Wrap(err, "failed to retrieve OCI runtime container pid")
  177. }
  178. p.pid = pid
  179. return nil
  180. }
// Wait for the process to exit.
// It blocks until waitBlock is closed, which setExited does.
func (p *Init) Wait() {
	<-p.waitBlock
}
// ID of the process, as supplied to New.
func (p *Init) ID() string {
	return p.id
}
// Pid of the process.
// The value is the one Create read from the pid file; it is zero until then.
func (p *Init) Pid() int {
	return p.pid
}
  193. // ExitStatus of the process
  194. func (p *Init) ExitStatus() int {
  195. p.mu.Lock()
  196. defer p.mu.Unlock()
  197. return p.status
  198. }
  199. // ExitedAt at time when the process exited
  200. func (p *Init) ExitedAt() time.Time {
  201. p.mu.Lock()
  202. defer p.mu.Unlock()
  203. return p.exited
  204. }
  205. // Status of the process
  206. func (p *Init) Status(ctx context.Context) (string, error) {
  207. p.mu.Lock()
  208. defer p.mu.Unlock()
  209. c, err := p.runtime.State(ctx, p.id)
  210. if err != nil {
  211. if strings.Contains(err.Error(), "does not exist") {
  212. return "stopped", nil
  213. }
  214. return "", p.runtimeError(err, "OCI runtime state failed")
  215. }
  216. return c.Status, nil
  217. }
// Start the init process.
// The call is forwarded to the current state while holding mu.
func (p *Init) Start(ctx context.Context) error {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.initState.Start(ctx)
}
  224. func (p *Init) start(ctx context.Context) error {
  225. err := p.runtime.Start(ctx, p.id)
  226. return p.runtimeError(err, "OCI runtime start failed")
  227. }
// SetExited of the init process with the next status.
// The call is forwarded to the current state while holding mu.
func (p *Init) SetExited(status int) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.initState.SetExited(status)
}
// setExited records the exit time and status, asks the platform to shut
// down the console, and closes waitBlock so that Wait returns.
// It does not take mu itself.
func (p *Init) setExited(status int) {
	p.exited = time.Now()
	p.status = status
	p.Platform.ShutdownConsole(context.Background(), p.console)
	// closing a channel twice panics, so this must run at most once
	close(p.waitBlock)
}
// Delete the init process.
// The call is forwarded to the current state while holding mu.
func (p *Init) Delete(ctx context.Context) error {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.initState.Delete(ctx)
}
  246. func (p *Init) delete(ctx context.Context) error {
  247. p.wg.Wait()
  248. err := p.runtime.Delete(ctx, p.id, nil)
  249. // ignore errors if a runtime has already deleted the process
  250. // but we still hold metadata and pipes
  251. //
  252. // this is common during a checkpoint, runc will delete the container state
  253. // after a checkpoint and the container will no longer exist within runc
  254. if err != nil {
  255. if strings.Contains(err.Error(), "does not exist") {
  256. err = nil
  257. } else {
  258. err = p.runtimeError(err, "failed to delete task")
  259. }
  260. }
  261. if p.io != nil {
  262. for _, c := range p.closers {
  263. c.Close()
  264. }
  265. p.io.Close()
  266. }
  267. if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil {
  268. log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount")
  269. if err == nil {
  270. err = errors.Wrap(err2, "failed rootfs umount")
  271. }
  272. }
  273. return err
  274. }
  275. // Resize the init processes console
  276. func (p *Init) Resize(ws console.WinSize) error {
  277. p.mu.Lock()
  278. defer p.mu.Unlock()
  279. if p.console == nil {
  280. return nil
  281. }
  282. return p.console.Resize(ws)
  283. }
  284. func (p *Init) resize(ws console.WinSize) error {
  285. if p.console == nil {
  286. return nil
  287. }
  288. return p.console.Resize(ws)
  289. }
// Pause the init process and all its child processes.
// The call is forwarded to the current state while holding mu.
func (p *Init) Pause(ctx context.Context) error {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.initState.Pause(ctx)
}
// Resume the init process and all its child processes.
// The call is forwarded to the current state while holding mu.
func (p *Init) Resume(ctx context.Context) error {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.initState.Resume(ctx)
}
// Kill the init process by sending it signal; all is passed through to the
// state's Kill.
// The call is forwarded to the current state while holding mu.
func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.initState.Kill(ctx, signal, all)
}
  308. func (p *Init) kill(ctx context.Context, signal uint32, all bool) error {
  309. err := p.runtime.Kill(ctx, p.id, int(signal), &runc.KillOpts{
  310. All: all,
  311. })
  312. return checkKillError(err)
  313. }
  314. // KillAll processes belonging to the init process
  315. func (p *Init) KillAll(ctx context.Context) error {
  316. p.mu.Lock()
  317. defer p.mu.Unlock()
  318. err := p.runtime.Kill(ctx, p.id, int(syscall.SIGKILL), &runc.KillOpts{
  319. All: true,
  320. })
  321. return p.runtimeError(err, "OCI runtime killall failed")
  322. }
// Stdin of the process.
// It returns the closer for the stdin fifo opened by Create, or nil when
// none was opened.
func (p *Init) Stdin() io.Closer {
	return p.stdin
}
// Runtime returns the OCI runtime configured for the init process
func (p *Init) Runtime() *runc.Runc {
	return p.runtime
}
// Exec returns a new child process.
// The call is forwarded to the current state while holding mu.
func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.initState.Exec(ctx, path, r)
}
  337. // exec returns a new exec'd process
  338. func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (proc.Process, error) {
  339. // process exec request
  340. var spec specs.Process
  341. if err := json.Unmarshal(r.Spec.Value, &spec); err != nil {
  342. return nil, err
  343. }
  344. spec.Terminal = r.Terminal
  345. e := &execProcess{
  346. id: r.ID,
  347. path: path,
  348. parent: p,
  349. spec: spec,
  350. stdio: proc.Stdio{
  351. Stdin: r.Stdin,
  352. Stdout: r.Stdout,
  353. Stderr: r.Stderr,
  354. Terminal: r.Terminal,
  355. },
  356. waitBlock: make(chan struct{}),
  357. pid: &safePid{},
  358. }
  359. e.execState = &execCreatedState{p: e}
  360. return e, nil
  361. }
// Checkpoint the init process.
// The call is forwarded to the current state while holding mu.
func (p *Init) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.initState.Checkpoint(ctx, r)
}
  368. func (p *Init) checkpoint(ctx context.Context, r *CheckpointConfig) error {
  369. var actions []runc.CheckpointAction
  370. if !r.Exit {
  371. actions = append(actions, runc.LeaveRunning)
  372. }
  373. // keep criu work directory if criu work dir is set
  374. work := r.WorkDir
  375. if work == "" {
  376. work = filepath.Join(p.WorkDir, "criu-work")
  377. defer os.RemoveAll(work)
  378. }
  379. if err := p.runtime.Checkpoint(ctx, p.id, &runc.CheckpointOpts{
  380. WorkDir: work,
  381. ImagePath: r.Path,
  382. AllowOpenTCP: r.AllowOpenTCP,
  383. AllowExternalUnixSockets: r.AllowExternalUnixSockets,
  384. AllowTerminal: r.AllowTerminal,
  385. FileLocks: r.FileLocks,
  386. EmptyNamespaces: r.EmptyNamespaces,
  387. }, actions...); err != nil {
  388. dumpLog := filepath.Join(p.Bundle, "criu-dump.log")
  389. if cerr := copyFile(dumpLog, filepath.Join(work, "dump.log")); cerr != nil {
  390. log.G(ctx).Error(err)
  391. }
  392. return fmt.Errorf("%s path= %s", criuError(err), dumpLog)
  393. }
  394. return nil
  395. }
// Update the process's resource configuration.
// The call is forwarded to the current state while holding mu.
func (p *Init) Update(ctx context.Context, r *google_protobuf.Any) error {
	p.mu.Lock()
	defer p.mu.Unlock()
	return p.initState.Update(ctx, r)
}
  402. func (p *Init) update(ctx context.Context, r *google_protobuf.Any) error {
  403. var resources specs.LinuxResources
  404. if err := json.Unmarshal(r.Value, &resources); err != nil {
  405. return err
  406. }
  407. return p.runtime.Update(ctx, p.id, &resources)
  408. }
// Stdio of the process, as supplied to New.
func (p *Init) Stdio() proc.Stdio {
	return p.stdio
}
  413. func (p *Init) runtimeError(rErr error, msg string) error {
  414. if rErr == nil {
  415. return nil
  416. }
  417. rMsg, err := getLastRuntimeError(p.runtime)
  418. switch {
  419. case err != nil:
  420. return errors.Wrapf(rErr, "%s: %s (%s)", msg, "unable to retrieve OCI runtime error", err.Error())
  421. case rMsg == "":
  422. return errors.Wrap(rErr, msg)
  423. default:
  424. return errors.Errorf("%s: %s", msg, rMsg)
  425. }
  426. }
  427. func withConditionalIO(c proc.Stdio) runc.IOOpt {
  428. return func(o *runc.IOOption) {
  429. o.OpenStdin = c.Stdin != ""
  430. o.OpenStdout = c.Stdout != ""
  431. o.OpenStderr = c.Stderr != ""
  432. }
  433. }