init.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. // +build !windows
  2. /*
  3. Copyright The containerd Authors.
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. */
  14. package process
  15. import (
  16. "context"
  17. "encoding/json"
  18. "fmt"
  19. "io"
  20. "os"
  21. "path/filepath"
  22. "strings"
  23. "sync"
  24. "syscall"
  25. "time"
  26. "github.com/containerd/console"
  27. "github.com/containerd/containerd/log"
  28. "github.com/containerd/containerd/mount"
  29. "github.com/containerd/containerd/pkg/stdio"
  30. "github.com/containerd/fifo"
  31. runc "github.com/containerd/go-runc"
  32. google_protobuf "github.com/gogo/protobuf/types"
  33. specs "github.com/opencontainers/runtime-spec/specs-go"
  34. "github.com/pkg/errors"
  35. )
  36. // Init represents an initial process for a container
  37. type Init struct {
  38. wg sync.WaitGroup
  39. initState initState
  40. // mu is used to ensure that `Start()` and `Exited()` calls return in
  41. // the right order when invoked in separate go routines.
  42. // This is the case within the shim implementation as it makes use of
  43. // the reaper interface.
  44. mu sync.Mutex
  45. waitBlock chan struct{}
  46. WorkDir string
  47. id string
  48. Bundle string
  49. console console.Console
  50. Platform stdio.Platform
  51. io *processIO
  52. runtime *runc.Runc
  53. // pausing preserves the pausing state.
  54. pausing *atomicBool
  55. status int
  56. exited time.Time
  57. pid int
  58. closers []io.Closer
  59. stdin io.Closer
  60. stdio stdio.Stdio
  61. Rootfs string
  62. IoUID int
  63. IoGID int
  64. NoPivotRoot bool
  65. NoNewKeyring bool
  66. CriuWorkPath string
  67. }
  68. // NewRunc returns a new runc instance for a process
  69. func NewRunc(root, path, namespace, runtime, criu string, systemd bool) *runc.Runc {
  70. if root == "" {
  71. root = RuncRoot
  72. }
  73. return &runc.Runc{
  74. Command: runtime,
  75. Log: filepath.Join(path, "log.json"),
  76. LogFormat: runc.JSON,
  77. PdeathSignal: syscall.SIGKILL,
  78. Root: filepath.Join(root, namespace),
  79. Criu: criu,
  80. SystemdCgroup: systemd,
  81. }
  82. }
  83. // New returns a new process
  84. func New(id string, runtime *runc.Runc, stdio stdio.Stdio) *Init {
  85. p := &Init{
  86. id: id,
  87. runtime: runtime,
  88. pausing: new(atomicBool),
  89. stdio: stdio,
  90. status: 0,
  91. waitBlock: make(chan struct{}),
  92. }
  93. p.initState = &createdState{p: p}
  94. return p
  95. }
  96. // Create the process with the provided config
  97. func (p *Init) Create(ctx context.Context, r *CreateConfig) error {
  98. var (
  99. err error
  100. socket *runc.Socket
  101. pio *processIO
  102. pidFile = newPidFile(p.Bundle)
  103. )
  104. if r.Terminal {
  105. if socket, err = runc.NewTempConsoleSocket(); err != nil {
  106. return errors.Wrap(err, "failed to create OCI runtime console socket")
  107. }
  108. defer socket.Close()
  109. } else {
  110. if pio, err = createIO(ctx, p.id, p.IoUID, p.IoGID, p.stdio); err != nil {
  111. return errors.Wrap(err, "failed to create init process I/O")
  112. }
  113. p.io = pio
  114. }
  115. if r.Checkpoint != "" {
  116. return p.createCheckpointedState(r, pidFile)
  117. }
  118. opts := &runc.CreateOpts{
  119. PidFile: pidFile.Path(),
  120. NoPivot: p.NoPivotRoot,
  121. NoNewKeyring: p.NoNewKeyring,
  122. }
  123. if p.io != nil {
  124. opts.IO = p.io.IO()
  125. }
  126. if socket != nil {
  127. opts.ConsoleSocket = socket
  128. }
  129. if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil {
  130. return p.runtimeError(err, "OCI runtime create failed")
  131. }
  132. if r.Stdin != "" {
  133. if err := p.openStdin(r.Stdin); err != nil {
  134. return err
  135. }
  136. }
  137. ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
  138. defer cancel()
  139. if socket != nil {
  140. console, err := socket.ReceiveMaster()
  141. if err != nil {
  142. return errors.Wrap(err, "failed to retrieve console master")
  143. }
  144. console, err = p.Platform.CopyConsole(ctx, console, r.Stdin, r.Stdout, r.Stderr, &p.wg)
  145. if err != nil {
  146. return errors.Wrap(err, "failed to start console copy")
  147. }
  148. p.console = console
  149. } else {
  150. if err := pio.Copy(ctx, &p.wg); err != nil {
  151. return errors.Wrap(err, "failed to start io pipe copy")
  152. }
  153. }
  154. pid, err := pidFile.Read()
  155. if err != nil {
  156. return errors.Wrap(err, "failed to retrieve OCI runtime container pid")
  157. }
  158. p.pid = pid
  159. return nil
  160. }
  161. func (p *Init) openStdin(path string) error {
  162. sc, err := fifo.OpenFifo(context.Background(), path, syscall.O_WRONLY|syscall.O_NONBLOCK, 0)
  163. if err != nil {
  164. return errors.Wrapf(err, "failed to open stdin fifo %s", path)
  165. }
  166. p.stdin = sc
  167. p.closers = append(p.closers, sc)
  168. return nil
  169. }
  170. func (p *Init) createCheckpointedState(r *CreateConfig, pidFile *pidFile) error {
  171. opts := &runc.RestoreOpts{
  172. CheckpointOpts: runc.CheckpointOpts{
  173. ImagePath: r.Checkpoint,
  174. WorkDir: p.CriuWorkPath,
  175. ParentPath: r.ParentCheckpoint,
  176. },
  177. PidFile: pidFile.Path(),
  178. IO: p.io.IO(),
  179. NoPivot: p.NoPivotRoot,
  180. Detach: true,
  181. NoSubreaper: true,
  182. }
  183. p.initState = &createdCheckpointState{
  184. p: p,
  185. opts: opts,
  186. }
  187. return nil
  188. }
  189. // Wait for the process to exit
  190. func (p *Init) Wait() {
  191. <-p.waitBlock
  192. }
  193. // ID of the process
  194. func (p *Init) ID() string {
  195. return p.id
  196. }
  197. // Pid of the process
  198. func (p *Init) Pid() int {
  199. return p.pid
  200. }
  201. // ExitStatus of the process
  202. func (p *Init) ExitStatus() int {
  203. p.mu.Lock()
  204. defer p.mu.Unlock()
  205. return p.status
  206. }
  207. // ExitedAt at time when the process exited
  208. func (p *Init) ExitedAt() time.Time {
  209. p.mu.Lock()
  210. defer p.mu.Unlock()
  211. return p.exited
  212. }
  213. // Status of the process
  214. func (p *Init) Status(ctx context.Context) (string, error) {
  215. if p.pausing.get() {
  216. return "pausing", nil
  217. }
  218. p.mu.Lock()
  219. defer p.mu.Unlock()
  220. return p.initState.Status(ctx)
  221. }
  222. // Start the init process
  223. func (p *Init) Start(ctx context.Context) error {
  224. p.mu.Lock()
  225. defer p.mu.Unlock()
  226. return p.initState.Start(ctx)
  227. }
  228. func (p *Init) start(ctx context.Context) error {
  229. err := p.runtime.Start(ctx, p.id)
  230. return p.runtimeError(err, "OCI runtime start failed")
  231. }
  232. // SetExited of the init process with the next status
  233. func (p *Init) SetExited(status int) {
  234. p.mu.Lock()
  235. defer p.mu.Unlock()
  236. p.initState.SetExited(status)
  237. }
  238. func (p *Init) setExited(status int) {
  239. p.exited = time.Now()
  240. p.status = status
  241. p.Platform.ShutdownConsole(context.Background(), p.console)
  242. close(p.waitBlock)
  243. }
  244. // Delete the init process
  245. func (p *Init) Delete(ctx context.Context) error {
  246. p.mu.Lock()
  247. defer p.mu.Unlock()
  248. return p.initState.Delete(ctx)
  249. }
  250. func (p *Init) delete(ctx context.Context) error {
  251. waitTimeout(ctx, &p.wg, 2*time.Second)
  252. err := p.runtime.Delete(ctx, p.id, nil)
  253. // ignore errors if a runtime has already deleted the process
  254. // but we still hold metadata and pipes
  255. //
  256. // this is common during a checkpoint, runc will delete the container state
  257. // after a checkpoint and the container will no longer exist within runc
  258. if err != nil {
  259. if strings.Contains(err.Error(), "does not exist") {
  260. err = nil
  261. } else {
  262. err = p.runtimeError(err, "failed to delete task")
  263. }
  264. }
  265. if p.io != nil {
  266. for _, c := range p.closers {
  267. c.Close()
  268. }
  269. p.io.Close()
  270. }
  271. if err2 := mount.UnmountAll(p.Rootfs, 0); err2 != nil {
  272. log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount")
  273. if err == nil {
  274. err = errors.Wrap(err2, "failed rootfs umount")
  275. }
  276. }
  277. return err
  278. }
  279. // Resize the init processes console
  280. func (p *Init) Resize(ws console.WinSize) error {
  281. p.mu.Lock()
  282. defer p.mu.Unlock()
  283. if p.console == nil {
  284. return nil
  285. }
  286. return p.console.Resize(ws)
  287. }
  288. // Pause the init process and all its child processes
  289. func (p *Init) Pause(ctx context.Context) error {
  290. p.mu.Lock()
  291. defer p.mu.Unlock()
  292. return p.initState.Pause(ctx)
  293. }
  294. // Resume the init process and all its child processes
  295. func (p *Init) Resume(ctx context.Context) error {
  296. p.mu.Lock()
  297. defer p.mu.Unlock()
  298. return p.initState.Resume(ctx)
  299. }
  300. // Kill the init process
  301. func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error {
  302. p.mu.Lock()
  303. defer p.mu.Unlock()
  304. return p.initState.Kill(ctx, signal, all)
  305. }
  306. func (p *Init) kill(ctx context.Context, signal uint32, all bool) error {
  307. err := p.runtime.Kill(ctx, p.id, int(signal), &runc.KillOpts{
  308. All: all,
  309. })
  310. return checkKillError(err)
  311. }
  312. // KillAll processes belonging to the init process
  313. func (p *Init) KillAll(ctx context.Context) error {
  314. p.mu.Lock()
  315. defer p.mu.Unlock()
  316. err := p.runtime.Kill(ctx, p.id, int(syscall.SIGKILL), &runc.KillOpts{
  317. All: true,
  318. })
  319. return p.runtimeError(err, "OCI runtime killall failed")
  320. }
  321. // Stdin of the process
  322. func (p *Init) Stdin() io.Closer {
  323. return p.stdin
  324. }
  325. // Runtime returns the OCI runtime configured for the init process
  326. func (p *Init) Runtime() *runc.Runc {
  327. return p.runtime
  328. }
  329. // Exec returns a new child process
  330. func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
  331. p.mu.Lock()
  332. defer p.mu.Unlock()
  333. return p.initState.Exec(ctx, path, r)
  334. }
  335. // exec returns a new exec'd process
  336. func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
  337. // process exec request
  338. var spec specs.Process
  339. if err := json.Unmarshal(r.Spec.Value, &spec); err != nil {
  340. return nil, err
  341. }
  342. spec.Terminal = r.Terminal
  343. e := &execProcess{
  344. id: r.ID,
  345. path: path,
  346. parent: p,
  347. spec: spec,
  348. stdio: stdio.Stdio{
  349. Stdin: r.Stdin,
  350. Stdout: r.Stdout,
  351. Stderr: r.Stderr,
  352. Terminal: r.Terminal,
  353. },
  354. waitBlock: make(chan struct{}),
  355. }
  356. e.execState = &execCreatedState{p: e}
  357. return e, nil
  358. }
  359. // Checkpoint the init process
  360. func (p *Init) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
  361. p.mu.Lock()
  362. defer p.mu.Unlock()
  363. return p.initState.Checkpoint(ctx, r)
  364. }
  365. func (p *Init) checkpoint(ctx context.Context, r *CheckpointConfig) error {
  366. var actions []runc.CheckpointAction
  367. if !r.Exit {
  368. actions = append(actions, runc.LeaveRunning)
  369. }
  370. // keep criu work directory if criu work dir is set
  371. work := r.WorkDir
  372. if work == "" {
  373. work = filepath.Join(p.WorkDir, "criu-work")
  374. defer os.RemoveAll(work)
  375. }
  376. if err := p.runtime.Checkpoint(ctx, p.id, &runc.CheckpointOpts{
  377. WorkDir: work,
  378. ImagePath: r.Path,
  379. AllowOpenTCP: r.AllowOpenTCP,
  380. AllowExternalUnixSockets: r.AllowExternalUnixSockets,
  381. AllowTerminal: r.AllowTerminal,
  382. FileLocks: r.FileLocks,
  383. EmptyNamespaces: r.EmptyNamespaces,
  384. }, actions...); err != nil {
  385. dumpLog := filepath.Join(p.Bundle, "criu-dump.log")
  386. if cerr := copyFile(dumpLog, filepath.Join(work, "dump.log")); cerr != nil {
  387. log.G(ctx).Error(err)
  388. }
  389. return fmt.Errorf("%s path= %s", criuError(err), dumpLog)
  390. }
  391. return nil
  392. }
  393. // Update the processes resource configuration
  394. func (p *Init) Update(ctx context.Context, r *google_protobuf.Any) error {
  395. p.mu.Lock()
  396. defer p.mu.Unlock()
  397. return p.initState.Update(ctx, r)
  398. }
  399. func (p *Init) update(ctx context.Context, r *google_protobuf.Any) error {
  400. var resources specs.LinuxResources
  401. if err := json.Unmarshal(r.Value, &resources); err != nil {
  402. return err
  403. }
  404. return p.runtime.Update(ctx, p.id, &resources)
  405. }
  406. // Stdio of the process
  407. func (p *Init) Stdio() stdio.Stdio {
  408. return p.stdio
  409. }
  410. func (p *Init) runtimeError(rErr error, msg string) error {
  411. if rErr == nil {
  412. return nil
  413. }
  414. rMsg, err := getLastRuntimeError(p.runtime)
  415. switch {
  416. case err != nil:
  417. return errors.Wrapf(rErr, "%s: %s (%s)", msg, "unable to retrieve OCI runtime error", err.Error())
  418. case rMsg == "":
  419. return errors.Wrap(rErr, msg)
  420. default:
  421. return errors.Errorf("%s: %s", msg, rMsg)
  422. }
  423. }
  424. func withConditionalIO(c stdio.Stdio) runc.IOOpt {
  425. return func(o *runc.IOOption) {
  426. o.OpenStdin = c.Stdin != ""
  427. o.OpenStdout = c.Stdout != ""
  428. o.OpenStderr = c.Stderr != ""
  429. }
  430. }