remote_linux.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. package libcontainerd
  2. import (
  3. "fmt"
  4. "io"
  5. "io/ioutil"
  6. "log"
  7. "net"
  8. "os"
  9. "os/exec"
  10. "path/filepath"
  11. "strconv"
  12. "strings"
  13. "sync"
  14. "syscall"
  15. "time"
  16. "github.com/Sirupsen/logrus"
  17. containerd "github.com/docker/containerd/api/grpc/types"
  18. "github.com/docker/docker/pkg/locker"
  19. sysinfo "github.com/docker/docker/pkg/system"
  20. "github.com/docker/docker/utils"
  21. "github.com/golang/protobuf/ptypes"
  22. "github.com/golang/protobuf/ptypes/timestamp"
  23. rsystem "github.com/opencontainers/runc/libcontainer/system"
  24. "golang.org/x/net/context"
  25. "google.golang.org/grpc"
  26. "google.golang.org/grpc/grpclog"
  27. "google.golang.org/grpc/health/grpc_health_v1"
  28. "google.golang.org/grpc/transport"
  29. )
  30. const (
  31. maxConnectionRetryCount = 3
  32. connectionRetryDelay = 3 * time.Second
  33. containerdHealthCheckTimeout = 3 * time.Second
  34. containerdShutdownTimeout = 15 * time.Second
  35. containerdBinary = "docker-containerd"
  36. containerdPidFilename = "docker-containerd.pid"
  37. containerdSockFilename = "docker-containerd.sock"
  38. containerdStateDir = "containerd"
  39. eventTimestampFilename = "event.ts"
  40. )
  41. type remote struct {
  42. sync.RWMutex
  43. apiClient containerd.APIClient
  44. daemonPid int
  45. stateDir string
  46. rpcAddr string
  47. startDaemon bool
  48. closeManually bool
  49. debugLog bool
  50. rpcConn *grpc.ClientConn
  51. clients []*client
  52. eventTsPath string
  53. runtime string
  54. runtimeArgs []string
  55. daemonWaitCh chan struct{}
  56. liveRestore bool
  57. oomScore int
  58. restoreFromTimestamp *timestamp.Timestamp
  59. }
  60. // New creates a fresh instance of libcontainerd remote.
  61. func New(stateDir string, options ...RemoteOption) (_ Remote, err error) {
  62. defer func() {
  63. if err != nil {
  64. err = fmt.Errorf("Failed to connect to containerd. Please make sure containerd is installed in your PATH or you have specificed the correct address. Got error: %v", err)
  65. }
  66. }()
  67. r := &remote{
  68. stateDir: stateDir,
  69. daemonPid: -1,
  70. eventTsPath: filepath.Join(stateDir, eventTimestampFilename),
  71. }
  72. for _, option := range options {
  73. if err := option.Apply(r); err != nil {
  74. return nil, err
  75. }
  76. }
  77. if err := sysinfo.MkdirAll(stateDir, 0700); err != nil {
  78. return nil, err
  79. }
  80. if r.rpcAddr == "" {
  81. r.rpcAddr = filepath.Join(stateDir, containerdSockFilename)
  82. }
  83. if r.startDaemon {
  84. if err := r.runContainerdDaemon(); err != nil {
  85. return nil, err
  86. }
  87. }
  88. // don't output the grpc reconnect logging
  89. grpclog.SetLogger(log.New(ioutil.Discard, "", log.LstdFlags))
  90. dialOpts := append([]grpc.DialOption{grpc.WithInsecure()},
  91. grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
  92. return net.DialTimeout("unix", addr, timeout)
  93. }),
  94. )
  95. conn, err := grpc.Dial(r.rpcAddr, dialOpts...)
  96. if err != nil {
  97. return nil, fmt.Errorf("error connecting to containerd: %v", err)
  98. }
  99. r.rpcConn = conn
  100. r.apiClient = containerd.NewAPIClient(conn)
  101. // Get the timestamp to restore from
  102. t := r.getLastEventTimestamp()
  103. tsp, err := ptypes.TimestampProto(t)
  104. if err != nil {
  105. logrus.Errorf("libcontainerd: failed to convert timestamp: %q", err)
  106. }
  107. r.restoreFromTimestamp = tsp
  108. go r.handleConnectionChange()
  109. if err := r.startEventsMonitor(); err != nil {
  110. return nil, err
  111. }
  112. return r, nil
  113. }
  114. func (r *remote) UpdateOptions(options ...RemoteOption) error {
  115. for _, option := range options {
  116. if err := option.Apply(r); err != nil {
  117. return err
  118. }
  119. }
  120. return nil
  121. }
  122. func (r *remote) handleConnectionChange() {
  123. var transientFailureCount = 0
  124. ticker := time.NewTicker(500 * time.Millisecond)
  125. defer ticker.Stop()
  126. healthClient := grpc_health_v1.NewHealthClient(r.rpcConn)
  127. for {
  128. <-ticker.C
  129. ctx, cancel := context.WithTimeout(context.Background(), containerdHealthCheckTimeout)
  130. _, err := healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{})
  131. cancel()
  132. if err == nil {
  133. continue
  134. }
  135. logrus.Debugf("libcontainerd: containerd health check returned error: %v", err)
  136. if r.daemonPid != -1 {
  137. if strings.Contains(err.Error(), "is closing") {
  138. // Well, we asked for it to stop, just return
  139. return
  140. }
  141. // all other errors are transient
  142. // Reset state to be notified of next failure
  143. transientFailureCount++
  144. if transientFailureCount >= maxConnectionRetryCount {
  145. transientFailureCount = 0
  146. if utils.IsProcessAlive(r.daemonPid) {
  147. utils.KillProcess(r.daemonPid)
  148. }
  149. <-r.daemonWaitCh
  150. if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error
  151. logrus.Errorf("libcontainerd: error restarting containerd: %v", err)
  152. }
  153. continue
  154. }
  155. }
  156. }
  157. }
  158. func (r *remote) Cleanup() {
  159. if r.daemonPid == -1 {
  160. return
  161. }
  162. r.closeManually = true
  163. r.rpcConn.Close()
  164. // Ask the daemon to quit
  165. syscall.Kill(r.daemonPid, syscall.SIGTERM)
  166. // Wait up to 15secs for it to stop
  167. for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second {
  168. if !utils.IsProcessAlive(r.daemonPid) {
  169. break
  170. }
  171. time.Sleep(time.Second)
  172. }
  173. if utils.IsProcessAlive(r.daemonPid) {
  174. logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid)
  175. syscall.Kill(r.daemonPid, syscall.SIGKILL)
  176. }
  177. // cleanup some files
  178. os.Remove(filepath.Join(r.stateDir, containerdPidFilename))
  179. os.Remove(filepath.Join(r.stateDir, containerdSockFilename))
  180. }
  181. func (r *remote) Client(b Backend) (Client, error) {
  182. c := &client{
  183. clientCommon: clientCommon{
  184. backend: b,
  185. containers: make(map[string]*container),
  186. locker: locker.New(),
  187. },
  188. remote: r,
  189. exitNotifiers: make(map[string]*exitNotifier),
  190. liveRestore: r.liveRestore,
  191. }
  192. r.Lock()
  193. r.clients = append(r.clients, c)
  194. r.Unlock()
  195. return c, nil
  196. }
  197. func (r *remote) updateEventTimestamp(t time.Time) {
  198. f, err := os.OpenFile(r.eventTsPath, syscall.O_CREAT|syscall.O_WRONLY|syscall.O_TRUNC, 0600)
  199. if err != nil {
  200. logrus.Warnf("libcontainerd: failed to open event timestamp file: %v", err)
  201. return
  202. }
  203. defer f.Close()
  204. b, err := t.MarshalText()
  205. if err != nil {
  206. logrus.Warnf("libcontainerd: failed to encode timestamp: %v", err)
  207. return
  208. }
  209. n, err := f.Write(b)
  210. if err != nil || n != len(b) {
  211. logrus.Warnf("libcontainerd: failed to update event timestamp file: %v", err)
  212. f.Truncate(0)
  213. return
  214. }
  215. }
  216. func (r *remote) getLastEventTimestamp() time.Time {
  217. t := time.Now()
  218. fi, err := os.Stat(r.eventTsPath)
  219. if os.IsNotExist(err) || fi.Size() == 0 {
  220. return t
  221. }
  222. f, err := os.Open(r.eventTsPath)
  223. if err != nil {
  224. logrus.Warnf("libcontainerd: Unable to access last event ts: %v", err)
  225. return t
  226. }
  227. defer f.Close()
  228. b := make([]byte, fi.Size())
  229. n, err := f.Read(b)
  230. if err != nil || n != len(b) {
  231. logrus.Warnf("libcontainerd: Unable to read last event ts: %v", err)
  232. return t
  233. }
  234. t.UnmarshalText(b)
  235. return t
  236. }
  237. func (r *remote) startEventsMonitor() error {
  238. // First, get past events
  239. t := r.getLastEventTimestamp()
  240. tsp, err := ptypes.TimestampProto(t)
  241. if err != nil {
  242. logrus.Errorf("libcontainerd: failed to convert timestamp: %q", err)
  243. }
  244. er := &containerd.EventsRequest{
  245. Timestamp: tsp,
  246. }
  247. events, err := r.apiClient.Events(context.Background(), er)
  248. if err != nil {
  249. return err
  250. }
  251. go r.handleEventStream(events)
  252. return nil
  253. }
  254. func (r *remote) handleEventStream(events containerd.API_EventsClient) {
  255. for {
  256. e, err := events.Recv()
  257. if err != nil {
  258. if grpc.ErrorDesc(err) == transport.ErrConnClosing.Desc &&
  259. r.closeManually {
  260. // ignore error if grpc remote connection is closed manually
  261. return
  262. }
  263. logrus.Errorf("libcontainerd: failed to receive event from containerd: %v", err)
  264. go r.startEventsMonitor()
  265. return
  266. }
  267. logrus.Debugf("libcontainerd: received containerd event: %#v", e)
  268. var container *container
  269. var c *client
  270. r.RLock()
  271. for _, c = range r.clients {
  272. container, err = c.getContainer(e.Id)
  273. if err == nil {
  274. break
  275. }
  276. }
  277. r.RUnlock()
  278. if container == nil {
  279. logrus.Warnf("libcontainerd: unknown container %s", e.Id)
  280. continue
  281. }
  282. if err := container.handleEvent(e); err != nil {
  283. logrus.Errorf("libcontainerd: error processing state change for %s: %v", e.Id, err)
  284. }
  285. tsp, err := ptypes.Timestamp(e.Timestamp)
  286. if err != nil {
  287. logrus.Errorf("libcontainerd: failed to convert event timestamp: %q", err)
  288. continue
  289. }
  290. r.updateEventTimestamp(tsp)
  291. }
  292. }
  293. func (r *remote) runContainerdDaemon() error {
  294. pidFilename := filepath.Join(r.stateDir, containerdPidFilename)
  295. f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600)
  296. if err != nil {
  297. return err
  298. }
  299. defer f.Close()
  300. // File exist, check if the daemon is alive
  301. b := make([]byte, 8)
  302. n, err := f.Read(b)
  303. if err != nil && err != io.EOF {
  304. return err
  305. }
  306. if n > 0 {
  307. pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
  308. if err != nil {
  309. return err
  310. }
  311. if utils.IsProcessAlive(int(pid)) {
  312. logrus.Infof("libcontainerd: previous instance of containerd still alive (%d)", pid)
  313. r.daemonPid = int(pid)
  314. return nil
  315. }
  316. }
  317. // rewind the file
  318. _, err = f.Seek(0, os.SEEK_SET)
  319. if err != nil {
  320. return err
  321. }
  322. // Truncate it
  323. err = f.Truncate(0)
  324. if err != nil {
  325. return err
  326. }
  327. // Start a new instance
  328. args := []string{
  329. "-l", fmt.Sprintf("unix://%s", r.rpcAddr),
  330. "--shim", "docker-containerd-shim",
  331. "--metrics-interval=0",
  332. "--start-timeout", "2m",
  333. "--state-dir", filepath.Join(r.stateDir, containerdStateDir),
  334. }
  335. if r.runtime != "" {
  336. args = append(args, "--runtime")
  337. args = append(args, r.runtime)
  338. }
  339. if r.debugLog {
  340. args = append(args, "--debug")
  341. }
  342. if len(r.runtimeArgs) > 0 {
  343. for _, v := range r.runtimeArgs {
  344. args = append(args, "--runtime-args")
  345. args = append(args, v)
  346. }
  347. logrus.Debugf("libcontainerd: runContainerdDaemon: runtimeArgs: %s", args)
  348. }
  349. cmd := exec.Command(containerdBinary, args...)
  350. // redirect containerd logs to docker logs
  351. cmd.Stdout = os.Stdout
  352. cmd.Stderr = os.Stderr
  353. cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true, Pdeathsig: syscall.SIGKILL}
  354. cmd.Env = nil
  355. // clear the NOTIFY_SOCKET from the env when starting containerd
  356. for _, e := range os.Environ() {
  357. if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
  358. cmd.Env = append(cmd.Env, e)
  359. }
  360. }
  361. if err := cmd.Start(); err != nil {
  362. return err
  363. }
  364. logrus.Infof("libcontainerd: new containerd process, pid: %d", cmd.Process.Pid)
  365. if err := setOOMScore(cmd.Process.Pid, r.oomScore); err != nil {
  366. utils.KillProcess(cmd.Process.Pid)
  367. return err
  368. }
  369. if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil {
  370. utils.KillProcess(cmd.Process.Pid)
  371. return err
  372. }
  373. r.daemonWaitCh = make(chan struct{})
  374. go func() {
  375. cmd.Wait()
  376. close(r.daemonWaitCh)
  377. }() // Reap our child when needed
  378. r.daemonPid = cmd.Process.Pid
  379. return nil
  380. }
  381. func setOOMScore(pid, score int) error {
  382. oomScoreAdjPath := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
  383. f, err := os.OpenFile(oomScoreAdjPath, os.O_WRONLY, 0)
  384. if err != nil {
  385. return err
  386. }
  387. stringScore := strconv.Itoa(score)
  388. _, err = f.WriteString(stringScore)
  389. f.Close()
  390. if os.IsPermission(err) {
  391. // Setting oom_score_adj does not work in an
  392. // unprivileged container. Ignore the error, but log
  393. // it if we appear not to be in that situation.
  394. if !rsystem.RunningInUserNS() {
  395. logrus.Debugf("Permission denied writing %q to %s", stringScore, oomScoreAdjPath)
  396. }
  397. return nil
  398. }
  399. return err
  400. }
  401. // WithRemoteAddr sets the external containerd socket to connect to.
  402. func WithRemoteAddr(addr string) RemoteOption {
  403. return rpcAddr(addr)
  404. }
  405. type rpcAddr string
  406. func (a rpcAddr) Apply(r Remote) error {
  407. if remote, ok := r.(*remote); ok {
  408. remote.rpcAddr = string(a)
  409. return nil
  410. }
  411. return fmt.Errorf("WithRemoteAddr option not supported for this remote")
  412. }
  413. // WithRuntimePath sets the path of the runtime to be used as the
  414. // default by containerd
  415. func WithRuntimePath(rt string) RemoteOption {
  416. return runtimePath(rt)
  417. }
  418. type runtimePath string
  419. func (rt runtimePath) Apply(r Remote) error {
  420. if remote, ok := r.(*remote); ok {
  421. remote.runtime = string(rt)
  422. return nil
  423. }
  424. return fmt.Errorf("WithRuntime option not supported for this remote")
  425. }
  426. // WithRuntimeArgs sets the list of runtime args passed to containerd
  427. func WithRuntimeArgs(args []string) RemoteOption {
  428. return runtimeArgs(args)
  429. }
  430. type runtimeArgs []string
  431. func (rt runtimeArgs) Apply(r Remote) error {
  432. if remote, ok := r.(*remote); ok {
  433. remote.runtimeArgs = rt
  434. return nil
  435. }
  436. return fmt.Errorf("WithRuntimeArgs option not supported for this remote")
  437. }
  438. // WithStartDaemon defines if libcontainerd should also run containerd daemon.
  439. func WithStartDaemon(start bool) RemoteOption {
  440. return startDaemon(start)
  441. }
  442. type startDaemon bool
  443. func (s startDaemon) Apply(r Remote) error {
  444. if remote, ok := r.(*remote); ok {
  445. remote.startDaemon = bool(s)
  446. return nil
  447. }
  448. return fmt.Errorf("WithStartDaemon option not supported for this remote")
  449. }
  450. // WithDebugLog defines if containerd debug logs will be enabled for daemon.
  451. func WithDebugLog(debug bool) RemoteOption {
  452. return debugLog(debug)
  453. }
  454. type debugLog bool
  455. func (d debugLog) Apply(r Remote) error {
  456. if remote, ok := r.(*remote); ok {
  457. remote.debugLog = bool(d)
  458. return nil
  459. }
  460. return fmt.Errorf("WithDebugLog option not supported for this remote")
  461. }
  462. // WithLiveRestore defines if containers are stopped on shutdown or restored.
  463. func WithLiveRestore(v bool) RemoteOption {
  464. return liveRestore(v)
  465. }
  466. type liveRestore bool
  467. func (l liveRestore) Apply(r Remote) error {
  468. if remote, ok := r.(*remote); ok {
  469. remote.liveRestore = bool(l)
  470. for _, c := range remote.clients {
  471. c.liveRestore = bool(l)
  472. }
  473. return nil
  474. }
  475. return fmt.Errorf("WithLiveRestore option not supported for this remote")
  476. }
  477. // WithOOMScore defines the oom_score_adj to set for the containerd process.
  478. func WithOOMScore(score int) RemoteOption {
  479. return oomScore(score)
  480. }
  481. type oomScore int
  482. func (o oomScore) Apply(r Remote) error {
  483. if remote, ok := r.(*remote); ok {
  484. remote.oomScore = int(o)
  485. return nil
  486. }
  487. return fmt.Errorf("WithOOMScore option not supported for this remote")
  488. }