remote_linux.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. package libcontainerd
  2. import (
  3. "fmt"
  4. "io"
  5. "io/ioutil"
  6. "log"
  7. "net"
  8. "os"
  9. "os/exec"
  10. "path/filepath"
  11. "strconv"
  12. "strings"
  13. "sync"
  14. "syscall"
  15. "time"
  16. "github.com/Sirupsen/logrus"
  17. containerd "github.com/docker/containerd/api/grpc/types"
  18. "github.com/docker/docker/pkg/locker"
  19. sysinfo "github.com/docker/docker/pkg/system"
  20. "github.com/docker/docker/utils"
  21. "golang.org/x/net/context"
  22. "google.golang.org/grpc"
  23. "google.golang.org/grpc/grpclog"
  24. "google.golang.org/grpc/transport"
  25. )
  26. const (
  27. maxConnectionRetryCount = 3
  28. connectionRetryDelay = 3 * time.Second
  29. containerdShutdownTimeout = 15 * time.Second
  30. containerdBinary = "docker-containerd"
  31. containerdPidFilename = "docker-containerd.pid"
  32. containerdSockFilename = "docker-containerd.sock"
  33. containerdStateDir = "containerd"
  34. eventTimestampFilename = "event.ts"
  35. )
  36. type remote struct {
  37. sync.RWMutex
  38. apiClient containerd.APIClient
  39. daemonPid int
  40. stateDir string
  41. rpcAddr string
  42. startDaemon bool
  43. closeManually bool
  44. debugLog bool
  45. rpcConn *grpc.ClientConn
  46. clients []*client
  47. eventTsPath string
  48. pastEvents map[string]*containerd.Event
  49. runtime string
  50. runtimeArgs []string
  51. daemonWaitCh chan struct{}
  52. liveRestore bool
  53. oomScore int
  54. }
  55. // New creates a fresh instance of libcontainerd remote.
  56. func New(stateDir string, options ...RemoteOption) (_ Remote, err error) {
  57. defer func() {
  58. if err != nil {
  59. err = fmt.Errorf("Failed to connect to containerd. Please make sure containerd is installed in your PATH or you have specificed the correct address. Got error: %v", err)
  60. }
  61. }()
  62. r := &remote{
  63. stateDir: stateDir,
  64. daemonPid: -1,
  65. eventTsPath: filepath.Join(stateDir, eventTimestampFilename),
  66. pastEvents: make(map[string]*containerd.Event),
  67. }
  68. for _, option := range options {
  69. if err := option.Apply(r); err != nil {
  70. return nil, err
  71. }
  72. }
  73. if err := sysinfo.MkdirAll(stateDir, 0700); err != nil {
  74. return nil, err
  75. }
  76. if r.rpcAddr == "" {
  77. r.rpcAddr = filepath.Join(stateDir, containerdSockFilename)
  78. }
  79. if r.startDaemon {
  80. if err := r.runContainerdDaemon(); err != nil {
  81. return nil, err
  82. }
  83. }
  84. // don't output the grpc reconnect logging
  85. grpclog.SetLogger(log.New(ioutil.Discard, "", log.LstdFlags))
  86. dialOpts := append([]grpc.DialOption{grpc.WithInsecure()},
  87. grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
  88. return net.DialTimeout("unix", addr, timeout)
  89. }),
  90. )
  91. conn, err := grpc.Dial(r.rpcAddr, dialOpts...)
  92. if err != nil {
  93. return nil, fmt.Errorf("error connecting to containerd: %v", err)
  94. }
  95. r.rpcConn = conn
  96. r.apiClient = containerd.NewAPIClient(conn)
  97. go r.handleConnectionChange()
  98. if err := r.startEventsMonitor(); err != nil {
  99. return nil, err
  100. }
  101. return r, nil
  102. }
  103. func (r *remote) UpdateOptions(options ...RemoteOption) error {
  104. for _, option := range options {
  105. if err := option.Apply(r); err != nil {
  106. return err
  107. }
  108. }
  109. return nil
  110. }
  111. func (r *remote) handleConnectionChange() {
  112. var transientFailureCount = 0
  113. state := grpc.Idle
  114. for {
  115. s, err := r.rpcConn.WaitForStateChange(context.Background(), state)
  116. if err != nil {
  117. break
  118. }
  119. state = s
  120. logrus.Debugf("containerd connection state change: %v", s)
  121. if r.daemonPid != -1 {
  122. switch state {
  123. case grpc.TransientFailure:
  124. // Reset state to be notified of next failure
  125. transientFailureCount++
  126. if transientFailureCount >= maxConnectionRetryCount {
  127. transientFailureCount = 0
  128. if utils.IsProcessAlive(r.daemonPid) {
  129. utils.KillProcess(r.daemonPid)
  130. <-r.daemonWaitCh
  131. }
  132. if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error
  133. logrus.Errorf("error restarting containerd: %v", err)
  134. }
  135. } else {
  136. state = grpc.Idle
  137. time.Sleep(connectionRetryDelay)
  138. }
  139. case grpc.Shutdown:
  140. // Well, we asked for it to stop, just return
  141. return
  142. }
  143. }
  144. }
  145. }
  146. func (r *remote) Cleanup() {
  147. if r.daemonPid == -1 {
  148. return
  149. }
  150. r.closeManually = true
  151. r.rpcConn.Close()
  152. // Ask the daemon to quit
  153. syscall.Kill(r.daemonPid, syscall.SIGTERM)
  154. // Wait up to 15secs for it to stop
  155. for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second {
  156. if !utils.IsProcessAlive(r.daemonPid) {
  157. break
  158. }
  159. time.Sleep(time.Second)
  160. }
  161. if utils.IsProcessAlive(r.daemonPid) {
  162. logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid)
  163. syscall.Kill(r.daemonPid, syscall.SIGKILL)
  164. }
  165. // cleanup some files
  166. os.Remove(filepath.Join(r.stateDir, containerdPidFilename))
  167. os.Remove(filepath.Join(r.stateDir, containerdSockFilename))
  168. }
  169. func (r *remote) Client(b Backend) (Client, error) {
  170. c := &client{
  171. clientCommon: clientCommon{
  172. backend: b,
  173. containers: make(map[string]*container),
  174. locker: locker.New(),
  175. },
  176. remote: r,
  177. exitNotifiers: make(map[string]*exitNotifier),
  178. liveRestore: r.liveRestore,
  179. }
  180. r.Lock()
  181. r.clients = append(r.clients, c)
  182. r.Unlock()
  183. return c, nil
  184. }
  185. func (r *remote) updateEventTimestamp(t time.Time) {
  186. f, err := os.OpenFile(r.eventTsPath, syscall.O_CREAT|syscall.O_WRONLY|syscall.O_TRUNC, 0600)
  187. defer f.Close()
  188. if err != nil {
  189. logrus.Warnf("libcontainerd: failed to open event timestamp file: %v", err)
  190. return
  191. }
  192. b, err := t.MarshalText()
  193. if err != nil {
  194. logrus.Warnf("libcontainerd: failed to encode timestamp: %v", err)
  195. return
  196. }
  197. n, err := f.Write(b)
  198. if err != nil || n != len(b) {
  199. logrus.Warnf("libcontainerd: failed to update event timestamp file: %v", err)
  200. f.Truncate(0)
  201. return
  202. }
  203. }
  204. func (r *remote) getLastEventTimestamp() int64 {
  205. t := time.Now()
  206. fi, err := os.Stat(r.eventTsPath)
  207. if os.IsNotExist(err) || fi.Size() == 0 {
  208. return t.Unix()
  209. }
  210. f, err := os.Open(r.eventTsPath)
  211. defer f.Close()
  212. if err != nil {
  213. logrus.Warnf("libcontainerd: Unable to access last event ts: %v", err)
  214. return t.Unix()
  215. }
  216. b := make([]byte, fi.Size())
  217. n, err := f.Read(b)
  218. if err != nil || n != len(b) {
  219. logrus.Warnf("libcontainerd: Unable to read last event ts: %v", err)
  220. return t.Unix()
  221. }
  222. t.UnmarshalText(b)
  223. return t.Unix()
  224. }
  225. func (r *remote) startEventsMonitor() error {
  226. // First, get past events
  227. er := &containerd.EventsRequest{
  228. Timestamp: uint64(r.getLastEventTimestamp()),
  229. }
  230. events, err := r.apiClient.Events(context.Background(), er)
  231. if err != nil {
  232. return err
  233. }
  234. go r.handleEventStream(events)
  235. return nil
  236. }
  237. func (r *remote) handleEventStream(events containerd.API_EventsClient) {
  238. live := false
  239. for {
  240. e, err := events.Recv()
  241. if err != nil {
  242. if grpc.ErrorDesc(err) == transport.ErrConnClosing.Desc &&
  243. r.closeManually {
  244. // ignore error if grpc remote connection is closed manually
  245. return
  246. }
  247. logrus.Errorf("failed to receive event from containerd: %v", err)
  248. go r.startEventsMonitor()
  249. return
  250. }
  251. if live == false {
  252. logrus.Debugf("received past containerd event: %#v", e)
  253. // Pause/Resume events should never happens after exit one
  254. switch e.Type {
  255. case StateExit:
  256. r.pastEvents[e.Id] = e
  257. case StatePause:
  258. r.pastEvents[e.Id] = e
  259. case StateResume:
  260. r.pastEvents[e.Id] = e
  261. case stateLive:
  262. live = true
  263. r.updateEventTimestamp(time.Unix(int64(e.Timestamp), 0))
  264. }
  265. } else {
  266. logrus.Debugf("received containerd event: %#v", e)
  267. var container *container
  268. var c *client
  269. r.RLock()
  270. for _, c = range r.clients {
  271. container, err = c.getContainer(e.Id)
  272. if err == nil {
  273. break
  274. }
  275. }
  276. r.RUnlock()
  277. if container == nil {
  278. logrus.Errorf("no state for container: %q", err)
  279. continue
  280. }
  281. if err := container.handleEvent(e); err != nil {
  282. logrus.Errorf("error processing state change for %s: %v", e.Id, err)
  283. }
  284. r.updateEventTimestamp(time.Unix(int64(e.Timestamp), 0))
  285. }
  286. }
  287. }
  288. func (r *remote) runContainerdDaemon() error {
  289. pidFilename := filepath.Join(r.stateDir, containerdPidFilename)
  290. f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600)
  291. defer f.Close()
  292. if err != nil {
  293. return err
  294. }
  295. // File exist, check if the daemon is alive
  296. b := make([]byte, 8)
  297. n, err := f.Read(b)
  298. if err != nil && err != io.EOF {
  299. return err
  300. }
  301. if n > 0 {
  302. pid, err := strconv.ParseUint(string(b[:n]), 10, 64)
  303. if err != nil {
  304. return err
  305. }
  306. if utils.IsProcessAlive(int(pid)) {
  307. logrus.Infof("previous instance of containerd still alive (%d)", pid)
  308. r.daemonPid = int(pid)
  309. return nil
  310. }
  311. }
  312. // rewind the file
  313. _, err = f.Seek(0, os.SEEK_SET)
  314. if err != nil {
  315. return err
  316. }
  317. // Truncate it
  318. err = f.Truncate(0)
  319. if err != nil {
  320. return err
  321. }
  322. // Start a new instance
  323. args := []string{
  324. "-l", fmt.Sprintf("unix://%s", r.rpcAddr),
  325. "--shim", "docker-containerd-shim",
  326. "--metrics-interval=0",
  327. "--start-timeout", "2m",
  328. "--state-dir", filepath.Join(r.stateDir, containerdStateDir),
  329. }
  330. if r.runtime != "" {
  331. args = append(args, "--runtime")
  332. args = append(args, r.runtime)
  333. }
  334. if r.debugLog {
  335. args = append(args, "--debug")
  336. }
  337. if len(r.runtimeArgs) > 0 {
  338. for _, v := range r.runtimeArgs {
  339. args = append(args, "--runtime-args")
  340. args = append(args, v)
  341. }
  342. logrus.Debugf("runContainerdDaemon: runtimeArgs: %s", args)
  343. }
  344. cmd := exec.Command(containerdBinary, args...)
  345. // redirect containerd logs to docker logs
  346. cmd.Stdout = os.Stdout
  347. cmd.Stderr = os.Stderr
  348. cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true, Pdeathsig: syscall.SIGKILL}
  349. cmd.Env = nil
  350. // clear the NOTIFY_SOCKET from the env when starting containerd
  351. for _, e := range os.Environ() {
  352. if !strings.HasPrefix(e, "NOTIFY_SOCKET") {
  353. cmd.Env = append(cmd.Env, e)
  354. }
  355. }
  356. if err := cmd.Start(); err != nil {
  357. return err
  358. }
  359. logrus.Infof("New containerd process, pid: %d", cmd.Process.Pid)
  360. if err := setOOMScore(cmd.Process.Pid, r.oomScore); err != nil {
  361. utils.KillProcess(cmd.Process.Pid)
  362. return err
  363. }
  364. if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil {
  365. utils.KillProcess(cmd.Process.Pid)
  366. return err
  367. }
  368. r.daemonWaitCh = make(chan struct{})
  369. go func() {
  370. cmd.Wait()
  371. close(r.daemonWaitCh)
  372. }() // Reap our child when needed
  373. r.daemonPid = cmd.Process.Pid
  374. return nil
  375. }
  376. func setOOMScore(pid, score int) error {
  377. f, err := os.OpenFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid), os.O_WRONLY, 0)
  378. if err != nil {
  379. return err
  380. }
  381. _, err = f.WriteString(strconv.Itoa(score))
  382. f.Close()
  383. return err
  384. }
  385. // WithRemoteAddr sets the external containerd socket to connect to.
  386. func WithRemoteAddr(addr string) RemoteOption {
  387. return rpcAddr(addr)
  388. }
  389. type rpcAddr string
  390. func (a rpcAddr) Apply(r Remote) error {
  391. if remote, ok := r.(*remote); ok {
  392. remote.rpcAddr = string(a)
  393. return nil
  394. }
  395. return fmt.Errorf("WithRemoteAddr option not supported for this remote")
  396. }
  397. // WithRuntimePath sets the path of the runtime to be used as the
  398. // default by containerd
  399. func WithRuntimePath(rt string) RemoteOption {
  400. return runtimePath(rt)
  401. }
  402. type runtimePath string
  403. func (rt runtimePath) Apply(r Remote) error {
  404. if remote, ok := r.(*remote); ok {
  405. remote.runtime = string(rt)
  406. return nil
  407. }
  408. return fmt.Errorf("WithRuntime option not supported for this remote")
  409. }
  410. // WithRuntimeArgs sets the list of runtime args passed to containerd
  411. func WithRuntimeArgs(args []string) RemoteOption {
  412. return runtimeArgs(args)
  413. }
  414. type runtimeArgs []string
  415. func (rt runtimeArgs) Apply(r Remote) error {
  416. if remote, ok := r.(*remote); ok {
  417. remote.runtimeArgs = rt
  418. return nil
  419. }
  420. return fmt.Errorf("WithRuntimeArgs option not supported for this remote")
  421. }
  422. // WithStartDaemon defines if libcontainerd should also run containerd daemon.
  423. func WithStartDaemon(start bool) RemoteOption {
  424. return startDaemon(start)
  425. }
  426. type startDaemon bool
  427. func (s startDaemon) Apply(r Remote) error {
  428. if remote, ok := r.(*remote); ok {
  429. remote.startDaemon = bool(s)
  430. return nil
  431. }
  432. return fmt.Errorf("WithStartDaemon option not supported for this remote")
  433. }
  434. // WithDebugLog defines if containerd debug logs will be enabled for daemon.
  435. func WithDebugLog(debug bool) RemoteOption {
  436. return debugLog(debug)
  437. }
  438. type debugLog bool
  439. func (d debugLog) Apply(r Remote) error {
  440. if remote, ok := r.(*remote); ok {
  441. remote.debugLog = bool(d)
  442. return nil
  443. }
  444. return fmt.Errorf("WithDebugLog option not supported for this remote")
  445. }
  446. // WithLiveRestore defines if containers are stopped on shutdown or restored.
  447. func WithLiveRestore(v bool) RemoteOption {
  448. return liveRestore(v)
  449. }
  450. type liveRestore bool
  451. func (l liveRestore) Apply(r Remote) error {
  452. if remote, ok := r.(*remote); ok {
  453. remote.liveRestore = bool(l)
  454. for _, c := range remote.clients {
  455. c.liveRestore = bool(l)
  456. }
  457. return nil
  458. }
  459. return fmt.Errorf("WithLiveRestore option not supported for this remote")
  460. }
  461. // WithOOMScore defines the oom_score_adj to set for the containerd process.
  462. func WithOOMScore(score int) RemoteOption {
  463. return oomScore(score)
  464. }
  465. type oomScore int
  466. func (o oomScore) Apply(r Remote) error {
  467. if remote, ok := r.(*remote); ok {
  468. remote.oomScore = int(o)
  469. return nil
  470. }
  471. return fmt.Errorf("WithOOMScore option not supported for this remote")
  472. }