namespace_linux.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632
  1. package osl
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. "net"
  6. "os"
  7. "os/exec"
  8. "path/filepath"
  9. "runtime"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. "syscall"
  14. "time"
  15. "github.com/docker/docker/pkg/reexec"
  16. "github.com/docker/libnetwork/ns"
  17. "github.com/docker/libnetwork/types"
  18. "github.com/sirupsen/logrus"
  19. "github.com/vishvananda/netlink"
  20. "github.com/vishvananda/netns"
  21. )
  // defaultPrefix is the initial value of prefix, the directory under which
  // basePath places the "netns" directory.
  const (
  	defaultPrefix = "/var/run/docker"
  )
  23. func init() {
  24. reexec.Register("set-ipv6", reexecSetIPv6)
  25. }
  26. var (
  27. once sync.Once
  28. garbagePathMap = make(map[string]bool)
  29. gpmLock sync.Mutex
  30. gpmWg sync.WaitGroup
  31. gpmCleanupPeriod = 60 * time.Second
  32. gpmChan = make(chan chan struct{})
  33. prefix = defaultPrefix
  34. )
  35. // The networkNamespace type is the linux implementation of the Sandbox
  36. // interface. It represents a linux network namespace, and moves an interface
  37. // into it when called on method AddInterface or sets the gateway etc.
  38. type networkNamespace struct {
  39. path string
  40. iFaces []*nwIface
  41. gw net.IP
  42. gwv6 net.IP
  43. staticRoutes []*types.StaticRoute
  44. neighbors []*neigh
  45. nextIfIndex map[string]int
  46. isDefault bool
  47. nlHandle *netlink.Handle
  48. loV6Enabled bool
  49. sync.Mutex
  50. }
  51. // SetBasePath sets the base url prefix for the ns path
  52. func SetBasePath(path string) {
  53. prefix = path
  54. }
  55. func init() {
  56. reexec.Register("netns-create", reexecCreateNamespace)
  57. }
  58. func basePath() string {
  59. return filepath.Join(prefix, "netns")
  60. }
  61. func createBasePath() {
  62. err := os.MkdirAll(basePath(), 0755)
  63. if err != nil {
  64. panic("Could not create net namespace path directory")
  65. }
  66. // Start the garbage collection go routine
  67. go removeUnusedPaths()
  68. }
  69. func removeUnusedPaths() {
  70. gpmLock.Lock()
  71. period := gpmCleanupPeriod
  72. gpmLock.Unlock()
  73. ticker := time.NewTicker(period)
  74. for {
  75. var (
  76. gc chan struct{}
  77. gcOk bool
  78. )
  79. select {
  80. case <-ticker.C:
  81. case gc, gcOk = <-gpmChan:
  82. }
  83. gpmLock.Lock()
  84. pathList := make([]string, 0, len(garbagePathMap))
  85. for path := range garbagePathMap {
  86. pathList = append(pathList, path)
  87. }
  88. garbagePathMap = make(map[string]bool)
  89. gpmWg.Add(1)
  90. gpmLock.Unlock()
  91. for _, path := range pathList {
  92. os.Remove(path)
  93. }
  94. gpmWg.Done()
  95. if gcOk {
  96. close(gc)
  97. }
  98. }
  99. }
  100. func addToGarbagePaths(path string) {
  101. gpmLock.Lock()
  102. garbagePathMap[path] = true
  103. gpmLock.Unlock()
  104. }
  105. func removeFromGarbagePaths(path string) {
  106. gpmLock.Lock()
  107. delete(garbagePathMap, path)
  108. gpmLock.Unlock()
  109. }
  110. // GC triggers garbage collection of namespace path right away
  111. // and waits for it.
  112. func GC() {
  113. gpmLock.Lock()
  114. if len(garbagePathMap) == 0 {
  115. // No need for GC if map is empty
  116. gpmLock.Unlock()
  117. return
  118. }
  119. gpmLock.Unlock()
  120. // if content exists in the garbage paths
  121. // we can trigger GC to run, providing a
  122. // channel to be notified on completion
  123. waitGC := make(chan struct{})
  124. gpmChan <- waitGC
  125. // wait for GC completion
  126. <-waitGC
  127. }
  128. // GenerateKey generates a sandbox key based on the passed
  129. // container id.
  130. func GenerateKey(containerID string) string {
  131. maxLen := 12
  132. // Read sandbox key from host for overlay
  133. if strings.HasPrefix(containerID, "-") {
  134. var (
  135. index int
  136. indexStr string
  137. tmpkey string
  138. )
  139. dir, err := ioutil.ReadDir(basePath())
  140. if err != nil {
  141. return ""
  142. }
  143. for _, v := range dir {
  144. id := v.Name()
  145. if strings.HasSuffix(id, containerID[:maxLen-1]) {
  146. indexStr = strings.TrimSuffix(id, containerID[:maxLen-1])
  147. tmpindex, err := strconv.Atoi(indexStr)
  148. if err != nil {
  149. return ""
  150. }
  151. if tmpindex > index {
  152. index = tmpindex
  153. tmpkey = id
  154. }
  155. }
  156. }
  157. containerID = tmpkey
  158. if containerID == "" {
  159. return ""
  160. }
  161. }
  162. if len(containerID) < maxLen {
  163. maxLen = len(containerID)
  164. }
  165. return basePath() + "/" + containerID[:maxLen]
  166. }
  167. // NewSandbox provides a new sandbox instance created in an os specific way
  168. // provided a key which uniquely identifies the sandbox
  169. func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
  170. if !isRestore {
  171. err := createNetworkNamespace(key, osCreate)
  172. if err != nil {
  173. return nil, err
  174. }
  175. } else {
  176. once.Do(createBasePath)
  177. }
  178. n := &networkNamespace{path: key, isDefault: !osCreate, nextIfIndex: make(map[string]int)}
  179. sboxNs, err := netns.GetFromPath(n.path)
  180. if err != nil {
  181. return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
  182. }
  183. defer sboxNs.Close()
  184. n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
  185. if err != nil {
  186. return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
  187. }
  188. err = n.nlHandle.SetSocketTimeout(ns.NetlinkSocketsTimeout)
  189. if err != nil {
  190. logrus.Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
  191. }
  192. // In live-restore mode, IPV6 entries are getting cleaned up due to below code
  193. // We should retain IPV6 configrations in live-restore mode when Docker Daemon
  194. // comes back. It should work as it is on other cases
  195. // As starting point, disable IPv6 on all interfaces
  196. if !isRestore && !n.isDefault {
  197. err = setIPv6(n.path, "all", false)
  198. if err != nil {
  199. logrus.Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
  200. }
  201. }
  202. if err = n.loopbackUp(); err != nil {
  203. n.nlHandle.Delete()
  204. return nil, err
  205. }
  206. return n, nil
  207. }
  208. func (n *networkNamespace) InterfaceOptions() IfaceOptionSetter {
  209. return n
  210. }
  211. func (n *networkNamespace) NeighborOptions() NeighborOptionSetter {
  212. return n
  213. }
  // mountNetworkNamespace bind-mounts basePath onto lnPath and returns the
  // error reported by the mount system call, if any.
  func mountNetworkNamespace(basePath string, lnPath string) error {
  	const fsType = "bind"
  	return syscall.Mount(basePath, lnPath, fsType, syscall.MS_BIND, "")
  }
  217. // GetSandboxForExternalKey returns sandbox object for the supplied path
  218. func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
  219. if err := createNamespaceFile(key); err != nil {
  220. return nil, err
  221. }
  222. if err := mountNetworkNamespace(basePath, key); err != nil {
  223. return nil, err
  224. }
  225. n := &networkNamespace{path: key, nextIfIndex: make(map[string]int)}
  226. sboxNs, err := netns.GetFromPath(n.path)
  227. if err != nil {
  228. return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
  229. }
  230. defer sboxNs.Close()
  231. n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
  232. if err != nil {
  233. return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
  234. }
  235. err = n.nlHandle.SetSocketTimeout(ns.NetlinkSocketsTimeout)
  236. if err != nil {
  237. logrus.Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
  238. }
  239. // As starting point, disable IPv6 on all interfaces
  240. err = setIPv6(n.path, "all", false)
  241. if err != nil {
  242. logrus.Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
  243. }
  244. if err = n.loopbackUp(); err != nil {
  245. n.nlHandle.Delete()
  246. return nil, err
  247. }
  248. return n, nil
  249. }
  250. func reexecCreateNamespace() {
  251. if len(os.Args) < 2 {
  252. logrus.Fatal("no namespace path provided")
  253. }
  254. if err := mountNetworkNamespace("/proc/self/ns/net", os.Args[1]); err != nil {
  255. logrus.Fatal(err)
  256. }
  257. }
  258. func createNetworkNamespace(path string, osCreate bool) error {
  259. if err := createNamespaceFile(path); err != nil {
  260. return err
  261. }
  262. cmd := &exec.Cmd{
  263. Path: reexec.Self(),
  264. Args: append([]string{"netns-create"}, path),
  265. Stdout: os.Stdout,
  266. Stderr: os.Stderr,
  267. }
  268. if osCreate {
  269. cmd.SysProcAttr = &syscall.SysProcAttr{}
  270. cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNET
  271. }
  272. if err := cmd.Run(); err != nil {
  273. return fmt.Errorf("namespace creation reexec command failed: %v", err)
  274. }
  275. return nil
  276. }
  // unmountNamespaceFile lazily unmounts (MNT_DETACH) path when it exists.
  // It is best effort: nothing happens when the path cannot be stat'ed, and
  // the result of the unmount is ignored.
  func unmountNamespaceFile(path string) {
  	if _, err := os.Stat(path); err != nil {
  		return
  	}
  	syscall.Unmount(path, syscall.MNT_DETACH)
  }
  282. func createNamespaceFile(path string) (err error) {
  283. var f *os.File
  284. once.Do(createBasePath)
  285. // Remove it from garbage collection list if present
  286. removeFromGarbagePaths(path)
  287. // If the path is there unmount it first
  288. unmountNamespaceFile(path)
  289. // wait for garbage collection to complete if it is in progress
  290. // before trying to create the file.
  291. gpmWg.Wait()
  292. if f, err = os.Create(path); err == nil {
  293. f.Close()
  294. }
  295. return err
  296. }
  297. func (n *networkNamespace) loopbackUp() error {
  298. iface, err := n.nlHandle.LinkByName("lo")
  299. if err != nil {
  300. return err
  301. }
  302. return n.nlHandle.LinkSetUp(iface)
  303. }
  304. func (n *networkNamespace) GetLoopbackIfaceName() string {
  305. return "lo"
  306. }
  307. func (n *networkNamespace) AddAliasIP(ifName string, ip *net.IPNet) error {
  308. iface, err := n.nlHandle.LinkByName(ifName)
  309. if err != nil {
  310. return err
  311. }
  312. return n.nlHandle.AddrAdd(iface, &netlink.Addr{IPNet: ip})
  313. }
  314. func (n *networkNamespace) RemoveAliasIP(ifName string, ip *net.IPNet) error {
  315. iface, err := n.nlHandle.LinkByName(ifName)
  316. if err != nil {
  317. return err
  318. }
  319. return n.nlHandle.AddrDel(iface, &netlink.Addr{IPNet: ip})
  320. }
  321. func (n *networkNamespace) InvokeFunc(f func()) error {
  322. return nsInvoke(n.nsPath(), func(nsFD int) error { return nil }, func(callerFD int) error {
  323. f()
  324. return nil
  325. })
  326. }
  327. // InitOSContext initializes OS context while configuring network resources
  328. func InitOSContext() func() {
  329. runtime.LockOSThread()
  330. if err := ns.SetNamespace(); err != nil {
  331. logrus.Error(err)
  332. }
  333. return runtime.UnlockOSThread
  334. }
  335. func nsInvoke(path string, prefunc func(nsFD int) error, postfunc func(callerFD int) error) error {
  336. defer InitOSContext()()
  337. newNs, err := netns.GetFromPath(path)
  338. if err != nil {
  339. return fmt.Errorf("failed get network namespace %q: %v", path, err)
  340. }
  341. defer newNs.Close()
  342. // Invoked before the namespace switch happens but after the namespace file
  343. // handle is obtained.
  344. if err := prefunc(int(newNs)); err != nil {
  345. return fmt.Errorf("failed in prefunc: %v", err)
  346. }
  347. if err = netns.Set(newNs); err != nil {
  348. return err
  349. }
  350. defer ns.SetNamespace()
  351. // Invoked after the namespace switch.
  352. return postfunc(ns.ParseHandlerInt())
  353. }
  354. func (n *networkNamespace) nsPath() string {
  355. n.Lock()
  356. defer n.Unlock()
  357. return n.path
  358. }
  359. func (n *networkNamespace) Info() Info {
  360. return n
  361. }
  362. func (n *networkNamespace) Key() string {
  363. return n.path
  364. }
  365. func (n *networkNamespace) Destroy() error {
  366. if n.nlHandle != nil {
  367. n.nlHandle.Delete()
  368. }
  369. // Assuming no running process is executing in this network namespace,
  370. // unmounting is sufficient to destroy it.
  371. if err := syscall.Unmount(n.path, syscall.MNT_DETACH); err != nil {
  372. return err
  373. }
  374. // Stash it into the garbage collection list
  375. addToGarbagePaths(n.path)
  376. return nil
  377. }
  378. // Restore restore the network namespace
  379. func (n *networkNamespace) Restore(ifsopt map[string][]IfaceOption, routes []*types.StaticRoute, gw net.IP, gw6 net.IP) error {
  380. // restore interfaces
  381. for name, opts := range ifsopt {
  382. if !strings.Contains(name, "+") {
  383. return fmt.Errorf("wrong iface name in restore osl sandbox interface: %s", name)
  384. }
  385. seps := strings.Split(name, "+")
  386. srcName := seps[0]
  387. dstPrefix := seps[1]
  388. i := &nwIface{srcName: srcName, dstName: dstPrefix, ns: n}
  389. i.processInterfaceOptions(opts...)
  390. if i.master != "" {
  391. i.dstMaster = n.findDst(i.master, true)
  392. if i.dstMaster == "" {
  393. return fmt.Errorf("could not find an appropriate master %q for %q",
  394. i.master, i.srcName)
  395. }
  396. }
  397. if n.isDefault {
  398. i.dstName = i.srcName
  399. } else {
  400. links, err := n.nlHandle.LinkList()
  401. if err != nil {
  402. return fmt.Errorf("failed to retrieve list of links in network namespace %q during restore", n.path)
  403. }
  404. // due to the docker network connect/disconnect, so the dstName should
  405. // restore from the namespace
  406. for _, link := range links {
  407. addrs, err := n.nlHandle.AddrList(link, netlink.FAMILY_V4)
  408. if err != nil {
  409. return err
  410. }
  411. ifaceName := link.Attrs().Name
  412. if strings.HasPrefix(ifaceName, "vxlan") {
  413. if i.dstName == "vxlan" {
  414. i.dstName = ifaceName
  415. break
  416. }
  417. }
  418. // find the interface name by ip
  419. if i.address != nil {
  420. for _, addr := range addrs {
  421. if addr.IPNet.String() == i.address.String() {
  422. i.dstName = ifaceName
  423. break
  424. }
  425. continue
  426. }
  427. if i.dstName == ifaceName {
  428. break
  429. }
  430. }
  431. // This is to find the interface name of the pair in overlay sandbox
  432. if strings.HasPrefix(ifaceName, "veth") {
  433. if i.master != "" && i.dstName == "veth" {
  434. i.dstName = ifaceName
  435. }
  436. }
  437. }
  438. var index int
  439. indexStr := strings.TrimPrefix(i.dstName, dstPrefix)
  440. if indexStr != "" {
  441. index, err = strconv.Atoi(indexStr)
  442. if err != nil {
  443. return err
  444. }
  445. }
  446. index++
  447. n.Lock()
  448. if index > n.nextIfIndex[dstPrefix] {
  449. n.nextIfIndex[dstPrefix] = index
  450. }
  451. n.iFaces = append(n.iFaces, i)
  452. n.Unlock()
  453. }
  454. }
  455. // restore routes
  456. for _, r := range routes {
  457. n.Lock()
  458. n.staticRoutes = append(n.staticRoutes, r)
  459. n.Unlock()
  460. }
  461. // restore gateway
  462. if len(gw) > 0 {
  463. n.Lock()
  464. n.gw = gw
  465. n.Unlock()
  466. }
  467. if len(gw6) > 0 {
  468. n.Lock()
  469. n.gwv6 = gw6
  470. n.Unlock()
  471. }
  472. return nil
  473. }
  474. // Checks whether IPv6 needs to be enabled/disabled on the loopback interface
  475. func (n *networkNamespace) checkLoV6() {
  476. var (
  477. enable = false
  478. action = "disable"
  479. )
  480. n.Lock()
  481. for _, iface := range n.iFaces {
  482. if iface.AddressIPv6() != nil {
  483. enable = true
  484. action = "enable"
  485. break
  486. }
  487. }
  488. n.Unlock()
  489. if n.loV6Enabled == enable {
  490. return
  491. }
  492. if err := setIPv6(n.path, "lo", enable); err != nil {
  493. logrus.Warnf("Failed to %s IPv6 on loopback interface on network namespace %q: %v", action, n.path, err)
  494. }
  495. n.loV6Enabled = enable
  496. }
  497. func reexecSetIPv6() {
  498. runtime.LockOSThread()
  499. defer runtime.UnlockOSThread()
  500. if len(os.Args) < 3 {
  501. logrus.Errorf("invalid number of arguments for %s", os.Args[0])
  502. os.Exit(1)
  503. }
  504. ns, err := netns.GetFromPath(os.Args[1])
  505. if err != nil {
  506. logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
  507. os.Exit(2)
  508. }
  509. defer ns.Close()
  510. if err = netns.Set(ns); err != nil {
  511. logrus.Errorf("setting into container netns %q failed: %v", os.Args[1], err)
  512. os.Exit(3)
  513. }
  514. var (
  515. action = "disable"
  516. value = byte('1')
  517. path = fmt.Sprintf("/proc/sys/net/ipv6/conf/%s/disable_ipv6", os.Args[2])
  518. )
  519. if os.Args[3] == "true" {
  520. action = "enable"
  521. value = byte('0')
  522. }
  523. if err = ioutil.WriteFile(path, []byte{value, '\n'}, 0644); err != nil {
  524. logrus.Errorf("failed to %s IPv6 forwarding for container's interface %s: %v", action, os.Args[2], err)
  525. os.Exit(4)
  526. }
  527. os.Exit(0)
  528. }
  529. func setIPv6(path, iface string, enable bool) error {
  530. cmd := &exec.Cmd{
  531. Path: reexec.Self(),
  532. Args: append([]string{"set-ipv6"}, path, iface, strconv.FormatBool(enable)),
  533. Stdout: os.Stdout,
  534. Stderr: os.Stderr,
  535. }
  536. if err := cmd.Run(); err != nil {
  537. return fmt.Errorf("reexec to set IPv6 failed: %v", err)
  538. }
  539. return nil
  540. }