namespace_linux.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510
  1. package osl
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. "net"
  6. "os"
  7. "os/exec"
  8. "path/filepath"
  9. "runtime"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. "syscall"
  14. "time"
  15. "github.com/Sirupsen/logrus"
  16. "github.com/docker/docker/pkg/reexec"
  17. "github.com/docker/libnetwork/ns"
  18. "github.com/docker/libnetwork/types"
  19. "github.com/vishvananda/netlink"
  20. "github.com/vishvananda/netns"
  21. )
  22. const defaultPrefix = "/var/run/docker"
  23. var (
  24. once sync.Once
  25. garbagePathMap = make(map[string]bool)
  26. gpmLock sync.Mutex
  27. gpmWg sync.WaitGroup
  28. gpmCleanupPeriod = 60 * time.Second
  29. gpmChan = make(chan chan struct{})
  30. prefix = defaultPrefix
  31. )
  32. // The networkNamespace type is the linux implementation of the Sandbox
  33. // interface. It represents a linux network namespace, and moves an interface
  34. // into it when called on method AddInterface or sets the gateway etc.
  35. type networkNamespace struct {
  36. path string
  37. iFaces []*nwIface
  38. gw net.IP
  39. gwv6 net.IP
  40. staticRoutes []*types.StaticRoute
  41. neighbors []*neigh
  42. nextIfIndex int
  43. isDefault bool
  44. nlHandle *netlink.Handle
  45. sync.Mutex
  46. }
  47. // SetBasePath sets the base url prefix for the ns path
  48. func SetBasePath(path string) {
  49. prefix = path
  50. }
  51. func init() {
  52. reexec.Register("netns-create", reexecCreateNamespace)
  53. }
  54. func basePath() string {
  55. return filepath.Join(prefix, "netns")
  56. }
  57. func createBasePath() {
  58. err := os.MkdirAll(basePath(), 0755)
  59. if err != nil {
  60. panic("Could not create net namespace path directory")
  61. }
  62. // Start the garbage collection go routine
  63. go removeUnusedPaths()
  64. }
  65. func removeUnusedPaths() {
  66. gpmLock.Lock()
  67. period := gpmCleanupPeriod
  68. gpmLock.Unlock()
  69. ticker := time.NewTicker(period)
  70. for {
  71. var (
  72. gc chan struct{}
  73. gcOk bool
  74. )
  75. select {
  76. case <-ticker.C:
  77. case gc, gcOk = <-gpmChan:
  78. }
  79. gpmLock.Lock()
  80. pathList := make([]string, 0, len(garbagePathMap))
  81. for path := range garbagePathMap {
  82. pathList = append(pathList, path)
  83. }
  84. garbagePathMap = make(map[string]bool)
  85. gpmWg.Add(1)
  86. gpmLock.Unlock()
  87. for _, path := range pathList {
  88. os.Remove(path)
  89. }
  90. gpmWg.Done()
  91. if gcOk {
  92. close(gc)
  93. }
  94. }
  95. }
  96. func addToGarbagePaths(path string) {
  97. gpmLock.Lock()
  98. garbagePathMap[path] = true
  99. gpmLock.Unlock()
  100. }
  101. func removeFromGarbagePaths(path string) {
  102. gpmLock.Lock()
  103. delete(garbagePathMap, path)
  104. gpmLock.Unlock()
  105. }
  106. // GC triggers garbage collection of namespace path right away
  107. // and waits for it.
  108. func GC() {
  109. gpmLock.Lock()
  110. if len(garbagePathMap) == 0 {
  111. // No need for GC if map is empty
  112. gpmLock.Unlock()
  113. return
  114. }
  115. gpmLock.Unlock()
  116. // if content exists in the garbage paths
  117. // we can trigger GC to run, providing a
  118. // channel to be notified on completion
  119. waitGC := make(chan struct{})
  120. gpmChan <- waitGC
  121. // wait for GC completion
  122. <-waitGC
  123. }
  124. // GenerateKey generates a sandbox key based on the passed
  125. // container id.
  126. func GenerateKey(containerID string) string {
  127. maxLen := 12
  128. // Read sandbox key from host for overlay
  129. if strings.HasPrefix(containerID, "-") {
  130. var (
  131. index int
  132. indexStr string
  133. tmpkey string
  134. )
  135. dir, err := ioutil.ReadDir(basePath())
  136. if err != nil {
  137. return ""
  138. }
  139. for _, v := range dir {
  140. id := v.Name()
  141. if strings.HasSuffix(id, containerID[:maxLen-1]) {
  142. indexStr = strings.TrimSuffix(id, containerID[:maxLen-1])
  143. tmpindex, err := strconv.Atoi(indexStr)
  144. if err != nil {
  145. return ""
  146. }
  147. if tmpindex > index {
  148. index = tmpindex
  149. tmpkey = id
  150. }
  151. }
  152. }
  153. containerID = tmpkey
  154. if containerID == "" {
  155. return ""
  156. }
  157. }
  158. if len(containerID) < maxLen {
  159. maxLen = len(containerID)
  160. }
  161. return basePath() + "/" + containerID[:maxLen]
  162. }
  163. // NewSandbox provides a new sandbox instance created in an os specific way
  164. // provided a key which uniquely identifies the sandbox
  165. func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
  166. if !isRestore {
  167. err := createNetworkNamespace(key, osCreate)
  168. if err != nil {
  169. return nil, err
  170. }
  171. } else {
  172. once.Do(createBasePath)
  173. }
  174. n := &networkNamespace{path: key, isDefault: !osCreate}
  175. sboxNs, err := netns.GetFromPath(n.path)
  176. if err != nil {
  177. return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
  178. }
  179. defer sboxNs.Close()
  180. n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
  181. if err != nil {
  182. return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
  183. }
  184. err = n.nlHandle.SetSocketTimeout(ns.NetlinkSocketsTimeout)
  185. if err != nil {
  186. logrus.Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
  187. }
  188. if err = n.loopbackUp(); err != nil {
  189. n.nlHandle.Delete()
  190. return nil, err
  191. }
  192. return n, nil
  193. }
  194. func (n *networkNamespace) InterfaceOptions() IfaceOptionSetter {
  195. return n
  196. }
  197. func (n *networkNamespace) NeighborOptions() NeighborOptionSetter {
  198. return n
  199. }
  200. func mountNetworkNamespace(basePath string, lnPath string) error {
  201. return syscall.Mount(basePath, lnPath, "bind", syscall.MS_BIND, "")
  202. }
  203. // GetSandboxForExternalKey returns sandbox object for the supplied path
  204. func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
  205. if err := createNamespaceFile(key); err != nil {
  206. return nil, err
  207. }
  208. if err := mountNetworkNamespace(basePath, key); err != nil {
  209. return nil, err
  210. }
  211. n := &networkNamespace{path: key}
  212. sboxNs, err := netns.GetFromPath(n.path)
  213. if err != nil {
  214. return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
  215. }
  216. defer sboxNs.Close()
  217. n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
  218. if err != nil {
  219. return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
  220. }
  221. err = n.nlHandle.SetSocketTimeout(ns.NetlinkSocketsTimeout)
  222. if err != nil {
  223. logrus.Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
  224. }
  225. if err = n.loopbackUp(); err != nil {
  226. n.nlHandle.Delete()
  227. return nil, err
  228. }
  229. return n, nil
  230. }
  231. func reexecCreateNamespace() {
  232. if len(os.Args) < 2 {
  233. logrus.Fatal("no namespace path provided")
  234. }
  235. if err := mountNetworkNamespace("/proc/self/ns/net", os.Args[1]); err != nil {
  236. logrus.Fatal(err)
  237. }
  238. }
  239. func createNetworkNamespace(path string, osCreate bool) error {
  240. if err := createNamespaceFile(path); err != nil {
  241. return err
  242. }
  243. cmd := &exec.Cmd{
  244. Path: reexec.Self(),
  245. Args: append([]string{"netns-create"}, path),
  246. Stdout: os.Stdout,
  247. Stderr: os.Stderr,
  248. }
  249. if osCreate {
  250. cmd.SysProcAttr = &syscall.SysProcAttr{}
  251. cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNET
  252. }
  253. if err := cmd.Run(); err != nil {
  254. return fmt.Errorf("namespace creation reexec command failed: %v", err)
  255. }
  256. return nil
  257. }
  258. func unmountNamespaceFile(path string) {
  259. if _, err := os.Stat(path); err == nil {
  260. syscall.Unmount(path, syscall.MNT_DETACH)
  261. }
  262. }
  263. func createNamespaceFile(path string) (err error) {
  264. var f *os.File
  265. once.Do(createBasePath)
  266. // Remove it from garbage collection list if present
  267. removeFromGarbagePaths(path)
  268. // If the path is there unmount it first
  269. unmountNamespaceFile(path)
  270. // wait for garbage collection to complete if it is in progress
  271. // before trying to create the file.
  272. gpmWg.Wait()
  273. if f, err = os.Create(path); err == nil {
  274. f.Close()
  275. }
  276. return err
  277. }
  278. func (n *networkNamespace) loopbackUp() error {
  279. iface, err := n.nlHandle.LinkByName("lo")
  280. if err != nil {
  281. return err
  282. }
  283. return n.nlHandle.LinkSetUp(iface)
  284. }
  285. func (n *networkNamespace) InvokeFunc(f func()) error {
  286. return nsInvoke(n.nsPath(), func(nsFD int) error { return nil }, func(callerFD int) error {
  287. f()
  288. return nil
  289. })
  290. }
  291. // InitOSContext initializes OS context while configuring network resources
  292. func InitOSContext() func() {
  293. runtime.LockOSThread()
  294. if err := ns.SetNamespace(); err != nil {
  295. logrus.Error(err)
  296. }
  297. return runtime.UnlockOSThread
  298. }
  299. func nsInvoke(path string, prefunc func(nsFD int) error, postfunc func(callerFD int) error) error {
  300. defer InitOSContext()()
  301. newNs, err := netns.GetFromPath(path)
  302. if err != nil {
  303. return fmt.Errorf("failed get network namespace %q: %v", path, err)
  304. }
  305. defer newNs.Close()
  306. // Invoked before the namespace switch happens but after the namespace file
  307. // handle is obtained.
  308. if err := prefunc(int(newNs)); err != nil {
  309. return fmt.Errorf("failed in prefunc: %v", err)
  310. }
  311. if err = netns.Set(newNs); err != nil {
  312. return err
  313. }
  314. defer ns.SetNamespace()
  315. // Invoked after the namespace switch.
  316. return postfunc(ns.ParseHandlerInt())
  317. }
  318. func (n *networkNamespace) nsPath() string {
  319. n.Lock()
  320. defer n.Unlock()
  321. return n.path
  322. }
  323. func (n *networkNamespace) Info() Info {
  324. return n
  325. }
  326. func (n *networkNamespace) Key() string {
  327. return n.path
  328. }
  329. func (n *networkNamespace) Destroy() error {
  330. if n.nlHandle != nil {
  331. n.nlHandle.Delete()
  332. }
  333. // Assuming no running process is executing in this network namespace,
  334. // unmounting is sufficient to destroy it.
  335. if err := syscall.Unmount(n.path, syscall.MNT_DETACH); err != nil {
  336. return err
  337. }
  338. // Stash it into the garbage collection list
  339. addToGarbagePaths(n.path)
  340. return nil
  341. }
  342. // Restore restore the network namespace
  343. func (n *networkNamespace) Restore(ifsopt map[string][]IfaceOption, routes []*types.StaticRoute, gw net.IP, gw6 net.IP) error {
  344. // restore interfaces
  345. for name, opts := range ifsopt {
  346. if !strings.Contains(name, "+") {
  347. return fmt.Errorf("wrong iface name in restore osl sandbox interface: %s", name)
  348. }
  349. seps := strings.Split(name, "+")
  350. srcName := seps[0]
  351. dstPrefix := seps[1]
  352. i := &nwIface{srcName: srcName, dstName: dstPrefix, ns: n}
  353. i.processInterfaceOptions(opts...)
  354. if i.master != "" {
  355. i.dstMaster = n.findDst(i.master, true)
  356. if i.dstMaster == "" {
  357. return fmt.Errorf("could not find an appropriate master %q for %q",
  358. i.master, i.srcName)
  359. }
  360. }
  361. if n.isDefault {
  362. i.dstName = i.srcName
  363. } else {
  364. links, err := n.nlHandle.LinkList()
  365. if err != nil {
  366. return fmt.Errorf("failed to retrieve list of links in network namespace %q during restore", n.path)
  367. }
  368. // due to the docker network connect/disconnect, so the dstName should
  369. // restore from the namespace
  370. for _, link := range links {
  371. addrs, err := n.nlHandle.AddrList(link, netlink.FAMILY_V4)
  372. if err != nil {
  373. return err
  374. }
  375. ifaceName := link.Attrs().Name
  376. if strings.HasPrefix(ifaceName, "vxlan") {
  377. if i.dstName == "vxlan" {
  378. i.dstName = ifaceName
  379. break
  380. }
  381. }
  382. // find the interface name by ip
  383. if i.address != nil {
  384. for _, addr := range addrs {
  385. if addr.IPNet.String() == i.address.String() {
  386. i.dstName = ifaceName
  387. break
  388. }
  389. continue
  390. }
  391. if i.dstName == ifaceName {
  392. break
  393. }
  394. }
  395. // This is to find the interface name of the pair in overlay sandbox
  396. if strings.HasPrefix(ifaceName, "veth") {
  397. if i.master != "" && i.dstName == "veth" {
  398. i.dstName = ifaceName
  399. }
  400. }
  401. }
  402. var index int
  403. indexStr := strings.TrimPrefix(i.dstName, dstPrefix)
  404. if indexStr != "" {
  405. index, err = strconv.Atoi(indexStr)
  406. if err != nil {
  407. return err
  408. }
  409. }
  410. index++
  411. n.Lock()
  412. if index > n.nextIfIndex {
  413. n.nextIfIndex = index
  414. }
  415. n.iFaces = append(n.iFaces, i)
  416. n.Unlock()
  417. }
  418. }
  419. // restore routes
  420. for _, r := range routes {
  421. n.Lock()
  422. n.staticRoutes = append(n.staticRoutes, r)
  423. n.Unlock()
  424. }
  425. // restore gateway
  426. if len(gw) > 0 {
  427. n.Lock()
  428. n.gw = gw
  429. n.Unlock()
  430. }
  431. if len(gw6) > 0 {
  432. n.Lock()
  433. n.gwv6 = gw6
  434. n.Unlock()
  435. }
  436. return nil
  437. }