namespace_linux.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. package osl
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. "net"
  6. "os"
  7. "os/exec"
  8. "runtime"
  9. "strconv"
  10. "strings"
  11. "sync"
  12. "syscall"
  13. "time"
  14. log "github.com/Sirupsen/logrus"
  15. "github.com/docker/docker/pkg/reexec"
  16. "github.com/docker/libnetwork/ns"
  17. "github.com/docker/libnetwork/types"
  18. "github.com/vishvananda/netlink"
  19. "github.com/vishvananda/netns"
  20. )
// prefix is the directory in which network namespace files are created
// (see createBasePath) and looked up (see GenerateKey).
const prefix = "/var/run/docker/netns"

var (
	// once makes sure createBasePath runs a single time.
	once sync.Once
	// garbagePathMap is the set of namespace file paths queued for removal
	// by removeUnusedPaths.
	garbagePathMap = make(map[string]bool)
	// gpmLock guards garbagePathMap; removeUnusedPaths also holds it while
	// reading gpmCleanupPeriod.
	gpmLock sync.Mutex
	// gpmWg is non-zero while removeUnusedPaths is deleting files;
	// createNamespaceFile waits on it before creating a file.
	gpmWg sync.WaitGroup
	// gpmCleanupPeriod is the interval of the periodic cleanup ticker.
	gpmCleanupPeriod = 60 * time.Second
	// gpmChan carries on-demand cleanup requests from GC; each request is a
	// channel that removeUnusedPaths closes once the cleanup pass is done.
	gpmChan = make(chan chan struct{})
	// nsOnce makes sure ns.Init runs a single time (see InitOSContext).
	nsOnce sync.Once
)
// The networkNamespace type is the linux implementation of the Sandbox
// interface. It represents a linux network namespace, and moves an interface
// into it when called on method AddInterface or sets the gateway etc.
type networkNamespace struct {
	// path is the namespace file path; it doubles as the sandbox key.
	path string
	// iFaces lists the interfaces tracked for this namespace.
	iFaces []*nwIface
	// gw and gwv6 are the recorded gateways (set by Restore).
	gw   net.IP
	gwv6 net.IP
	// staticRoutes lists the recorded static routes.
	staticRoutes []*types.StaticRoute
	// neighbors is not used in this part of the file.
	// NOTE(review): presumably the neighbor entries of the sandbox — confirm.
	neighbors []*neigh
	// nextIfIndex is kept by Restore at one past the highest numeric
	// interface-name suffix it has seen.
	nextIfIndex int
	// isDefault is set to !osCreate by NewSandbox.
	isDefault bool
	// nlHandle is the netlink handle opened on this namespace.
	nlHandle *netlink.Handle
	// Mutex is taken by nsPath and Restore around field access.
	sync.Mutex
}
// init registers reexecCreateNamespace under the "netns-create" reexec name,
// the same name createNetworkNamespace passes as argv[0] when it re-executes
// the binary.
func init() {
	reexec.Register("netns-create", reexecCreateNamespace)
}
  49. func createBasePath() {
  50. err := os.MkdirAll(prefix, 0755)
  51. if err != nil {
  52. panic("Could not create net namespace path directory")
  53. }
  54. // Start the garbage collection go routine
  55. go removeUnusedPaths()
  56. }
  57. func removeUnusedPaths() {
  58. gpmLock.Lock()
  59. period := gpmCleanupPeriod
  60. gpmLock.Unlock()
  61. ticker := time.NewTicker(period)
  62. for {
  63. var (
  64. gc chan struct{}
  65. gcOk bool
  66. )
  67. select {
  68. case <-ticker.C:
  69. case gc, gcOk = <-gpmChan:
  70. }
  71. gpmLock.Lock()
  72. pathList := make([]string, 0, len(garbagePathMap))
  73. for path := range garbagePathMap {
  74. pathList = append(pathList, path)
  75. }
  76. garbagePathMap = make(map[string]bool)
  77. gpmWg.Add(1)
  78. gpmLock.Unlock()
  79. for _, path := range pathList {
  80. os.Remove(path)
  81. }
  82. gpmWg.Done()
  83. if gcOk {
  84. close(gc)
  85. }
  86. }
  87. }
  88. func addToGarbagePaths(path string) {
  89. gpmLock.Lock()
  90. garbagePathMap[path] = true
  91. gpmLock.Unlock()
  92. }
  93. func removeFromGarbagePaths(path string) {
  94. gpmLock.Lock()
  95. delete(garbagePathMap, path)
  96. gpmLock.Unlock()
  97. }
  98. // GC triggers garbage collection of namespace path right away
  99. // and waits for it.
  100. func GC() {
  101. gpmLock.Lock()
  102. if len(garbagePathMap) == 0 {
  103. // No need for GC if map is empty
  104. gpmLock.Unlock()
  105. return
  106. }
  107. gpmLock.Unlock()
  108. // if content exists in the garbage paths
  109. // we can trigger GC to run, providing a
  110. // channel to be notified on completion
  111. waitGC := make(chan struct{})
  112. gpmChan <- waitGC
  113. // wait for GC completion
  114. <-waitGC
  115. }
  116. // GenerateKey generates a sandbox key based on the passed
  117. // container id.
  118. func GenerateKey(containerID string) string {
  119. maxLen := 12
  120. // Read sandbox key from host for overlay
  121. if strings.HasPrefix(containerID, "-") {
  122. var (
  123. index int
  124. indexStr string
  125. tmpkey string
  126. )
  127. dir, err := ioutil.ReadDir(prefix)
  128. if err != nil {
  129. return ""
  130. }
  131. for _, v := range dir {
  132. id := v.Name()
  133. if strings.HasSuffix(id, containerID[:maxLen-1]) {
  134. indexStr = strings.TrimSuffix(id, containerID[:maxLen-1])
  135. tmpindex, err := strconv.Atoi(indexStr)
  136. if err != nil {
  137. return ""
  138. }
  139. if tmpindex > index {
  140. index = tmpindex
  141. tmpkey = id
  142. }
  143. }
  144. }
  145. containerID = tmpkey
  146. if containerID == "" {
  147. return ""
  148. }
  149. }
  150. if len(containerID) < maxLen {
  151. maxLen = len(containerID)
  152. }
  153. return prefix + "/" + containerID[:maxLen]
  154. }
  155. // NewSandbox provides a new sandbox instance created in an os specific way
  156. // provided a key which uniquely identifies the sandbox
  157. func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
  158. if !isRestore {
  159. err := createNetworkNamespace(key, osCreate)
  160. if err != nil {
  161. return nil, err
  162. }
  163. } else {
  164. once.Do(createBasePath)
  165. }
  166. n := &networkNamespace{path: key, isDefault: !osCreate}
  167. sboxNs, err := netns.GetFromPath(n.path)
  168. if err != nil {
  169. return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
  170. }
  171. defer sboxNs.Close()
  172. n.nlHandle, err = netlink.NewHandleAt(sboxNs)
  173. if err != nil {
  174. return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
  175. }
  176. if err = n.loopbackUp(); err != nil {
  177. n.nlHandle.Delete()
  178. return nil, err
  179. }
  180. return n, nil
  181. }
// InterfaceOptions returns the namespace itself as the IfaceOptionSetter.
func (n *networkNamespace) InterfaceOptions() IfaceOptionSetter {
	return n
}
// NeighborOptions returns the namespace itself as the NeighborOptionSetter.
func (n *networkNamespace) NeighborOptions() NeighborOptionSetter {
	return n
}
// mountNetworkNamespace bind mounts basePath onto lnPath and returns the
// error reported by the mount system call, if any.
func mountNetworkNamespace(basePath string, lnPath string) error {
	return syscall.Mount(basePath, lnPath, "bind", syscall.MS_BIND, "")
}
  191. // GetSandboxForExternalKey returns sandbox object for the supplied path
  192. func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
  193. if err := createNamespaceFile(key); err != nil {
  194. return nil, err
  195. }
  196. if err := mountNetworkNamespace(basePath, key); err != nil {
  197. return nil, err
  198. }
  199. n := &networkNamespace{path: key}
  200. sboxNs, err := netns.GetFromPath(n.path)
  201. if err != nil {
  202. return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
  203. }
  204. defer sboxNs.Close()
  205. n.nlHandle, err = netlink.NewHandleAt(sboxNs)
  206. if err != nil {
  207. return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
  208. }
  209. if err = n.loopbackUp(); err != nil {
  210. n.nlHandle.Delete()
  211. return nil, err
  212. }
  213. return n, nil
  214. }
  215. func reexecCreateNamespace() {
  216. if len(os.Args) < 2 {
  217. log.Fatal("no namespace path provided")
  218. }
  219. if err := mountNetworkNamespace("/proc/self/ns/net", os.Args[1]); err != nil {
  220. log.Fatal(err)
  221. }
  222. }
  223. func createNetworkNamespace(path string, osCreate bool) error {
  224. if err := createNamespaceFile(path); err != nil {
  225. return err
  226. }
  227. cmd := &exec.Cmd{
  228. Path: reexec.Self(),
  229. Args: append([]string{"netns-create"}, path),
  230. Stdout: os.Stdout,
  231. Stderr: os.Stderr,
  232. }
  233. if osCreate {
  234. cmd.SysProcAttr = &syscall.SysProcAttr{}
  235. cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNET
  236. }
  237. if err := cmd.Run(); err != nil {
  238. return fmt.Errorf("namespace creation reexec command failed: %v", err)
  239. }
  240. return nil
  241. }
  242. func unmountNamespaceFile(path string) {
  243. if _, err := os.Stat(path); err == nil {
  244. syscall.Unmount(path, syscall.MNT_DETACH)
  245. }
  246. }
  247. func createNamespaceFile(path string) (err error) {
  248. var f *os.File
  249. once.Do(createBasePath)
  250. // Remove it from garbage collection list if present
  251. removeFromGarbagePaths(path)
  252. // If the path is there unmount it first
  253. unmountNamespaceFile(path)
  254. // wait for garbage collection to complete if it is in progress
  255. // before trying to create the file.
  256. gpmWg.Wait()
  257. if f, err = os.Create(path); err == nil {
  258. f.Close()
  259. }
  260. return err
  261. }
  262. func (n *networkNamespace) loopbackUp() error {
  263. iface, err := n.nlHandle.LinkByName("lo")
  264. if err != nil {
  265. return err
  266. }
  267. return n.nlHandle.LinkSetUp(iface)
  268. }
  269. func (n *networkNamespace) InvokeFunc(f func()) error {
  270. return nsInvoke(n.nsPath(), func(nsFD int) error { return nil }, func(callerFD int) error {
  271. f()
  272. return nil
  273. })
  274. }
// InitOSContext initializes OS context while configuring network resources.
// It runs ns.Init once, locks the calling goroutine to its OS thread and
// calls ns.SetNamespace, logging (not returning) any error from it. The
// returned function is runtime.UnlockOSThread; callers are expected to
// invoke it when done, typically as `defer InitOSContext()()`.
func InitOSContext() func() {
	nsOnce.Do(ns.Init)
	// The thread must be locked before the namespace is set on it.
	runtime.LockOSThread()
	if err := ns.SetNamespace(); err != nil {
		log.Error(err)
	}
	return runtime.UnlockOSThread
}
// nsInvoke runs postfunc inside the network namespace found at path.
//
// The steps are: set up the OS context (thread locked for the duration of
// the call), open the namespace at path, call prefunc with the namespace
// file descriptor, switch to the namespace with netns.Set, and finally call
// postfunc with the value of ns.ParseHandlerInt(). A prefunc error aborts
// the call before any switch happens. The error of postfunc is returned
// as is.
func nsInvoke(path string, prefunc func(nsFD int) error, postfunc func(callerFD int) error) error {
	// InitOSContext runs now; the function it returns (thread unlock) is
	// what gets deferred.
	defer InitOSContext()()

	newNs, err := netns.GetFromPath(path)
	if err != nil {
		return fmt.Errorf("failed get network namespace %q: %v", path, err)
	}
	defer newNs.Close()

	// Invoked before the namespace switch happens but after the namespace file
	// handle is obtained.
	if err := prefunc(int(newNs)); err != nil {
		return fmt.Errorf("failed in prefunc: %v", err)
	}

	if err = netns.Set(newNs); err != nil {
		return err
	}
	// NOTE(review): presumably switches the thread back to the original
	// namespace once postfunc is done — confirm against package ns.
	defer ns.SetNamespace()

	// Invoked after the namespace switch.
	return postfunc(ns.ParseHandlerInt())
}
  303. func (n *networkNamespace) nsPath() string {
  304. n.Lock()
  305. defer n.Unlock()
  306. return n.path
  307. }
// Info returns the namespace itself as the Info implementation.
func (n *networkNamespace) Info() Info {
	return n
}
// Key returns the namespace file path, which serves as the sandbox key.
// Unlike nsPath, it reads the field without taking the namespace lock.
func (n *networkNamespace) Key() string {
	return n.path
}
  314. func (n *networkNamespace) Destroy() error {
  315. if n.nlHandle != nil {
  316. n.nlHandle.Delete()
  317. }
  318. // Assuming no running process is executing in this network namespace,
  319. // unmounting is sufficient to destroy it.
  320. if err := syscall.Unmount(n.path, syscall.MNT_DETACH); err != nil {
  321. return err
  322. }
  323. // Stash it into the garbage collection list
  324. addToGarbagePaths(n.path)
  325. return nil
  326. }
  327. // Restore restore the network namespace
  328. func (n *networkNamespace) Restore(ifsopt map[string][]IfaceOption, routes []*types.StaticRoute, gw net.IP, gw6 net.IP) error {
  329. // restore interfaces
  330. for name, opts := range ifsopt {
  331. if !strings.Contains(name, "+") {
  332. return fmt.Errorf("wrong iface name in restore osl sandbox interface: %s", name)
  333. }
  334. seps := strings.Split(name, "+")
  335. srcName := seps[0]
  336. dstPrefix := seps[1]
  337. i := &nwIface{srcName: srcName, dstName: dstPrefix, ns: n}
  338. i.processInterfaceOptions(opts...)
  339. if i.master != "" {
  340. i.dstMaster = n.findDst(i.master, true)
  341. if i.dstMaster == "" {
  342. return fmt.Errorf("could not find an appropriate master %q for %q",
  343. i.master, i.srcName)
  344. }
  345. }
  346. if n.isDefault {
  347. i.dstName = i.srcName
  348. } else {
  349. links, err := n.nlHandle.LinkList()
  350. if err != nil {
  351. return fmt.Errorf("failed to retrieve list of links in network namespace %q during restore", n.path)
  352. }
  353. // due to the docker network connect/disconnect, so the dstName should
  354. // restore from the namespace
  355. for _, link := range links {
  356. addrs, err := n.nlHandle.AddrList(link, netlink.FAMILY_V4)
  357. if err != nil {
  358. return err
  359. }
  360. ifaceName := link.Attrs().Name
  361. if strings.HasPrefix(ifaceName, "vxlan") {
  362. if i.dstName == "vxlan" {
  363. i.dstName = ifaceName
  364. break
  365. }
  366. }
  367. // find the interface name by ip
  368. if i.address != nil {
  369. for _, addr := range addrs {
  370. if addr.IPNet.String() == i.address.String() {
  371. i.dstName = ifaceName
  372. break
  373. }
  374. continue
  375. }
  376. if i.dstName == ifaceName {
  377. break
  378. }
  379. }
  380. // This is to find the interface name of the pair in overlay sandbox
  381. if strings.HasPrefix(ifaceName, "veth") {
  382. if i.master != "" && i.dstName == "veth" {
  383. i.dstName = ifaceName
  384. }
  385. }
  386. }
  387. var index int
  388. indexStr := strings.TrimPrefix(i.dstName, dstPrefix)
  389. if indexStr != "" {
  390. index, err = strconv.Atoi(indexStr)
  391. if err != nil {
  392. return err
  393. }
  394. }
  395. index++
  396. n.Lock()
  397. if index > n.nextIfIndex {
  398. n.nextIfIndex = index
  399. }
  400. n.iFaces = append(n.iFaces, i)
  401. n.Unlock()
  402. }
  403. }
  404. // restore routes
  405. for _, r := range routes {
  406. n.Lock()
  407. n.staticRoutes = append(n.staticRoutes, r)
  408. n.Unlock()
  409. }
  410. // restore gateway
  411. if len(gw) > 0 {
  412. n.Lock()
  413. n.gw = gw
  414. n.Unlock()
  415. }
  416. if len(gw6) > 0 {
  417. n.Lock()
  418. n.gwv6 = gw6
  419. n.Unlock()
  420. }
  421. return nil
  422. }