namespace_linux.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. package osl
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. "net"
  6. "os"
  7. "os/exec"
  8. "runtime"
  9. "strconv"
  10. "strings"
  11. "sync"
  12. "syscall"
  13. "time"
  14. log "github.com/Sirupsen/logrus"
  15. "github.com/docker/docker/pkg/reexec"
  16. "github.com/docker/libnetwork/ns"
  17. "github.com/docker/libnetwork/types"
  18. "github.com/vishvananda/netlink"
  19. "github.com/vishvananda/netns"
  20. )
  // prefix is the directory that holds the network namespace files. It is
  // created by createBasePath and is the base of every key returned by
  // GenerateKey.
  const prefix = "/var/run/docker/netns"
  var (
  	// once guards the single execution of createBasePath.
  	once sync.Once
  	// garbagePathMap is the set of namespace file paths waiting to be
  	// removed by removeUnusedPaths.
  	garbagePathMap = make(map[string]bool)
  	// gpmLock protects garbagePathMap; it is also held while reading
  	// gpmCleanupPeriod.
  	gpmLock sync.Mutex
  	// gpmWg is held by removeUnusedPaths while it deletes files, so that
  	// createNamespaceFile can wait for an in-progress sweep to finish.
  	gpmWg sync.WaitGroup
  	// gpmCleanupPeriod is the interval of the periodic garbage collection
  	// ticker started in removeUnusedPaths.
  	gpmCleanupPeriod = 60 * time.Second
  	// gpmChan carries on-demand collection requests from GC. Each request
  	// is a channel that removeUnusedPaths closes once the sweep is done.
  	gpmChan = make(chan chan struct{})
  )
  // The networkNamespace type is the linux implementation of the Sandbox
  // interface. It represents a linux network namespace, and moves an interface
  // into it when called on method AddInterface or sets the gateway etc.
  type networkNamespace struct {
  	// path is the namespace file path; it doubles as the sandbox key.
  	path string
  	// iFaces lists the interfaces tracked for this namespace.
  	iFaces []*nwIface
  	// gw is the IPv4 gateway recorded for the namespace.
  	gw net.IP
  	// gwv6 is the IPv6 gateway recorded for the namespace.
  	gwv6 net.IP
  	// staticRoutes lists the static routes recorded for the namespace.
  	staticRoutes []*types.StaticRoute
  	// neighbors is presumably the list of neighbor entries programmed in
  	// the namespace; it is not used in this part of the file (TODO confirm).
  	neighbors []*neigh
  	// nextIfIndex is raised by Restore to one past the highest numeric
  	// suffix found on a restored interface name.
  	nextIfIndex int
  	// isDefault is set by NewSandbox to !osCreate, i.e. it is true when no
  	// new OS-level network namespace was requested.
  	isDefault bool
  	// nlHandle is the netlink handle opened on the namespace.
  	nlHandle *netlink.Handle
  	// Mutex guards the mutable fields above.
  	sync.Mutex
  }
  45. func init() {
  46. reexec.Register("netns-create", reexecCreateNamespace)
  47. }
  48. func createBasePath() {
  49. err := os.MkdirAll(prefix, 0755)
  50. if err != nil {
  51. panic("Could not create net namespace path directory")
  52. }
  53. // Start the garbage collection go routine
  54. go removeUnusedPaths()
  55. }
  56. func removeUnusedPaths() {
  57. gpmLock.Lock()
  58. period := gpmCleanupPeriod
  59. gpmLock.Unlock()
  60. ticker := time.NewTicker(period)
  61. for {
  62. var (
  63. gc chan struct{}
  64. gcOk bool
  65. )
  66. select {
  67. case <-ticker.C:
  68. case gc, gcOk = <-gpmChan:
  69. }
  70. gpmLock.Lock()
  71. pathList := make([]string, 0, len(garbagePathMap))
  72. for path := range garbagePathMap {
  73. pathList = append(pathList, path)
  74. }
  75. garbagePathMap = make(map[string]bool)
  76. gpmWg.Add(1)
  77. gpmLock.Unlock()
  78. for _, path := range pathList {
  79. os.Remove(path)
  80. }
  81. gpmWg.Done()
  82. if gcOk {
  83. close(gc)
  84. }
  85. }
  86. }
  87. func addToGarbagePaths(path string) {
  88. gpmLock.Lock()
  89. garbagePathMap[path] = true
  90. gpmLock.Unlock()
  91. }
  92. func removeFromGarbagePaths(path string) {
  93. gpmLock.Lock()
  94. delete(garbagePathMap, path)
  95. gpmLock.Unlock()
  96. }
  97. // GC triggers garbage collection of namespace path right away
  98. // and waits for it.
  99. func GC() {
  100. gpmLock.Lock()
  101. if len(garbagePathMap) == 0 {
  102. // No need for GC if map is empty
  103. gpmLock.Unlock()
  104. return
  105. }
  106. gpmLock.Unlock()
  107. // if content exists in the garbage paths
  108. // we can trigger GC to run, providing a
  109. // channel to be notified on completion
  110. waitGC := make(chan struct{})
  111. gpmChan <- waitGC
  112. // wait for GC completion
  113. <-waitGC
  114. }
  115. // GenerateKey generates a sandbox key based on the passed
  116. // container id.
  117. func GenerateKey(containerID string) string {
  118. maxLen := 12
  119. // Read sandbox key from host for overlay
  120. if strings.HasPrefix(containerID, "-") {
  121. var (
  122. index int
  123. indexStr string
  124. tmpkey string
  125. )
  126. dir, err := ioutil.ReadDir(prefix)
  127. if err != nil {
  128. return ""
  129. }
  130. for _, v := range dir {
  131. id := v.Name()
  132. if strings.HasSuffix(id, containerID[:maxLen-1]) {
  133. indexStr = strings.TrimSuffix(id, containerID[:maxLen-1])
  134. tmpindex, err := strconv.Atoi(indexStr)
  135. if err != nil {
  136. return ""
  137. }
  138. if tmpindex > index {
  139. index = tmpindex
  140. tmpkey = id
  141. }
  142. }
  143. }
  144. containerID = tmpkey
  145. if containerID == "" {
  146. return ""
  147. }
  148. }
  149. if len(containerID) < maxLen {
  150. maxLen = len(containerID)
  151. }
  152. return prefix + "/" + containerID[:maxLen]
  153. }
  154. // NewSandbox provides a new sandbox instance created in an os specific way
  155. // provided a key which uniquely identifies the sandbox
  156. func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
  157. if !isRestore {
  158. err := createNetworkNamespace(key, osCreate)
  159. if err != nil {
  160. return nil, err
  161. }
  162. } else {
  163. once.Do(createBasePath)
  164. }
  165. n := &networkNamespace{path: key, isDefault: !osCreate}
  166. sboxNs, err := netns.GetFromPath(n.path)
  167. if err != nil {
  168. return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
  169. }
  170. defer sboxNs.Close()
  171. n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
  172. if err != nil {
  173. return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
  174. }
  175. if err = n.loopbackUp(); err != nil {
  176. n.nlHandle.Delete()
  177. return nil, err
  178. }
  179. return n, nil
  180. }
  181. func (n *networkNamespace) InterfaceOptions() IfaceOptionSetter {
  182. return n
  183. }
  184. func (n *networkNamespace) NeighborOptions() NeighborOptionSetter {
  185. return n
  186. }
  // mountNetworkNamespace bind-mounts basePath onto lnPath and returns the
  // error reported by the mount system call, if any.
  func mountNetworkNamespace(basePath string, lnPath string) error {
  	const (
  		fsType = "bind"
  		data   = ""
  	)
  	return syscall.Mount(basePath, lnPath, fsType, syscall.MS_BIND, data)
  }
  190. // GetSandboxForExternalKey returns sandbox object for the supplied path
  191. func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
  192. if err := createNamespaceFile(key); err != nil {
  193. return nil, err
  194. }
  195. if err := mountNetworkNamespace(basePath, key); err != nil {
  196. return nil, err
  197. }
  198. n := &networkNamespace{path: key}
  199. sboxNs, err := netns.GetFromPath(n.path)
  200. if err != nil {
  201. return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
  202. }
  203. defer sboxNs.Close()
  204. n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
  205. if err != nil {
  206. return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
  207. }
  208. if err = n.loopbackUp(); err != nil {
  209. n.nlHandle.Delete()
  210. return nil, err
  211. }
  212. return n, nil
  213. }
  214. func reexecCreateNamespace() {
  215. if len(os.Args) < 2 {
  216. log.Fatal("no namespace path provided")
  217. }
  218. if err := mountNetworkNamespace("/proc/self/ns/net", os.Args[1]); err != nil {
  219. log.Fatal(err)
  220. }
  221. }
  222. func createNetworkNamespace(path string, osCreate bool) error {
  223. if err := createNamespaceFile(path); err != nil {
  224. return err
  225. }
  226. cmd := &exec.Cmd{
  227. Path: reexec.Self(),
  228. Args: append([]string{"netns-create"}, path),
  229. Stdout: os.Stdout,
  230. Stderr: os.Stderr,
  231. }
  232. if osCreate {
  233. cmd.SysProcAttr = &syscall.SysProcAttr{}
  234. cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNET
  235. }
  236. if err := cmd.Run(); err != nil {
  237. return fmt.Errorf("namespace creation reexec command failed: %v", err)
  238. }
  239. return nil
  240. }
  // unmountNamespaceFile lazily unmounts path when it can be stat'ed. The
  // result of the unmount is ignored.
  func unmountNamespaceFile(path string) {
  	_, statErr := os.Stat(path)
  	if statErr != nil {
  		return
  	}
  	syscall.Unmount(path, syscall.MNT_DETACH)
  }
  246. func createNamespaceFile(path string) (err error) {
  247. var f *os.File
  248. once.Do(createBasePath)
  249. // Remove it from garbage collection list if present
  250. removeFromGarbagePaths(path)
  251. // If the path is there unmount it first
  252. unmountNamespaceFile(path)
  253. // wait for garbage collection to complete if it is in progress
  254. // before trying to create the file.
  255. gpmWg.Wait()
  256. if f, err = os.Create(path); err == nil {
  257. f.Close()
  258. }
  259. return err
  260. }
  261. func (n *networkNamespace) loopbackUp() error {
  262. iface, err := n.nlHandle.LinkByName("lo")
  263. if err != nil {
  264. return err
  265. }
  266. return n.nlHandle.LinkSetUp(iface)
  267. }
  268. func (n *networkNamespace) InvokeFunc(f func()) error {
  269. return nsInvoke(n.nsPath(), func(nsFD int) error { return nil }, func(callerFD int) error {
  270. f()
  271. return nil
  272. })
  273. }
  274. // InitOSContext initializes OS context while configuring network resources
  275. func InitOSContext() func() {
  276. runtime.LockOSThread()
  277. if err := ns.SetNamespace(); err != nil {
  278. log.Error(err)
  279. }
  280. return runtime.UnlockOSThread
  281. }
  282. func nsInvoke(path string, prefunc func(nsFD int) error, postfunc func(callerFD int) error) error {
  283. defer InitOSContext()()
  284. newNs, err := netns.GetFromPath(path)
  285. if err != nil {
  286. return fmt.Errorf("failed get network namespace %q: %v", path, err)
  287. }
  288. defer newNs.Close()
  289. // Invoked before the namespace switch happens but after the namespace file
  290. // handle is obtained.
  291. if err := prefunc(int(newNs)); err != nil {
  292. return fmt.Errorf("failed in prefunc: %v", err)
  293. }
  294. if err = netns.Set(newNs); err != nil {
  295. return err
  296. }
  297. defer ns.SetNamespace()
  298. // Invoked after the namespace switch.
  299. return postfunc(ns.ParseHandlerInt())
  300. }
  301. func (n *networkNamespace) nsPath() string {
  302. n.Lock()
  303. defer n.Unlock()
  304. return n.path
  305. }
  306. func (n *networkNamespace) Info() Info {
  307. return n
  308. }
  309. func (n *networkNamespace) Key() string {
  310. return n.path
  311. }
  312. func (n *networkNamespace) Destroy() error {
  313. if n.nlHandle != nil {
  314. n.nlHandle.Delete()
  315. }
  316. // Assuming no running process is executing in this network namespace,
  317. // unmounting is sufficient to destroy it.
  318. if err := syscall.Unmount(n.path, syscall.MNT_DETACH); err != nil {
  319. return err
  320. }
  321. // Stash it into the garbage collection list
  322. addToGarbagePaths(n.path)
  323. return nil
  324. }
  325. // Restore restore the network namespace
  326. func (n *networkNamespace) Restore(ifsopt map[string][]IfaceOption, routes []*types.StaticRoute, gw net.IP, gw6 net.IP) error {
  327. // restore interfaces
  328. for name, opts := range ifsopt {
  329. if !strings.Contains(name, "+") {
  330. return fmt.Errorf("wrong iface name in restore osl sandbox interface: %s", name)
  331. }
  332. seps := strings.Split(name, "+")
  333. srcName := seps[0]
  334. dstPrefix := seps[1]
  335. i := &nwIface{srcName: srcName, dstName: dstPrefix, ns: n}
  336. i.processInterfaceOptions(opts...)
  337. if i.master != "" {
  338. i.dstMaster = n.findDst(i.master, true)
  339. if i.dstMaster == "" {
  340. return fmt.Errorf("could not find an appropriate master %q for %q",
  341. i.master, i.srcName)
  342. }
  343. }
  344. if n.isDefault {
  345. i.dstName = i.srcName
  346. } else {
  347. links, err := n.nlHandle.LinkList()
  348. if err != nil {
  349. return fmt.Errorf("failed to retrieve list of links in network namespace %q during restore", n.path)
  350. }
  351. // due to the docker network connect/disconnect, so the dstName should
  352. // restore from the namespace
  353. for _, link := range links {
  354. addrs, err := n.nlHandle.AddrList(link, netlink.FAMILY_V4)
  355. if err != nil {
  356. return err
  357. }
  358. ifaceName := link.Attrs().Name
  359. if strings.HasPrefix(ifaceName, "vxlan") {
  360. if i.dstName == "vxlan" {
  361. i.dstName = ifaceName
  362. break
  363. }
  364. }
  365. // find the interface name by ip
  366. if i.address != nil {
  367. for _, addr := range addrs {
  368. if addr.IPNet.String() == i.address.String() {
  369. i.dstName = ifaceName
  370. break
  371. }
  372. continue
  373. }
  374. if i.dstName == ifaceName {
  375. break
  376. }
  377. }
  378. // This is to find the interface name of the pair in overlay sandbox
  379. if strings.HasPrefix(ifaceName, "veth") {
  380. if i.master != "" && i.dstName == "veth" {
  381. i.dstName = ifaceName
  382. }
  383. }
  384. }
  385. var index int
  386. indexStr := strings.TrimPrefix(i.dstName, dstPrefix)
  387. if indexStr != "" {
  388. index, err = strconv.Atoi(indexStr)
  389. if err != nil {
  390. return err
  391. }
  392. }
  393. index++
  394. n.Lock()
  395. if index > n.nextIfIndex {
  396. n.nextIfIndex = index
  397. }
  398. n.iFaces = append(n.iFaces, i)
  399. n.Unlock()
  400. }
  401. }
  402. // restore routes
  403. for _, r := range routes {
  404. n.Lock()
  405. n.staticRoutes = append(n.staticRoutes, r)
  406. n.Unlock()
  407. }
  408. // restore gateway
  409. if len(gw) > 0 {
  410. n.Lock()
  411. n.gw = gw
  412. n.Unlock()
  413. }
  414. if len(gw6) > 0 {
  415. n.Lock()
  416. n.gwv6 = gw6
  417. n.Unlock()
  418. }
  419. return nil
  420. }