ov_network.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671
  1. package overlay
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "net"
  6. "os"
  7. "path/filepath"
  8. "strings"
  9. "sync"
  10. "syscall"
  11. "github.com/Sirupsen/logrus"
  12. "github.com/docker/libnetwork/datastore"
  13. "github.com/docker/libnetwork/driverapi"
  14. "github.com/docker/libnetwork/netutils"
  15. "github.com/docker/libnetwork/osl"
  16. "github.com/docker/libnetwork/resolvconf"
  17. "github.com/docker/libnetwork/types"
  18. "github.com/vishvananda/netlink"
  19. "github.com/vishvananda/netlink/nl"
  20. )
  // hostMode is switched on by setHostMode, either because the
  // _OVERLAY_HOST_MODE environment variable is set or because a test vxlan
  // interface could not be moved into a network namespace.
  var hostMode bool

  // hostModeOnce makes sure setHostMode is executed at most once; it is
  // triggered from initSandbox.
  var hostModeOnce sync.Once
  25. type networkTable map[string]*network
  // subnet holds the per-subnet state of an overlay network.
  type subnet struct {
  	// once guards the one-time call to initSubnetSandbox; leaveSandbox
  	// replaces it so that the subnet can be initialized again later.
  	once *sync.Once

  	// vxlanName and brName are the interface names recorded by
  	// initSubnetSandbox once both devices have been set up.
  	vxlanName string
  	brName    string

  	// vni is the vxlan ID assigned to this subnet (0 means unassigned).
  	vni uint32

  	// initErr keeps the result of the last initSubnetSandbox run.
  	initErr error

  	// subnetIP is the address pool and gwIP the gateway of the subnet.
  	subnetIP *net.IPNet
  	gwIP     *net.IPNet
  }
  // subnetJSON is the serialized form of a subnet as produced by
  // (*network).Value and consumed by (*network).SetValue.
  type subnetJSON struct {
  	// SubnetIP is the subnet pool in its string form.
  	SubnetIP string
  	// GwIP is the gateway address in its string form.
  	GwIP string
  	// Vni is the vxlan ID assigned to the subnet.
  	Vni uint32
  }
  40. type network struct {
  41. id string
  42. dbIndex uint64
  43. dbExists bool
  44. sbox osl.Sandbox
  45. endpoints endpointTable
  46. driver *driver
  47. joinCnt int
  48. once *sync.Once
  49. initEpoch int
  50. initErr error
  51. subnets []*subnet
  52. sync.Mutex
  53. }
  54. func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
  55. if id == "" {
  56. return fmt.Errorf("invalid network id")
  57. }
  58. if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
  59. return types.BadRequestErrorf("ipv4 pool is empty")
  60. }
  61. // Since we perform lazy configuration make sure we try
  62. // configuring the driver when we enter CreateNetwork
  63. if err := d.configure(); err != nil {
  64. return err
  65. }
  66. n := &network{
  67. id: id,
  68. driver: d,
  69. endpoints: endpointTable{},
  70. once: &sync.Once{},
  71. subnets: []*subnet{},
  72. }
  73. for _, ipd := range ipV4Data {
  74. s := &subnet{
  75. subnetIP: ipd.Pool,
  76. gwIP: ipd.Gateway,
  77. once: &sync.Once{},
  78. }
  79. n.subnets = append(n.subnets, s)
  80. }
  81. if err := n.writeToStore(); err != nil {
  82. return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
  83. }
  84. d.addNetwork(n)
  85. return nil
  86. }
  87. func (d *driver) DeleteNetwork(nid string) error {
  88. if nid == "" {
  89. return fmt.Errorf("invalid network id")
  90. }
  91. n := d.network(nid)
  92. if n == nil {
  93. return fmt.Errorf("could not find network with id %s", nid)
  94. }
  95. d.deleteNetwork(nid)
  96. return n.releaseVxlanID()
  97. }
  98. func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
  99. return nil
  100. }
  101. func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
  102. return nil
  103. }
  104. func (n *network) incEndpointCount() {
  105. n.Lock()
  106. defer n.Unlock()
  107. n.joinCnt++
  108. }
  109. func (n *network) joinSandbox() error {
  110. // If there is a race between two go routines here only one will win
  111. // the other will wait.
  112. n.once.Do(func() {
  113. // save the error status of initSandbox in n.initErr so that
  114. // all the racing go routines are able to know the status.
  115. n.initErr = n.initSandbox()
  116. })
  117. return n.initErr
  118. }
  119. func (n *network) joinSubnetSandbox(s *subnet) error {
  120. s.once.Do(func() {
  121. s.initErr = n.initSubnetSandbox(s)
  122. })
  123. return s.initErr
  124. }
  125. func (n *network) leaveSandbox() {
  126. n.Lock()
  127. defer n.Unlock()
  128. n.joinCnt--
  129. if n.joinCnt != 0 {
  130. return
  131. }
  132. // We are about to destroy sandbox since the container is leaving the network
  133. // Reinitialize the once variable so that we will be able to trigger one time
  134. // sandbox initialization(again) when another container joins subsequently.
  135. n.once = &sync.Once{}
  136. for _, s := range n.subnets {
  137. s.once = &sync.Once{}
  138. }
  139. n.destroySandbox()
  140. }
  141. // to be called while holding network lock
  142. func (n *network) destroySandbox() {
  143. if n.sbox != nil {
  144. for _, iface := range n.sbox.Info().Interfaces() {
  145. if err := iface.Remove(); err != nil {
  146. logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err)
  147. }
  148. }
  149. for _, s := range n.subnets {
  150. if hostMode {
  151. if err := removeFilters(n.id[:12], s.brName); err != nil {
  152. logrus.Warnf("Could not remove overlay filters: %v", err)
  153. }
  154. }
  155. if s.vxlanName != "" {
  156. err := deleteInterface(s.vxlanName)
  157. if err != nil {
  158. logrus.Warnf("could not cleanup sandbox properly: %v", err)
  159. }
  160. }
  161. }
  162. if hostMode {
  163. if err := removeNetworkChain(n.id[:12]); err != nil {
  164. logrus.Warnf("could not remove network chain: %v", err)
  165. }
  166. }
  167. n.sbox.Destroy()
  168. n.sbox = nil
  169. }
  170. }
  171. func setHostMode() {
  172. if os.Getenv("_OVERLAY_HOST_MODE") != "" {
  173. hostMode = true
  174. return
  175. }
  176. err := createVxlan("testvxlan", 1)
  177. if err != nil {
  178. logrus.Errorf("Failed to create testvxlan interface: %v", err)
  179. return
  180. }
  181. defer deleteInterface("testvxlan")
  182. path := "/proc/self/ns/net"
  183. f, err := os.OpenFile(path, os.O_RDONLY, 0)
  184. if err != nil {
  185. logrus.Errorf("Failed to open path %s for network namespace for setting host mode: %v", path, err)
  186. return
  187. }
  188. defer f.Close()
  189. nsFD := f.Fd()
  190. iface, err := netlink.LinkByName("testvxlan")
  191. if err != nil {
  192. logrus.Errorf("Failed to get link testvxlan: %v", err)
  193. return
  194. }
  195. // If we are not able to move the vxlan interface to a namespace
  196. // then fallback to host mode
  197. if err := netlink.LinkSetNsFd(iface, int(nsFD)); err != nil {
  198. hostMode = true
  199. }
  200. }
  201. func (n *network) generateVxlanName(s *subnet) string {
  202. return "vx-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + n.id[:5]
  203. }
  204. func (n *network) generateBridgeName(s *subnet) string {
  205. return "ov-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + n.id[:5]
  206. }
  207. func isOverlap(nw *net.IPNet) bool {
  208. var nameservers []string
  209. if rc, err := resolvconf.Get(); err == nil {
  210. nameservers = resolvconf.GetNameserversAsCIDR(rc.Content)
  211. }
  212. if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil {
  213. return true
  214. }
  215. if err := netutils.CheckRouteOverlaps(nw); err != nil {
  216. return true
  217. }
  218. return false
  219. }
  220. func (n *network) initSubnetSandbox(s *subnet) error {
  221. brName := n.generateBridgeName(s)
  222. vxlanName := n.generateVxlanName(s)
  223. if hostMode {
  224. // Try to delete stale bridge interface if it exists
  225. deleteInterface(brName)
  226. // Try to delete the vxlan interface by vni if already present
  227. deleteVxlanByVNI(n.vxlanID(s))
  228. if isOverlap(s.subnetIP) {
  229. return fmt.Errorf("overlay subnet %s has conflicts in the host while running in host mode", s.subnetIP.String())
  230. }
  231. }
  232. // create a bridge and vxlan device for this subnet and move it to the sandbox
  233. sbox := n.sandbox()
  234. if err := sbox.AddInterface(brName, "br",
  235. sbox.InterfaceOptions().Address(s.gwIP),
  236. sbox.InterfaceOptions().Bridge(true)); err != nil {
  237. return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
  238. }
  239. err := createVxlan(vxlanName, n.vxlanID(s))
  240. if err != nil {
  241. return err
  242. }
  243. if err := sbox.AddInterface(vxlanName, "vxlan",
  244. sbox.InterfaceOptions().Master(brName)); err != nil {
  245. return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
  246. }
  247. if hostMode {
  248. if err := addFilters(n.id[:12], brName); err != nil {
  249. return err
  250. }
  251. }
  252. n.Lock()
  253. s.vxlanName = vxlanName
  254. s.brName = brName
  255. n.Unlock()
  256. return nil
  257. }
  258. func (n *network) cleanupStaleSandboxes() {
  259. filepath.Walk(filepath.Dir(osl.GenerateKey("walk")),
  260. func(path string, info os.FileInfo, err error) error {
  261. _, fname := filepath.Split(path)
  262. pList := strings.Split(fname, "-")
  263. if len(pList) <= 1 {
  264. return nil
  265. }
  266. pattern := pList[1]
  267. if strings.Contains(n.id, pattern) {
  268. syscall.Unmount(path, syscall.MNT_DETACH)
  269. os.Remove(path)
  270. }
  271. return nil
  272. })
  273. }
  274. func (n *network) initSandbox() error {
  275. n.Lock()
  276. n.initEpoch++
  277. n.Unlock()
  278. hostModeOnce.Do(setHostMode)
  279. if hostMode {
  280. if err := addNetworkChain(n.id[:12]); err != nil {
  281. return err
  282. }
  283. }
  284. // If there are any stale sandboxes related to this network
  285. // from previous daemon life clean it up here
  286. n.cleanupStaleSandboxes()
  287. sbox, err := osl.NewSandbox(
  288. osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch)+n.id), !hostMode)
  289. if err != nil {
  290. return fmt.Errorf("could not create network sandbox: %v", err)
  291. }
  292. n.setSandbox(sbox)
  293. n.driver.peerDbUpdateSandbox(n.id)
  294. var nlSock *nl.NetlinkSocket
  295. sbox.InvokeFunc(func() {
  296. nlSock, err = nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
  297. if err != nil {
  298. err = fmt.Errorf("failed to subscribe to neighbor group netlink messages")
  299. }
  300. })
  301. go n.watchMiss(nlSock)
  302. return nil
  303. }
  304. func (n *network) watchMiss(nlSock *nl.NetlinkSocket) {
  305. for {
  306. msgs, err := nlSock.Receive()
  307. if err != nil {
  308. logrus.Errorf("Failed to receive from netlink: %v ", err)
  309. continue
  310. }
  311. for _, msg := range msgs {
  312. if msg.Header.Type != syscall.RTM_GETNEIGH && msg.Header.Type != syscall.RTM_NEWNEIGH {
  313. continue
  314. }
  315. neigh, err := netlink.NeighDeserialize(msg.Data)
  316. if err != nil {
  317. logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err)
  318. continue
  319. }
  320. if neigh.IP.To16() != nil {
  321. continue
  322. }
  323. if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
  324. continue
  325. }
  326. mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, neigh.IP)
  327. if err != nil {
  328. logrus.Errorf("could not resolve peer %q: %v", neigh.IP, err)
  329. continue
  330. }
  331. if err := n.driver.peerAdd(n.id, "dummy", neigh.IP, IPmask, mac, vtep, true); err != nil {
  332. logrus.Errorf("could not add neighbor entry for missed peer %q: %v", neigh.IP, err)
  333. }
  334. }
  335. }
  336. }
  337. func (d *driver) addNetwork(n *network) {
  338. d.Lock()
  339. d.networks[n.id] = n
  340. d.Unlock()
  341. }
  342. func (d *driver) deleteNetwork(nid string) {
  343. d.Lock()
  344. delete(d.networks, nid)
  345. d.Unlock()
  346. }
  347. func (d *driver) network(nid string) *network {
  348. d.Lock()
  349. networks := d.networks
  350. d.Unlock()
  351. n, ok := networks[nid]
  352. if !ok {
  353. n = d.getNetworkFromStore(nid)
  354. if n != nil {
  355. n.driver = d
  356. n.endpoints = endpointTable{}
  357. n.once = &sync.Once{}
  358. networks[nid] = n
  359. }
  360. }
  361. return n
  362. }
  363. func (d *driver) getNetworkFromStore(nid string) *network {
  364. if d.store == nil {
  365. return nil
  366. }
  367. n := &network{id: nid}
  368. if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
  369. return nil
  370. }
  371. return n
  372. }
  373. func (n *network) sandbox() osl.Sandbox {
  374. n.Lock()
  375. defer n.Unlock()
  376. return n.sbox
  377. }
  378. func (n *network) setSandbox(sbox osl.Sandbox) {
  379. n.Lock()
  380. n.sbox = sbox
  381. n.Unlock()
  382. }
  383. func (n *network) vxlanID(s *subnet) uint32 {
  384. n.Lock()
  385. defer n.Unlock()
  386. return s.vni
  387. }
  388. func (n *network) setVxlanID(s *subnet, vni uint32) {
  389. n.Lock()
  390. s.vni = vni
  391. n.Unlock()
  392. }
  393. func (n *network) Key() []string {
  394. return []string{"overlay", "network", n.id}
  395. }
  396. func (n *network) KeyPrefix() []string {
  397. return []string{"overlay", "network"}
  398. }
  399. func (n *network) Value() []byte {
  400. netJSON := []*subnetJSON{}
  401. for _, s := range n.subnets {
  402. sj := &subnetJSON{
  403. SubnetIP: s.subnetIP.String(),
  404. GwIP: s.gwIP.String(),
  405. Vni: s.vni,
  406. }
  407. netJSON = append(netJSON, sj)
  408. }
  409. b, err := json.Marshal(netJSON)
  410. if err != nil {
  411. return []byte{}
  412. }
  413. return b
  414. }
  415. func (n *network) Index() uint64 {
  416. return n.dbIndex
  417. }
  418. func (n *network) SetIndex(index uint64) {
  419. n.dbIndex = index
  420. n.dbExists = true
  421. }
  422. func (n *network) Exists() bool {
  423. return n.dbExists
  424. }
  425. func (n *network) Skip() bool {
  426. return false
  427. }
  428. func (n *network) SetValue(value []byte) error {
  429. var newNet bool
  430. netJSON := []*subnetJSON{}
  431. err := json.Unmarshal(value, &netJSON)
  432. if err != nil {
  433. return err
  434. }
  435. if len(n.subnets) == 0 {
  436. newNet = true
  437. }
  438. for _, sj := range netJSON {
  439. subnetIPstr := sj.SubnetIP
  440. gwIPstr := sj.GwIP
  441. vni := sj.Vni
  442. subnetIP, _ := types.ParseCIDR(subnetIPstr)
  443. gwIP, _ := types.ParseCIDR(gwIPstr)
  444. if newNet {
  445. s := &subnet{
  446. subnetIP: subnetIP,
  447. gwIP: gwIP,
  448. vni: vni,
  449. once: &sync.Once{},
  450. }
  451. n.subnets = append(n.subnets, s)
  452. } else {
  453. sNet := n.getMatchingSubnet(subnetIP)
  454. if sNet != nil {
  455. sNet.vni = vni
  456. }
  457. }
  458. }
  459. return nil
  460. }
  461. func (n *network) DataScope() string {
  462. return datastore.GlobalScope
  463. }
  464. func (n *network) writeToStore() error {
  465. return n.driver.store.PutObjectAtomic(n)
  466. }
  467. func (n *network) releaseVxlanID() error {
  468. if n.driver.store == nil {
  469. return fmt.Errorf("no datastore configured. cannot release vxlan id")
  470. }
  471. if len(n.subnets) == 0 {
  472. return nil
  473. }
  474. if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
  475. if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
  476. // In both the above cases we can safely assume that the key has been removed by some other
  477. // instance and so simply get out of here
  478. return nil
  479. }
  480. return fmt.Errorf("failed to delete network to vxlan id map: %v", err)
  481. }
  482. for _, s := range n.subnets {
  483. n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
  484. n.setVxlanID(s, 0)
  485. }
  486. return nil
  487. }
  488. func (n *network) obtainVxlanID(s *subnet) error {
  489. //return if the subnet already has a vxlan id assigned
  490. if s.vni != 0 {
  491. return nil
  492. }
  493. if n.driver.store == nil {
  494. return fmt.Errorf("no datastore configured. cannot obtain vxlan id")
  495. }
  496. for {
  497. if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
  498. return fmt.Errorf("getting network %q from datastore failed %v", n.id, err)
  499. }
  500. if s.vni == 0 {
  501. vxlanID, err := n.driver.vxlanIdm.GetID()
  502. if err != nil {
  503. return fmt.Errorf("failed to allocate vxlan id: %v", err)
  504. }
  505. n.setVxlanID(s, uint32(vxlanID))
  506. if err := n.writeToStore(); err != nil {
  507. n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
  508. n.setVxlanID(s, 0)
  509. if err == datastore.ErrKeyModified {
  510. continue
  511. }
  512. return fmt.Errorf("network %q failed to update data store: %v", n.id, err)
  513. }
  514. return nil
  515. }
  516. return nil
  517. }
  518. }
  519. // getSubnetforIP returns the subnet to which the given IP belongs
  520. func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
  521. for _, s := range n.subnets {
  522. // first check if the mask lengths are the same
  523. i, _ := s.subnetIP.Mask.Size()
  524. j, _ := ip.Mask.Size()
  525. if i != j {
  526. continue
  527. }
  528. if s.subnetIP.Contains(ip.IP) {
  529. return s
  530. }
  531. }
  532. return nil
  533. }
  534. // getMatchingSubnet return the network's subnet that matches the input
  535. func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet {
  536. if ip == nil {
  537. return nil
  538. }
  539. for _, s := range n.subnets {
  540. // first check if the mask lengths are the same
  541. i, _ := s.subnetIP.Mask.Size()
  542. j, _ := ip.Mask.Size()
  543. if i != j {
  544. continue
  545. }
  546. if s.subnetIP.IP.Equal(ip.IP) {
  547. return s
  548. }
  549. }
  550. return nil
  551. }