ov_network.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. package overlay
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "net"
  6. "sync"
  7. "syscall"
  8. "github.com/Sirupsen/logrus"
  9. "github.com/docker/libnetwork/datastore"
  10. "github.com/docker/libnetwork/driverapi"
  11. "github.com/docker/libnetwork/netutils"
  12. "github.com/docker/libnetwork/osl"
  13. "github.com/docker/libnetwork/types"
  14. "github.com/vishvananda/netlink"
  15. "github.com/vishvananda/netlink/nl"
  16. )
  17. type networkTable map[string]*network
  18. type subnet struct {
  19. once *sync.Once
  20. vxlanName string
  21. brName string
  22. vni uint32
  23. initErr error
  24. subnetIP *net.IPNet
  25. gwIP *net.IPNet
  26. }
  27. type subnetJSON struct {
  28. SubnetIP string
  29. GwIP string
  30. Vni uint32
  31. }
  32. type network struct {
  33. id string
  34. dbIndex uint64
  35. dbExists bool
  36. sbox osl.Sandbox
  37. endpoints endpointTable
  38. driver *driver
  39. joinCnt int
  40. once *sync.Once
  41. initEpoch int
  42. initErr error
  43. subnets []*subnet
  44. sync.Mutex
  45. }
  46. func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
  47. if id == "" {
  48. return fmt.Errorf("invalid network id")
  49. }
  50. // Since we perform lazy configuration make sure we try
  51. // configuring the driver when we enter CreateNetwork
  52. if err := d.configure(); err != nil {
  53. return err
  54. }
  55. n := &network{
  56. id: id,
  57. driver: d,
  58. endpoints: endpointTable{},
  59. once: &sync.Once{},
  60. subnets: []*subnet{},
  61. }
  62. for _, ipd := range ipV4Data {
  63. s := &subnet{
  64. subnetIP: ipd.Pool,
  65. gwIP: ipd.Gateway,
  66. once: &sync.Once{},
  67. }
  68. n.subnets = append(n.subnets, s)
  69. }
  70. if err := n.writeToStore(); err != nil {
  71. return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
  72. }
  73. d.addNetwork(n)
  74. return nil
  75. }
  76. /* func (d *driver) createNetworkfromStore(nid string) (*network, error) {
  77. n := &network{
  78. id: nid,
  79. driver: d,
  80. endpoints: endpointTable{},
  81. once: &sync.Once{},
  82. subnets: []*subnet{},
  83. }
  84. err := d.store.GetObject(datastore.Key(n.Key()...), n)
  85. if err != nil {
  86. return nil, fmt.Errorf("unable to get network %q from data store, %v", nid, err)
  87. }
  88. return n, nil
  89. }*/
  90. func (d *driver) DeleteNetwork(nid string) error {
  91. if nid == "" {
  92. return fmt.Errorf("invalid network id")
  93. }
  94. n := d.network(nid)
  95. if n == nil {
  96. return fmt.Errorf("could not find network with id %s", nid)
  97. }
  98. d.deleteNetwork(nid)
  99. return n.releaseVxlanID()
  100. }
  101. func (n *network) incEndpointCount() {
  102. n.Lock()
  103. defer n.Unlock()
  104. n.joinCnt++
  105. }
  106. func (n *network) joinSandbox() error {
  107. // If there is a race between two go routines here only one will win
  108. // the other will wait.
  109. n.once.Do(func() {
  110. // save the error status of initSandbox in n.initErr so that
  111. // all the racing go routines are able to know the status.
  112. n.initErr = n.initSandbox()
  113. })
  114. return n.initErr
  115. }
  116. func (n *network) joinSubnetSandbox(s *subnet) error {
  117. s.once.Do(func() {
  118. s.initErr = n.initSubnetSandbox(s)
  119. })
  120. return s.initErr
  121. }
  122. func (n *network) leaveSandbox() {
  123. n.Lock()
  124. n.joinCnt--
  125. if n.joinCnt != 0 {
  126. n.Unlock()
  127. return
  128. }
  129. // We are about to destroy sandbox since the container is leaving the network
  130. // Reinitialize the once variable so that we will be able to trigger one time
  131. // sandbox initialization(again) when another container joins subsequently.
  132. n.once = &sync.Once{}
  133. for _, s := range n.subnets {
  134. s.once = &sync.Once{}
  135. }
  136. n.Unlock()
  137. n.destroySandbox()
  138. }
  139. func (n *network) destroySandbox() {
  140. sbox := n.sandbox()
  141. if sbox != nil {
  142. for _, iface := range sbox.Info().Interfaces() {
  143. iface.Remove()
  144. }
  145. for _, s := range n.subnets {
  146. if s.vxlanName != "" {
  147. err := deleteVxlan(s.vxlanName)
  148. if err != nil {
  149. logrus.Warnf("could not cleanup sandbox properly: %v", err)
  150. }
  151. }
  152. }
  153. sbox.Destroy()
  154. n.setSandbox(nil)
  155. }
  156. }
  157. func (n *network) initSubnetSandbox(s *subnet) error {
  158. // create a bridge and vxlan device for this subnet and move it to the sandbox
  159. brName, err := netutils.GenerateIfaceName("bridge", 7)
  160. if err != nil {
  161. return err
  162. }
  163. sbox := n.sandbox()
  164. if err := sbox.AddInterface(brName, "br",
  165. sbox.InterfaceOptions().Address(s.gwIP),
  166. sbox.InterfaceOptions().Bridge(true)); err != nil {
  167. return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
  168. }
  169. vxlanName, err := createVxlan(n.vxlanID(s))
  170. if err != nil {
  171. return err
  172. }
  173. if err := sbox.AddInterface(vxlanName, "vxlan",
  174. sbox.InterfaceOptions().Master(brName)); err != nil {
  175. return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
  176. }
  177. n.Lock()
  178. s.vxlanName = vxlanName
  179. s.brName = brName
  180. n.Unlock()
  181. return nil
  182. }
  183. func (n *network) initSandbox() error {
  184. n.Lock()
  185. n.initEpoch++
  186. n.Unlock()
  187. sbox, err := osl.NewSandbox(
  188. osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch)+n.id), true)
  189. if err != nil {
  190. return fmt.Errorf("could not create network sandbox: %v", err)
  191. }
  192. n.setSandbox(sbox)
  193. n.driver.peerDbUpdateSandbox(n.id)
  194. var nlSock *nl.NetlinkSocket
  195. sbox.InvokeFunc(func() {
  196. nlSock, err = nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
  197. if err != nil {
  198. err = fmt.Errorf("failed to subscribe to neighbor group netlink messages")
  199. }
  200. })
  201. go n.watchMiss(nlSock)
  202. return nil
  203. }
  204. func (n *network) watchMiss(nlSock *nl.NetlinkSocket) {
  205. for {
  206. msgs, err := nlSock.Receive()
  207. if err != nil {
  208. logrus.Errorf("Failed to receive from netlink: %v ", err)
  209. continue
  210. }
  211. for _, msg := range msgs {
  212. if msg.Header.Type != syscall.RTM_GETNEIGH && msg.Header.Type != syscall.RTM_NEWNEIGH {
  213. continue
  214. }
  215. neigh, err := netlink.NeighDeserialize(msg.Data)
  216. if err != nil {
  217. logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err)
  218. continue
  219. }
  220. if neigh.IP.To16() != nil {
  221. continue
  222. }
  223. if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
  224. continue
  225. }
  226. mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, neigh.IP)
  227. if err != nil {
  228. logrus.Errorf("could not resolve peer %q: %v", neigh.IP, err)
  229. continue
  230. }
  231. if err := n.driver.peerAdd(n.id, "dummy", neigh.IP, IPmask, mac, vtep, true); err != nil {
  232. logrus.Errorf("could not add neighbor entry for missed peer %q: %v", neigh.IP, err)
  233. }
  234. }
  235. }
  236. }
  237. func (d *driver) addNetwork(n *network) {
  238. d.Lock()
  239. d.networks[n.id] = n
  240. d.Unlock()
  241. }
  242. func (d *driver) deleteNetwork(nid string) {
  243. d.Lock()
  244. delete(d.networks, nid)
  245. d.Unlock()
  246. }
  247. func (d *driver) network(nid string) *network {
  248. d.Lock()
  249. networks := d.networks
  250. d.Unlock()
  251. n, ok := networks[nid]
  252. if !ok {
  253. n = d.getNetworkFromStore(nid)
  254. if n != nil {
  255. n.driver = d
  256. n.endpoints = endpointTable{}
  257. n.once = &sync.Once{}
  258. networks[nid] = n
  259. }
  260. }
  261. return n
  262. }
  263. func (d *driver) getNetworkFromStore(nid string) *network {
  264. if d.store == nil {
  265. return nil
  266. }
  267. n := &network{id: nid}
  268. if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
  269. return nil
  270. }
  271. return n
  272. }
  273. func (n *network) sandbox() osl.Sandbox {
  274. n.Lock()
  275. defer n.Unlock()
  276. return n.sbox
  277. }
  278. func (n *network) setSandbox(sbox osl.Sandbox) {
  279. n.Lock()
  280. n.sbox = sbox
  281. n.Unlock()
  282. }
  283. func (n *network) vxlanID(s *subnet) uint32 {
  284. n.Lock()
  285. defer n.Unlock()
  286. return s.vni
  287. }
  288. func (n *network) setVxlanID(s *subnet, vni uint32) {
  289. n.Lock()
  290. s.vni = vni
  291. n.Unlock()
  292. }
  293. func (n *network) Key() []string {
  294. return []string{"overlay", "network", n.id}
  295. }
  296. func (n *network) KeyPrefix() []string {
  297. return []string{"overlay", "network"}
  298. }
  299. func (n *network) Value() []byte {
  300. netJSON := []*subnetJSON{}
  301. for _, s := range n.subnets {
  302. sj := &subnetJSON{
  303. SubnetIP: s.subnetIP.String(),
  304. GwIP: s.gwIP.String(),
  305. Vni: s.vni,
  306. }
  307. netJSON = append(netJSON, sj)
  308. }
  309. b, err := json.Marshal(netJSON)
  310. if err != nil {
  311. return []byte{}
  312. }
  313. return b
  314. }
  315. func (n *network) Index() uint64 {
  316. return n.dbIndex
  317. }
  318. func (n *network) SetIndex(index uint64) {
  319. n.dbIndex = index
  320. n.dbExists = true
  321. }
  322. func (n *network) Exists() bool {
  323. return n.dbExists
  324. }
  325. func (n *network) Skip() bool {
  326. return false
  327. }
  328. func (n *network) SetValue(value []byte) error {
  329. var newNet bool
  330. netJSON := []*subnetJSON{}
  331. err := json.Unmarshal(value, &netJSON)
  332. if err != nil {
  333. return err
  334. }
  335. if len(n.subnets) == 0 {
  336. newNet = true
  337. }
  338. for _, sj := range netJSON {
  339. subnetIPstr := sj.SubnetIP
  340. gwIPstr := sj.GwIP
  341. vni := sj.Vni
  342. subnetIP, _ := types.ParseCIDR(subnetIPstr)
  343. gwIP, _ := types.ParseCIDR(gwIPstr)
  344. if newNet {
  345. s := &subnet{
  346. subnetIP: subnetIP,
  347. gwIP: gwIP,
  348. vni: vni,
  349. once: &sync.Once{},
  350. }
  351. n.subnets = append(n.subnets, s)
  352. } else {
  353. sNet := n.getMatchingSubnet(subnetIP)
  354. if sNet != nil {
  355. sNet.vni = vni
  356. }
  357. }
  358. }
  359. return nil
  360. }
  361. func (n *network) DataScope() string {
  362. return datastore.GlobalScope
  363. }
  364. func (n *network) writeToStore() error {
  365. return n.driver.store.PutObjectAtomic(n)
  366. }
  367. func (n *network) releaseVxlanID() error {
  368. if n.driver.store == nil {
  369. return fmt.Errorf("no datastore configured. cannot release vxlan id")
  370. }
  371. if len(n.subnets) == 0 {
  372. return nil
  373. }
  374. if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
  375. if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
  376. // In both the above cases we can safely assume that the key has been removed by some other
  377. // instance and so simply get out of here
  378. return nil
  379. }
  380. return fmt.Errorf("failed to delete network to vxlan id map: %v", err)
  381. }
  382. for _, s := range n.subnets {
  383. n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
  384. n.setVxlanID(s, 0)
  385. }
  386. return nil
  387. }
  388. func (n *network) obtainVxlanID(s *subnet) error {
  389. //return if the subnet already has a vxlan id assigned
  390. if s.vni != 0 {
  391. return nil
  392. }
  393. if n.driver.store == nil {
  394. return fmt.Errorf("no datastore configured. cannot obtain vxlan id")
  395. }
  396. for {
  397. if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
  398. return fmt.Errorf("getting network %q from datastore failed %v", n.id, err)
  399. }
  400. if s.vni == 0 {
  401. vxlanID, err := n.driver.vxlanIdm.GetID()
  402. if err != nil {
  403. return fmt.Errorf("failed to allocate vxlan id: %v", err)
  404. }
  405. n.setVxlanID(s, uint32(vxlanID))
  406. if err := n.writeToStore(); err != nil {
  407. n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
  408. n.setVxlanID(s, 0)
  409. if err == datastore.ErrKeyModified {
  410. continue
  411. }
  412. return fmt.Errorf("network %q failed to update data store: %v", n.id, err)
  413. }
  414. return nil
  415. }
  416. return nil
  417. }
  418. }
  419. // getSubnetforIP returns the subnet to which the given IP belongs
  420. func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
  421. for _, s := range n.subnets {
  422. // first check if the mask lengths are the same
  423. i, _ := s.subnetIP.Mask.Size()
  424. j, _ := ip.Mask.Size()
  425. if i != j {
  426. continue
  427. }
  428. if s.subnetIP.Contains(ip.IP) {
  429. return s
  430. }
  431. }
  432. return nil
  433. }
  434. // getMatchingSubnet return the network's subnet that matches the input
  435. func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet {
  436. if ip == nil {
  437. return nil
  438. }
  439. for _, s := range n.subnets {
  440. // first check if the mask lengths are the same
  441. i, _ := s.subnetIP.Mask.Size()
  442. j, _ := ip.Mask.Size()
  443. if i != j {
  444. continue
  445. }
  446. if s.subnetIP.IP.Equal(ip.IP) {
  447. return s
  448. }
  449. }
  450. return nil
  451. }