allocator.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624
  1. package ipam
  2. import (
  3. "fmt"
  4. "net"
  5. "strings"
  6. "sync"
  7. log "github.com/Sirupsen/logrus"
  8. "github.com/docker/libkv/store"
  9. "github.com/docker/libnetwork/bitseq"
  10. "github.com/docker/libnetwork/datastore"
  11. "github.com/docker/libnetwork/types"
  12. )
const (
	// Smallest prefix lengths (i.e. the biggest host subnets) accepted for a
	// configured subnet: adjustAndCheckSubnetSize rejects anything shorter.
	minNetSize = 8
	minNetSizeV6 = 64
	// The effective network size for v6: v6 subnets with a shorter prefix
	// are re-masked to this prefix length before being split.
	minNetSizeV6Eff = 96
	// The size (in host bits) of the subnets used internally; it is the
	// granularity of each address bit sequence.
	defaultInternalHostSize = 16
	// Datastore keys for ipam objects
	dsConfigKey = "ipam-config" // ipam-config/<domain>/<map of subnet configs>
	dsDataKey = "ipam-data" // ipam-data/<domain>/<subnet>/<child-subnet>/<bitmask>
)
// Allocator provides per address space ipv4/ipv6 book keeping.
//
// The embedded mutex is taken around every access to the subnets and
// addresses maps in this file.
type Allocator struct {
	// The internal subnets host size (host bits of each internal subnet)
	internalHostSize int
	// Static subnet information, keyed by address space and configured subnet
	// (the childSubnet part of the key is left empty)
	subnets map[subnetKey]*SubnetInfo
	// Allocated addresses in each address space's internal subnet, keyed by
	// address space, configured subnet and internal (child) subnet
	addresses map[subnetKey]*bitseq.Handle
	// Datastore; nil means the allocator works purely in memory
	store datastore.DataStore
	// App is set to "ipam" by NewAllocator
	App string
	// ID is set to dsConfigKey by NewAllocator
	ID string
	// dbIndex is the LastIndex of the most recent config applied from the datastore
	dbIndex uint64
	// dbExists is set once a config has been loaded from the datastore
	dbExists bool
	sync.Mutex
}
  41. // NewAllocator returns an instance of libnetwork ipam
  42. func NewAllocator(ds datastore.DataStore) (*Allocator, error) {
  43. a := &Allocator{}
  44. a.subnets = make(map[subnetKey]*SubnetInfo)
  45. a.addresses = make(map[subnetKey]*bitseq.Handle)
  46. a.internalHostSize = defaultInternalHostSize
  47. a.store = ds
  48. a.App = "ipam"
  49. a.ID = dsConfigKey
  50. if a.store == nil {
  51. return a, nil
  52. }
  53. // Register for status changes
  54. a.watchForChanges()
  55. // Get the initial subnet configs status from the ds if present.
  56. kvPair, err := a.store.KVStore().Get(datastore.Key(a.Key()...))
  57. if err != nil {
  58. if err != store.ErrKeyNotFound {
  59. return nil, fmt.Errorf("failed to retrieve the ipam subnet configs from datastore: %v", err)
  60. }
  61. return a, nil
  62. }
  63. a.subnetConfigFromStore(kvPair)
  64. // Now retrieve the list of small subnets
  65. var inserterList []func() error
  66. a.Lock()
  67. for k, v := range a.subnets {
  68. inserterList = append(inserterList,
  69. func() error {
  70. subnetList, err := getInternalSubnets(v.Subnet, a.internalHostSize)
  71. if err != nil {
  72. return fmt.Errorf("failed to load address bitmask for configured subnet %s because of %s", v.Subnet.String(), err.Error())
  73. }
  74. return a.insertAddressMasks(k, subnetList)
  75. })
  76. }
  77. a.Unlock()
  78. // Add the bitmasks, data could come from datastore
  79. for _, f := range inserterList {
  80. if err := f(); err != nil {
  81. return nil, err
  82. }
  83. }
  84. return a, nil
  85. }
  86. func (a *Allocator) subnetConfigFromStore(kvPair *store.KVPair) {
  87. a.Lock()
  88. if a.dbIndex < kvPair.LastIndex {
  89. a.subnets = byteArrayToSubnets(kvPair.Value)
  90. a.dbIndex = kvPair.LastIndex
  91. a.dbExists = true
  92. }
  93. a.Unlock()
  94. }
// subnetKey identifies a configured subnet inside an address space and,
// when childSubnet is set, one of the internal subnets it was split into.
// It is the key type of the Allocator subnets and addresses maps.
type subnetKey struct {
	// addressSpace is the address space the subnet belongs to
	addressSpace AddressSpace
	// subnet is the configured subnet in CIDR string form
	subnet string
	// childSubnet is the internal subnet in CIDR string form; empty when the
	// key refers to the configured subnet itself
	childSubnet string
}
  101. func (s *subnetKey) String() string {
  102. k := fmt.Sprintf("%s/%s", s.addressSpace, s.subnet)
  103. if s.childSubnet != "" {
  104. k = fmt.Sprintf("%s/%s", k, s.childSubnet)
  105. }
  106. return k
  107. }
  108. func (s *subnetKey) FromString(str string) error {
  109. if str == "" || !strings.Contains(str, "/") {
  110. return fmt.Errorf("invalid string form for subnetkey: %s", str)
  111. }
  112. p := strings.Split(str, "/")
  113. if len(p) != 3 && len(p) != 5 {
  114. return fmt.Errorf("invalid string form for subnetkey: %s", str)
  115. }
  116. s.addressSpace = AddressSpace(p[0])
  117. s.subnet = fmt.Sprintf("%s/%s", p[1], p[2])
  118. if len(p) == 5 {
  119. s.childSubnet = fmt.Sprintf("%s/%s", p[1], p[2])
  120. }
  121. return nil
  122. }
  123. func (s *subnetKey) canonicalSubnet() *net.IPNet {
  124. if _, sub, err := net.ParseCIDR(s.subnet); err == nil {
  125. return sub
  126. }
  127. return nil
  128. }
  129. func (s *subnetKey) canonicalChildSubnet() *net.IPNet {
  130. if _, sub, err := net.ParseCIDR(s.childSubnet); err == nil {
  131. return sub
  132. }
  133. return nil
  134. }
// ipVersion is the IP protocol version (4 or 6) of an address or subnet.
type ipVersion int

// Supported IP versions. The constants are untyped, so they can be compared
// against and passed as ipVersion values.
const (
	v4 = 4
	v6 = 6
)
  140. /*******************
  141. * IPAMConf Contract
  142. ********************/
  143. // AddSubnet adds a subnet for the specified address space
  144. func (a *Allocator) AddSubnet(addrSpace AddressSpace, subnetInfo *SubnetInfo) error {
  145. // Sanity check
  146. if addrSpace == "" {
  147. return ErrInvalidAddressSpace
  148. }
  149. if subnetInfo == nil || subnetInfo.Subnet == nil {
  150. return ErrInvalidSubnet
  151. }
  152. // Convert to smaller internal subnets (if needed)
  153. subnetList, err := getInternalSubnets(subnetInfo.Subnet, a.internalHostSize)
  154. if err != nil {
  155. return err
  156. }
  157. retry:
  158. if a.contains(addrSpace, subnetInfo) {
  159. return ErrOverlapSubnet
  160. }
  161. // Store the configured subnet and sync to datatstore
  162. key := subnetKey{addrSpace, subnetInfo.Subnet.String(), ""}
  163. a.Lock()
  164. a.subnets[key] = subnetInfo
  165. a.Unlock()
  166. err = a.writeToStore()
  167. if err != nil {
  168. if _, ok := err.(types.RetryError); !ok {
  169. return types.InternalErrorf("subnet configuration failed because of %s", err.Error())
  170. }
  171. // Update to latest
  172. if erru := a.readFromStore(); erru != nil {
  173. // Restore and bail out
  174. a.Lock()
  175. delete(a.addresses, key)
  176. a.Unlock()
  177. return fmt.Errorf("failed to get updated subnets config from datastore (%v) after (%v)", erru, err)
  178. }
  179. goto retry
  180. }
  181. // Insert respective bitmasks for this subnet
  182. a.insertAddressMasks(key, subnetList)
  183. return nil
  184. }
  185. // Create and insert the internal subnet(s) addresses masks into the address database. Mask data may come from the bitseq datastore.
  186. func (a *Allocator) insertAddressMasks(parentKey subnetKey, internalSubnetList []*net.IPNet) error {
  187. ipVer := getAddressVersion(internalSubnetList[0].IP)
  188. num := len(internalSubnetList)
  189. ones, bits := internalSubnetList[0].Mask.Size()
  190. numAddresses := 1 << uint(bits-ones)
  191. for i := 0; i < num; i++ {
  192. smallKey := subnetKey{parentKey.addressSpace, parentKey.subnet, internalSubnetList[i].String()}
  193. limit := uint32(numAddresses)
  194. if ipVer == v4 && i == num-1 {
  195. // Do not let broadcast address be reserved
  196. limit--
  197. }
  198. // Generate the new address masks. AddressMask content may come from datastore
  199. h, err := bitseq.NewHandle(dsDataKey, a.getStore(), smallKey.String(), limit)
  200. if err != nil {
  201. return err
  202. }
  203. if ipVer == v4 && i == 0 {
  204. // Do not let network identifier address be reserved
  205. h.Set(0)
  206. }
  207. a.Lock()
  208. a.addresses[smallKey] = h
  209. a.Unlock()
  210. }
  211. return nil
  212. }
  213. // Check subnets size. In case configured subnet is v6 and host size is
  214. // greater than 32 bits, adjust subnet to /96.
  215. func adjustAndCheckSubnetSize(subnet *net.IPNet) (*net.IPNet, error) {
  216. ones, bits := subnet.Mask.Size()
  217. if v6 == getAddressVersion(subnet.IP) {
  218. if ones < minNetSizeV6 {
  219. return nil, ErrInvalidSubnet
  220. }
  221. if ones < minNetSizeV6Eff {
  222. newMask := net.CIDRMask(minNetSizeV6Eff, bits)
  223. return &net.IPNet{IP: subnet.IP, Mask: newMask}, nil
  224. }
  225. } else {
  226. if ones < minNetSize {
  227. return nil, ErrInvalidSubnet
  228. }
  229. }
  230. return subnet, nil
  231. }
  232. // Checks whether the passed subnet is a superset or subset of any of the subset in the db
  233. func (a *Allocator) contains(space AddressSpace, subInfo *SubnetInfo) bool {
  234. a.Lock()
  235. defer a.Unlock()
  236. for k, v := range a.subnets {
  237. if space == k.addressSpace {
  238. if subInfo.Subnet.Contains(v.Subnet.IP) ||
  239. v.Subnet.Contains(subInfo.Subnet.IP) {
  240. return true
  241. }
  242. }
  243. }
  244. return false
  245. }
  246. // Splits the passed subnet into N internal subnets with host size equal to internalHostSize.
  247. // If the subnet's host size is equal to or smaller than internalHostSize, there won't be any
  248. // split and the return list will contain only the passed subnet.
  249. func getInternalSubnets(inSubnet *net.IPNet, internalHostSize int) ([]*net.IPNet, error) {
  250. var subnetList []*net.IPNet
  251. // Sanity check and size adjustment for v6
  252. subnet, err := adjustAndCheckSubnetSize(inSubnet)
  253. if err != nil {
  254. return subnetList, err
  255. }
  256. // Get network/host subnet information
  257. netBits, bits := subnet.Mask.Size()
  258. hostBits := bits - netBits
  259. extraBits := hostBits - internalHostSize
  260. if extraBits <= 0 {
  261. subnetList = make([]*net.IPNet, 1)
  262. subnetList[0] = subnet
  263. } else {
  264. // Split in smaller internal subnets
  265. numIntSubs := 1 << uint(extraBits)
  266. subnetList = make([]*net.IPNet, numIntSubs)
  267. // Construct one copy of the internal subnets's mask
  268. intNetBits := bits - internalHostSize
  269. intMask := net.CIDRMask(intNetBits, bits)
  270. // Construct the prefix portion for each internal subnet
  271. for i := 0; i < numIntSubs; i++ {
  272. intIP := make([]byte, len(subnet.IP))
  273. copy(intIP, subnet.IP) // IPv6 is too big, just work on the extra portion
  274. addIntToIP(intIP, uint32(i<<uint(internalHostSize)))
  275. subnetList[i] = &net.IPNet{IP: intIP, Mask: intMask}
  276. }
  277. }
  278. return subnetList, nil
  279. }
  280. // RemoveSubnet removes the subnet from the specified address space
  281. func (a *Allocator) RemoveSubnet(addrSpace AddressSpace, subnet *net.IPNet) error {
  282. if addrSpace == "" {
  283. return ErrInvalidAddressSpace
  284. }
  285. if subnet == nil {
  286. return ErrInvalidSubnet
  287. }
  288. retry:
  289. // Look for the respective subnet configuration data
  290. // Remove it along with the internal subnets
  291. subKey := subnetKey{addrSpace, subnet.String(), ""}
  292. a.Lock()
  293. current, ok := a.subnets[subKey]
  294. a.Unlock()
  295. if !ok {
  296. return ErrSubnetNotFound
  297. }
  298. // Remove config and sync to datastore
  299. a.Lock()
  300. delete(a.subnets, subKey)
  301. a.Unlock()
  302. err := a.writeToStore()
  303. if err != nil {
  304. if _, ok := err.(types.RetryError); !ok {
  305. return types.InternalErrorf("subnet removal failed because of %s", err.Error())
  306. }
  307. // Update to latest
  308. if erru := a.readFromStore(); erru != nil {
  309. // Restore and bail out
  310. a.Lock()
  311. a.subnets[subKey] = current
  312. a.Unlock()
  313. return fmt.Errorf("failed to get updated subnets config from datastore (%v) after (%v)", erru, err)
  314. }
  315. goto retry
  316. }
  317. // Get the list of smaller internal subnets
  318. subnetList, err := getInternalSubnets(subnet, a.internalHostSize)
  319. if err != nil {
  320. return err
  321. }
  322. for _, s := range subnetList {
  323. sk := subnetKey{addrSpace, subKey.subnet, s.String()}
  324. a.Lock()
  325. if bm, ok := a.addresses[sk]; ok {
  326. bm.Destroy()
  327. }
  328. delete(a.addresses, sk)
  329. a.Unlock()
  330. }
  331. return nil
  332. }
// AddVendorInfo adds vendor specific data.
// This allocator has no use for it: the argument is ignored and nil is
// always returned.
func (a *Allocator) AddVendorInfo([]byte) error {
	// no op for us
	return nil
}
  338. /****************
  339. * IPAM Contract
  340. ****************/
// Request allows requesting an IPv4 address from the specified address space.
// It delegates to request with the IP version fixed to v4.
func (a *Allocator) Request(addrSpace AddressSpace, req *AddressRequest) (*AddressResponse, error) {
	return a.request(addrSpace, req, v4)
}
// RequestV6 allows requesting an IPv6 address from the specified address space.
// It delegates to request with the IP version fixed to v6.
func (a *Allocator) RequestV6(addrSpace AddressSpace, req *AddressRequest) (*AddressResponse, error) {
	return a.request(addrSpace, req, v6)
}
  349. func (a *Allocator) request(addrSpace AddressSpace, req *AddressRequest, version ipVersion) (*AddressResponse, error) {
  350. // Empty response
  351. response := &AddressResponse{}
  352. // Sanity check
  353. if addrSpace == "" {
  354. return response, ErrInvalidAddressSpace
  355. }
  356. // Validate request
  357. if err := req.Validate(); err != nil {
  358. return response, err
  359. }
  360. // Check ip version congruence
  361. if &req.Subnet != nil && version != getAddressVersion(req.Subnet.IP) {
  362. return response, ErrInvalidRequest
  363. }
  364. // Look for an address
  365. ip, _, err := a.reserveAddress(addrSpace, &req.Subnet, req.Address, version)
  366. if err == nil {
  367. // Populate response
  368. response.Address = ip
  369. a.Lock()
  370. response.Subnet = *a.subnets[subnetKey{addrSpace, req.Subnet.String(), ""}]
  371. a.Unlock()
  372. }
  373. return response, err
  374. }
  375. // Release allows releasing the address from the specified address space
  376. func (a *Allocator) Release(addrSpace AddressSpace, address net.IP) {
  377. var (
  378. space *bitseq.Handle
  379. sub *net.IPNet
  380. )
  381. if address == nil {
  382. log.Debugf("Requested to remove nil address from address space %s", addrSpace)
  383. return
  384. }
  385. ver := getAddressVersion(address)
  386. if ver == v4 {
  387. address = address.To4()
  388. }
  389. // Find the subnet containing the address
  390. for _, subKey := range a.getSubnetList(addrSpace, ver) {
  391. sub = subKey.canonicalChildSubnet()
  392. if sub.Contains(address) {
  393. a.Lock()
  394. space = a.addresses[subKey]
  395. a.Unlock()
  396. break
  397. }
  398. }
  399. if space == nil {
  400. log.Debugf("Could not find subnet on address space %s containing %s on release", addrSpace, address.String())
  401. return
  402. }
  403. // Retrieve correspondent ordinal in the subnet
  404. hostPart, err := types.GetHostPartIP(address, sub.Mask)
  405. if err != nil {
  406. log.Warnf("Failed to release address %s on address space %s because of internal error: %v", address.String(), addrSpace, err)
  407. return
  408. }
  409. ordinal := ipToUint32(hostPart)
  410. // Release it
  411. if err := space.Unset(ordinal); err != nil {
  412. log.Warnf("Failed to release address %s on address space %s because of internal error: %v", address.String(), addrSpace, err)
  413. }
  414. }
  415. func (a *Allocator) reserveAddress(addrSpace AddressSpace, subnet *net.IPNet, prefAddress net.IP, ver ipVersion) (net.IP, *net.IPNet, error) {
  416. var keyList []subnetKey
  417. // Get the list of pointers to the internal subnets
  418. if subnet != nil {
  419. // Get the list of smaller internal subnets
  420. subnetList, err := getInternalSubnets(subnet, a.internalHostSize)
  421. if err != nil {
  422. return nil, nil, err
  423. }
  424. for _, s := range subnetList {
  425. keyList = append(keyList, subnetKey{addrSpace, subnet.String(), s.String()})
  426. }
  427. } else {
  428. a.Lock()
  429. keyList = a.getSubnetList(addrSpace, ver)
  430. a.Unlock()
  431. }
  432. if len(keyList) == 0 {
  433. return nil, nil, ErrNoAvailableSubnet
  434. }
  435. for _, key := range keyList {
  436. a.Lock()
  437. bitmask, ok := a.addresses[key]
  438. a.Unlock()
  439. if !ok {
  440. log.Warnf("Did not find a bitmask for subnet key: %s", key.String())
  441. continue
  442. }
  443. address, err := a.getAddress(key.canonicalChildSubnet(), bitmask, prefAddress, ver)
  444. if err == nil {
  445. return address, subnet, nil
  446. }
  447. }
  448. return nil, nil, ErrNoAvailableIPs
  449. }
  450. // Get the list of available internal subnets for the specified address space and the desired ip version
  451. func (a *Allocator) getSubnetList(addrSpace AddressSpace, ver ipVersion) []subnetKey {
  452. var list [1024]subnetKey
  453. ind := 0
  454. a.Lock()
  455. for subKey := range a.addresses {
  456. s := subKey.canonicalSubnet()
  457. subVer := getAddressVersion(s.IP)
  458. if subKey.addressSpace == addrSpace && subVer == ver {
  459. list[ind] = subKey
  460. ind++
  461. }
  462. }
  463. a.Unlock()
  464. return list[0:ind]
  465. }
  466. func (a *Allocator) getAddress(subnet *net.IPNet, bitmask *bitseq.Handle, prefAddress net.IP, ver ipVersion) (net.IP, error) {
  467. var (
  468. ordinal uint32
  469. err error
  470. )
  471. if bitmask.Unselected() <= 0 {
  472. return nil, ErrNoAvailableIPs
  473. }
  474. if prefAddress == nil {
  475. ordinal, err = bitmask.SetAny()
  476. } else {
  477. hostPart, e := types.GetHostPartIP(prefAddress, subnet.Mask)
  478. if e != nil {
  479. return nil, fmt.Errorf("failed to allocate preferred address %s: %v", prefAddress.String(), e)
  480. }
  481. ordinal = ipToUint32(types.GetMinimalIP(hostPart))
  482. err = bitmask.Set(ordinal)
  483. }
  484. if err != nil {
  485. return nil, ErrNoAvailableIPs
  486. }
  487. // Convert IP ordinal for this subnet into IP address
  488. return generateAddress(ordinal, subnet), nil
  489. }
  490. // DumpDatabase dumps the internal info
  491. func (a *Allocator) DumpDatabase() {
  492. a.Lock()
  493. defer a.Unlock()
  494. for k, config := range a.subnets {
  495. fmt.Printf("\n\n%s:", config.Subnet.String())
  496. subnetList, _ := getInternalSubnets(config.Subnet, a.internalHostSize)
  497. for _, s := range subnetList {
  498. internKey := subnetKey{k.addressSpace, config.Subnet.String(), s.String()}
  499. bm := a.addresses[internKey]
  500. fmt.Printf("\n\t%s: %s\n\t%d", internKey.childSubnet, bm, bm.Unselected())
  501. }
  502. }
  503. }
  504. func (a *Allocator) getStore() datastore.DataStore {
  505. a.Lock()
  506. defer a.Unlock()
  507. return a.store
  508. }
  509. // It generates the ip address in the passed subnet specified by
  510. // the passed host address ordinal
  511. func generateAddress(ordinal uint32, network *net.IPNet) net.IP {
  512. var address [16]byte
  513. // Get network portion of IP
  514. if getAddressVersion(network.IP) == v4 {
  515. copy(address[:], network.IP.To4())
  516. } else {
  517. copy(address[:], network.IP)
  518. }
  519. end := len(network.Mask)
  520. addIntToIP(address[:end], ordinal)
  521. return net.IP(address[:end])
  522. }
  523. func getAddressVersion(ip net.IP) ipVersion {
  524. if ip.To4() == nil {
  525. return v6
  526. }
  527. return v4
  528. }
  529. // Adds the ordinal IP to the current array
  530. // 192.168.0.0 + 53 => 192.168.53
  531. func addIntToIP(array []byte, ordinal uint32) {
  532. for i := len(array) - 1; i >= 0; i-- {
  533. array[i] |= (byte)(ordinal & 0xff)
  534. ordinal >>= 8
  535. }
  536. }
  537. // Convert an ordinal to the respective IP address
  538. func ipToUint32(ip []byte) uint32 {
  539. value := uint32(0)
  540. for i := 0; i < len(ip); i++ {
  541. j := len(ip) - 1 - i
  542. value += uint32(ip[i]) << uint(j*8)
  543. }
  544. return value
  545. }