allocator.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. package ipam
  2. import (
  3. "fmt"
  4. "net"
  5. "strings"
  6. "sync"
  7. log "github.com/Sirupsen/logrus"
  8. "github.com/docker/libkv/store"
  9. "github.com/docker/libnetwork/bitseq"
  10. "github.com/docker/libnetwork/datastore"
  11. "github.com/docker/libnetwork/types"
  12. )
  // Subnet sizing limits, expressed as prefix lengths / bit counts.
  const (
  	// Shortest prefix length accepted for a configured IPv4 subnet,
  	// i.e. the biggest configurable host subnet.
  	minNetSize = 8
  	// Shortest prefix length accepted for a configured IPv6 subnet.
  	minNetSizeV6 = 64
  	// Effective network size for v6: shorter v6 prefixes are adjusted to this one.
  	minNetSizeV6Eff = 96
  	// Size in bits of the host portion of the subnets used internally;
  	// it is the most granular address sequence.
  	defaultInternalHostSize = 16
  )

  // Datastore keys for ipam objects.
  const (
  	// ipam-config/<domain>/<map of subnet configs>
  	dsConfigKey = "ipam-config"
  	// ipam-data/<domain>/<subnet>/<child-subnet>/<bitmask>
  	dsDataKey = "ipam-data"
  )
  25. // Allocator provides per address space ipv4/ipv6 book keeping
  26. type Allocator struct {
  27. // The internal subnets host size
  28. internalHostSize int
  29. // Static subnet information
  30. subnets map[subnetKey]*SubnetInfo
  31. // Allocated addresses in each address space's internal subnet
  32. addresses map[subnetKey]*bitseq.Handle
  33. // Datastore
  34. store datastore.DataStore
  35. App string
  36. ID string
  37. dbIndex uint64
  38. dbExists bool
  39. sync.Mutex
  40. }
  41. // NewAllocator returns an instance of libnetwork ipam
  42. func NewAllocator(ds datastore.DataStore) (*Allocator, error) {
  43. a := &Allocator{}
  44. a.subnets = make(map[subnetKey]*SubnetInfo)
  45. a.addresses = make(map[subnetKey]*bitseq.Handle)
  46. a.internalHostSize = defaultInternalHostSize
  47. a.store = ds
  48. a.App = "ipam"
  49. a.ID = dsConfigKey
  50. if a.store == nil {
  51. return a, nil
  52. }
  53. // Register for status changes
  54. a.watchForChanges()
  55. // Get the initial subnet configs status from the ds if present.
  56. kvPair, err := a.store.KVStore().Get(datastore.Key(a.Key()...))
  57. if err != nil {
  58. if err != store.ErrKeyNotFound {
  59. return nil, fmt.Errorf("failed to retrieve the ipam subnet configs from datastore: %v", err)
  60. }
  61. return a, nil
  62. }
  63. a.subnetConfigFromStore(kvPair)
  64. // Now retrieve the list of small subnets
  65. var inserterList []func() error
  66. a.Lock()
  67. for k, v := range a.subnets {
  68. inserterList = append(inserterList,
  69. func() error {
  70. subnetList, err := getInternalSubnets(v.Subnet, a.internalHostSize)
  71. if err != nil {
  72. return fmt.Errorf("failed to load address bitmask for configured subnet %s because of %s", v.Subnet.String(), err.Error())
  73. }
  74. a.insertAddressMasks(k, subnetList)
  75. return nil
  76. })
  77. }
  78. a.Unlock()
  79. // Add the bitmasks, data could come from datastore
  80. for _, f := range inserterList {
  81. if err := f(); err != nil {
  82. return nil, err
  83. }
  84. }
  85. return a, nil
  86. }
  87. func (a *Allocator) subnetConfigFromStore(kvPair *store.KVPair) {
  88. a.Lock()
  89. if a.dbIndex < kvPair.LastIndex {
  90. a.subnets = byteArrayToSubnets(kvPair.Value)
  91. a.dbIndex = kvPair.LastIndex
  92. a.dbExists = true
  93. }
  94. a.Unlock()
  95. }
  96. // Pointer to the configured subnets in each address space
  97. type subnetKey struct {
  98. addressSpace AddressSpace
  99. subnet string
  100. childSubnet string
  101. }
  102. func (s *subnetKey) String() string {
  103. k := fmt.Sprintf("%s/%s", s.addressSpace, s.subnet)
  104. if s.childSubnet != "" {
  105. k = fmt.Sprintf("%s/%s", k, s.childSubnet)
  106. }
  107. return k
  108. }
  109. func (s *subnetKey) FromString(str string) error {
  110. if str == "" || !strings.Contains(str, "/") {
  111. return fmt.Errorf("invalid string form for subnetkey: %s", str)
  112. }
  113. p := strings.Split(str, "/")
  114. if len(p) != 3 && len(p) != 5 {
  115. return fmt.Errorf("invalid string form for subnetkey: %s", str)
  116. }
  117. s.addressSpace = AddressSpace(p[0])
  118. s.subnet = fmt.Sprintf("%s/%s", p[1], p[2])
  119. if len(p) == 5 {
  120. s.childSubnet = fmt.Sprintf("%s/%s", p[1], p[2])
  121. }
  122. return nil
  123. }
  124. func (s *subnetKey) canonicalSubnet() *net.IPNet {
  125. if _, sub, err := net.ParseCIDR(s.subnet); err == nil {
  126. return sub
  127. }
  128. return nil
  129. }
  130. func (s *subnetKey) canonicalChildSubnet() *net.IPNet {
  131. if _, sub, err := net.ParseCIDR(s.childSubnet); err == nil {
  132. return sub
  133. }
  134. return nil
  135. }
  // ipVersion tells IPv4 and IPv6 apart; its values are v4 and v6.
  type ipVersion int

  // IP versions, named after their protocol number.
  const (
  	v4 = 4
  	v6 = 6
  )
  141. /*******************
  142. * IPAMConf Contract
  143. ********************/
  144. // AddSubnet adds a subnet for the specified address space
  145. func (a *Allocator) AddSubnet(addrSpace AddressSpace, subnetInfo *SubnetInfo) error {
  146. // Sanity check
  147. if addrSpace == "" {
  148. return ErrInvalidAddressSpace
  149. }
  150. if subnetInfo == nil || subnetInfo.Subnet == nil {
  151. return ErrInvalidSubnet
  152. }
  153. // Convert to smaller internal subnets (if needed)
  154. subnetList, err := getInternalSubnets(subnetInfo.Subnet, a.internalHostSize)
  155. if err != nil {
  156. return err
  157. }
  158. retry:
  159. if a.contains(addrSpace, subnetInfo) {
  160. return ErrOverlapSubnet
  161. }
  162. // Store the configured subnet and sync to datatstore
  163. key := subnetKey{addrSpace, subnetInfo.Subnet.String(), ""}
  164. a.Lock()
  165. a.subnets[key] = subnetInfo
  166. a.Unlock()
  167. err = a.writeToStore()
  168. if err != nil {
  169. if _, ok := err.(types.RetryError); !ok {
  170. return types.InternalErrorf("subnet configuration failed because of %s", err.Error())
  171. }
  172. // Update to latest
  173. if erru := a.readFromStore(); erru != nil {
  174. // Restore and bail out
  175. a.Lock()
  176. delete(a.addresses, key)
  177. a.Unlock()
  178. return fmt.Errorf("failed to get updated subnets config from datastore (%v) after (%v)", erru, err)
  179. }
  180. goto retry
  181. }
  182. // Insert respective bitmasks for this subnet
  183. a.insertAddressMasks(key, subnetList)
  184. return nil
  185. }
  186. // Create and insert the internal subnet(s) addresses masks into the address database. Mask data may come from the bitseq datastore.
  187. func (a *Allocator) insertAddressMasks(parentKey subnetKey, internalSubnetList []*net.IPNet) error {
  188. for _, intSub := range internalSubnetList {
  189. var err error
  190. ones, bits := intSub.Mask.Size()
  191. numAddresses := 1 << uint(bits-ones)
  192. smallKey := subnetKey{parentKey.addressSpace, parentKey.subnet, intSub.String()}
  193. // Insert the new address masks. AddressMask content may come from datastore
  194. a.Lock()
  195. a.addresses[smallKey], err = bitseq.NewHandle(dsDataKey, a.store, smallKey.String(), uint32(numAddresses))
  196. a.Unlock()
  197. if err != nil {
  198. return err
  199. }
  200. }
  201. return nil
  202. }
  203. // Check subnets size. In case configured subnet is v6 and host size is
  204. // greater than 32 bits, adjust subnet to /96.
  205. func adjustAndCheckSubnetSize(subnet *net.IPNet) (*net.IPNet, error) {
  206. ones, bits := subnet.Mask.Size()
  207. if v6 == getAddressVersion(subnet.IP) {
  208. if ones < minNetSizeV6 {
  209. return nil, ErrInvalidSubnet
  210. }
  211. if ones < minNetSizeV6Eff {
  212. newMask := net.CIDRMask(minNetSizeV6Eff, bits)
  213. return &net.IPNet{IP: subnet.IP, Mask: newMask}, nil
  214. }
  215. } else {
  216. if ones < minNetSize {
  217. return nil, ErrInvalidSubnet
  218. }
  219. }
  220. return subnet, nil
  221. }
  222. // Checks whether the passed subnet is a superset or subset of any of the subset in the db
  223. func (a *Allocator) contains(space AddressSpace, subInfo *SubnetInfo) bool {
  224. a.Lock()
  225. defer a.Unlock()
  226. for k, v := range a.subnets {
  227. if space == k.addressSpace {
  228. if subInfo.Subnet.Contains(v.Subnet.IP) ||
  229. v.Subnet.Contains(subInfo.Subnet.IP) {
  230. return true
  231. }
  232. }
  233. }
  234. return false
  235. }
  236. // Splits the passed subnet into N internal subnets with host size equal to internalHostSize.
  237. // If the subnet's host size is equal to or smaller than internalHostSize, there won't be any
  238. // split and the return list will contain only the passed subnet.
  239. func getInternalSubnets(inSubnet *net.IPNet, internalHostSize int) ([]*net.IPNet, error) {
  240. var subnetList []*net.IPNet
  241. // Sanity check and size adjustment for v6
  242. subnet, err := adjustAndCheckSubnetSize(inSubnet)
  243. if err != nil {
  244. return subnetList, err
  245. }
  246. // Get network/host subnet information
  247. netBits, bits := subnet.Mask.Size()
  248. hostBits := bits - netBits
  249. extraBits := hostBits - internalHostSize
  250. if extraBits <= 0 {
  251. subnetList = make([]*net.IPNet, 1)
  252. subnetList[0] = subnet
  253. } else {
  254. // Split in smaller internal subnets
  255. numIntSubs := 1 << uint(extraBits)
  256. subnetList = make([]*net.IPNet, numIntSubs)
  257. // Construct one copy of the internal subnets's mask
  258. intNetBits := bits - internalHostSize
  259. intMask := net.CIDRMask(intNetBits, bits)
  260. // Construct the prefix portion for each internal subnet
  261. for i := 0; i < numIntSubs; i++ {
  262. intIP := make([]byte, len(subnet.IP))
  263. copy(intIP, subnet.IP) // IPv6 is too big, just work on the extra portion
  264. addIntToIP(intIP, i<<uint(internalHostSize))
  265. subnetList[i] = &net.IPNet{IP: intIP, Mask: intMask}
  266. }
  267. }
  268. return subnetList, nil
  269. }
  270. // RemoveSubnet removes the subnet from the specified address space
  271. func (a *Allocator) RemoveSubnet(addrSpace AddressSpace, subnet *net.IPNet) error {
  272. if addrSpace == "" {
  273. return ErrInvalidAddressSpace
  274. }
  275. if subnet == nil {
  276. return ErrInvalidSubnet
  277. }
  278. retry:
  279. // Look for the respective subnet configuration data
  280. // Remove it along with the internal subnets
  281. subKey := subnetKey{addrSpace, subnet.String(), ""}
  282. a.Lock()
  283. current, ok := a.subnets[subKey]
  284. a.Unlock()
  285. if !ok {
  286. return ErrSubnetNotFound
  287. }
  288. // Remove config and sync to datastore
  289. a.Lock()
  290. delete(a.subnets, subKey)
  291. a.Unlock()
  292. err := a.writeToStore()
  293. if err != nil {
  294. if _, ok := err.(types.RetryError); !ok {
  295. return types.InternalErrorf("subnet removal failed because of %s", err.Error())
  296. }
  297. // Update to latest
  298. if erru := a.readFromStore(); erru != nil {
  299. // Restore and bail out
  300. a.Lock()
  301. a.subnets[subKey] = current
  302. a.Unlock()
  303. return fmt.Errorf("failed to get updated subnets config from datastore (%v) after (%v)", erru, err)
  304. }
  305. goto retry
  306. }
  307. // Get the list of smaller internal subnets
  308. subnetList, err := getInternalSubnets(subnet, a.internalHostSize)
  309. if err != nil {
  310. return err
  311. }
  312. for _, s := range subnetList {
  313. sk := subnetKey{addrSpace, subKey.subnet, s.String()}
  314. a.Lock()
  315. if bm, ok := a.addresses[sk]; ok {
  316. bm.Destroy()
  317. }
  318. delete(a.addresses, sk)
  319. a.Unlock()
  320. }
  321. return nil
  322. }
  323. // AddVendorInfo adds vendor specific data
  324. func (a *Allocator) AddVendorInfo([]byte) error {
  325. // no op for us
  326. return nil
  327. }
  328. /****************
  329. * IPAM Contract
  330. ****************/
  331. // Request allows requesting an IPv4 address from the specified address space
  332. func (a *Allocator) Request(addrSpace AddressSpace, req *AddressRequest) (*AddressResponse, error) {
  333. return a.request(addrSpace, req, v4)
  334. }
  335. // RequestV6 requesting an IPv6 address from the specified address space
  336. func (a *Allocator) RequestV6(addrSpace AddressSpace, req *AddressRequest) (*AddressResponse, error) {
  337. return a.request(addrSpace, req, v6)
  338. }
  339. func (a *Allocator) request(addrSpace AddressSpace, req *AddressRequest, version ipVersion) (*AddressResponse, error) {
  340. // Empty response
  341. response := &AddressResponse{}
  342. // Sanity check
  343. if addrSpace == "" {
  344. return response, ErrInvalidAddressSpace
  345. }
  346. // Validate request
  347. if err := req.Validate(); err != nil {
  348. return response, err
  349. }
  350. // Check ip version congruence
  351. if &req.Subnet != nil && version != getAddressVersion(req.Subnet.IP) {
  352. return response, ErrInvalidRequest
  353. }
  354. // Look for an address
  355. ip, _, err := a.reserveAddress(addrSpace, &req.Subnet, req.Address, version)
  356. if err == nil {
  357. // Populate response
  358. response.Address = ip
  359. a.Lock()
  360. response.Subnet = *a.subnets[subnetKey{addrSpace, req.Subnet.String(), ""}]
  361. a.Unlock()
  362. }
  363. return response, err
  364. }
  365. // Release allows releasing the address from the specified address space
  366. func (a *Allocator) Release(addrSpace AddressSpace, address net.IP) {
  367. if address == nil {
  368. return
  369. }
  370. ver := getAddressVersion(address)
  371. if ver == v4 {
  372. address = address.To4()
  373. }
  374. for _, subKey := range a.getSubnetList(addrSpace, ver) {
  375. a.Lock()
  376. space := a.addresses[subKey]
  377. a.Unlock()
  378. sub := subKey.canonicalChildSubnet()
  379. if sub.Contains(address) {
  380. // Retrieve correspondent ordinal in the subnet
  381. ordinal := ipToInt(getHostPortionIP(address, sub))
  382. // Release it
  383. for {
  384. var err error
  385. if err = space.PushReservation(ordinal/8, ordinal%8, true); err == nil {
  386. break
  387. }
  388. if _, ok := err.(types.RetryError); ok {
  389. // bitmask must have changed, retry delete
  390. continue
  391. }
  392. log.Warnf("Failed to release address %s because of internal error: %s", address.String(), err.Error())
  393. return
  394. }
  395. return
  396. }
  397. }
  398. }
  399. func (a *Allocator) reserveAddress(addrSpace AddressSpace, subnet *net.IPNet, prefAddress net.IP, ver ipVersion) (net.IP, *net.IPNet, error) {
  400. var keyList []subnetKey
  401. // Get the list of pointers to the internal subnets
  402. if subnet != nil {
  403. // Get the list of smaller internal subnets
  404. subnetList, err := getInternalSubnets(subnet, a.internalHostSize)
  405. if err != nil {
  406. return nil, nil, err
  407. }
  408. for _, s := range subnetList {
  409. keyList = append(keyList, subnetKey{addrSpace, subnet.String(), s.String()})
  410. }
  411. } else {
  412. a.Lock()
  413. keyList = a.getSubnetList(addrSpace, ver)
  414. a.Unlock()
  415. }
  416. if len(keyList) == 0 {
  417. return nil, nil, ErrNoAvailableSubnet
  418. }
  419. for _, key := range keyList {
  420. a.Lock()
  421. bitmask, ok := a.addresses[key]
  422. a.Unlock()
  423. if !ok {
  424. fmt.Printf("\nDid not find a bitmask for subnet key: %s", key.String())
  425. continue
  426. }
  427. address, err := a.getAddress(key.canonicalChildSubnet(), bitmask, prefAddress, ver)
  428. if err == nil {
  429. return address, subnet, nil
  430. }
  431. }
  432. return nil, nil, ErrNoAvailableIPs
  433. }
  434. // Get the list of available internal subnets for the specified address space and the desired ip version
  435. func (a *Allocator) getSubnetList(addrSpace AddressSpace, ver ipVersion) []subnetKey {
  436. var list [1024]subnetKey
  437. ind := 0
  438. a.Lock()
  439. for subKey := range a.addresses {
  440. s := subKey.canonicalSubnet()
  441. subVer := getAddressVersion(s.IP)
  442. if subKey.addressSpace == addrSpace && subVer == ver {
  443. list[ind] = subKey
  444. ind++
  445. }
  446. }
  447. a.Unlock()
  448. return list[0:ind]
  449. }
  450. func (a *Allocator) getAddress(subnet *net.IPNet, bitmask *bitseq.Handle, prefAddress net.IP, ver ipVersion) (net.IP, error) {
  451. var (
  452. bytePos, bitPos int
  453. ordinal int
  454. err error
  455. )
  456. // Look for free IP, skip .0 and .255, they will be automatically reserved
  457. for {
  458. if bitmask.Unselected() <= 0 {
  459. return nil, ErrNoAvailableIPs
  460. }
  461. if prefAddress == nil {
  462. bytePos, bitPos, err = bitmask.GetFirstAvailable()
  463. } else {
  464. ordinal = ipToInt(getHostPortionIP(prefAddress, subnet))
  465. bytePos, bitPos, err = bitmask.CheckIfAvailable(ordinal)
  466. }
  467. if err != nil {
  468. return nil, ErrNoAvailableIPs
  469. }
  470. // Lock it
  471. if err = bitmask.PushReservation(bytePos, bitPos, false); err != nil {
  472. if _, ok := err.(types.RetryError); !ok {
  473. return nil, fmt.Errorf("internal failure while reserving the address: %s", err.Error())
  474. }
  475. continue
  476. }
  477. // Build IP ordinal
  478. ordinal = bitPos + bytePos*8
  479. // For v4, let reservation of .0 and .255 happen automatically
  480. if ver == v4 && !isValidIP(ordinal) {
  481. continue
  482. }
  483. break
  484. }
  485. // Convert IP ordinal for this subnet into IP address
  486. return generateAddress(ordinal, subnet), nil
  487. }
  488. // DumpDatabase dumps the internal info
  489. func (a *Allocator) DumpDatabase() {
  490. a.Lock()
  491. defer a.Unlock()
  492. for k, config := range a.subnets {
  493. fmt.Printf("\n\n%s:", config.Subnet.String())
  494. subnetList, _ := getInternalSubnets(config.Subnet, a.internalHostSize)
  495. for _, s := range subnetList {
  496. internKey := subnetKey{k.addressSpace, config.Subnet.String(), s.String()}
  497. bm := a.addresses[internKey]
  498. fmt.Printf("\n\t%s: %s\n\t%d", internKey.childSubnet, bm, bm.Unselected())
  499. }
  500. }
  501. }
  502. // It generates the ip address in the passed subnet specified by
  503. // the passed host address ordinal
  504. func generateAddress(ordinal int, network *net.IPNet) net.IP {
  505. var address [16]byte
  506. // Get network portion of IP
  507. if network.IP.To4() != nil {
  508. copy(address[:], network.IP.To4())
  509. } else {
  510. copy(address[:], network.IP)
  511. }
  512. end := len(network.Mask)
  513. addIntToIP(address[:end], ordinal)
  514. return net.IP(address[:end])
  515. }
  516. func getAddressVersion(ip net.IP) ipVersion {
  517. if ip.To4() == nil {
  518. return v6
  519. }
  520. return v4
  521. }
  // isValidIP reports whether the host ordinal can be handed out: ordinals
  // whose lowest byte is 0 or 255 (.0 and .255) return false.
  func isValidIP(i int) bool {
  	switch i & 0xff {
  	case 0x00, 0xff:
  		return false
  	default:
  		return true
  	}
  }
  // addIntToIP merges the ordinal into the passed address, in place, lowest
  // byte last: 192.168.0.0 + 53 => 192.168.0.53.
  // The bytes are combined with a bitwise OR, which equals an addition as long
  // as the bits covered by the ordinal are zero in the address.
  func addIntToIP(array []byte, ordinal int) {
  	for pos := len(array) - 1; pos >= 0 && ordinal != 0; pos-- {
  		array[pos] |= byte(ordinal & 0xff)
  		ordinal >>= 8
  	}
  }
  // ipToInt converts the bytes of an address (typically a host portion) into
  // the respective ordinal, reading them as a big-endian number. Bytes that do
  // not fit in an int are shifted out.
  func ipToInt(ip []byte) int {
  	value := 0
  	for _, b := range ip {
  		value = value<<8 + int(b)
  	}
  	return value
  }
  // getHostPortionIP returns, given an address and subnet, the host portion of
  // the address: the address with every bit covered by the subnet mask cleared.
  //
  // The address is first brought to the byte length of the mask (4-byte form
  // for a 4-byte mask, 16-byte form for a 16-byte mask), so that an IPv4
  // address in its 16-byte form is handled like its 4-byte form. When the
  // lengths still differ only the bytes present in both are processed; the
  // remaining bytes of the result are zero.
  func getHostPortionIP(address net.IP, subnet *net.IPNet) net.IP {
  	mask := subnet.Mask

  	switch len(mask) {
  	case net.IPv4len:
  		if ip4 := address.To4(); ip4 != nil {
  			address = ip4
  		}
  	case net.IPv6len:
  		if ip16 := address.To16(); ip16 != nil {
  			address = ip16
  		}
  	}

  	// Never index past the shorter of the two
  	n := len(mask)
  	if len(address) < n {
  		n = len(address)
  	}

  	hostPortion := make(net.IP, len(address))
  	for i := 0; i < n; i++ {
  		hostPortion[i] = address[i] &^ mask[i]
  	}
  	return hostPortion
  }
  552. func printLine(head *bitseq.Sequence) {
  553. fmt.Println()
  554. for head != nil {
  555. fmt.Printf("-")
  556. head = head.Next
  557. }
  558. }