Make IP allocator lazy

Instead of allocating all possible IPs in advance, generate them as
needed.

A loop will cycle through all possible IPs in sequential order,
allocating them as needed and marking them as in use. Once the loop
exhausts all IPs, it will wrap back to the beginning. IPs that are
already in use will be skipped. When an IP is released, it will be
cleared and be available for allocation again.

Two decisions went into this design:

1) Minimize memory footprint by only allocating IPs that are actually
in use

2) Minimize reuse of released IP addresses to avoid sending traffic to
the wrong containers

As a side effect, the functions for IP/Mask<->int conversion have been
rewritten to never be able to fail in order to reduce the amount of
error returns.

Fixes gh-231
This commit is contained in:
Dominik Honnef 2013-03-30 23:32:10 +01:00
parent d949e2804a
commit 6f9a67a7c7
4 changed files with 203 additions and 116 deletions

View file

@ -7,6 +7,7 @@ Caleb Spare <cespare@gmail.com>
Charles Hooper <charles.hooper@dotcloud.com> Charles Hooper <charles.hooper@dotcloud.com>
Daniel Mizyrycki <daniel.mizyrycki@dotcloud.com> Daniel Mizyrycki <daniel.mizyrycki@dotcloud.com>
Daniel Robinson <gottagetmac@gmail.com> Daniel Robinson <gottagetmac@gmail.com>
Dominik Honnef <dominik@honnef.co>
Don Spaulding <donspauldingii@gmail.com> Don Spaulding <donspauldingii@gmail.com>
ezbercih <cem.ezberci@gmail.com> ezbercih <cem.ezberci@gmail.com>
Frederick F. Kautz IV <fkautz@alumni.cmu.edu> Frederick F. Kautz IV <fkautz@alumni.cmu.edu>

View file

@ -363,11 +363,10 @@ func (container *Container) allocateNetwork() error {
return nil return nil
} }
func (container *Container) releaseNetwork() error { func (container *Container) releaseNetwork() {
err := container.network.Release() container.network.Release()
container.network = nil container.network = nil
container.NetworkSettings = &NetworkSettings{} container.NetworkSettings = &NetworkSettings{}
return err
} }
func (container *Container) monitor() { func (container *Container) monitor() {
@ -382,9 +381,7 @@ func (container *Container) monitor() {
exitCode := container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus() exitCode := container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
// Cleanup // Cleanup
if err := container.releaseNetwork(); err != nil { container.releaseNetwork()
log.Printf("%v: Failed to release network: %v", container.Id, err)
}
if container.Config.OpenStdin { if container.Config.OpenStdin {
if err := container.stdin.Close(); err != nil { if err := container.stdin.Close(); err != nil {
Debugf("%s: Error close stdin: %s", container.Id, err) Debugf("%s: Error close stdin: %s", container.Id, err)

View file

@ -1,7 +1,6 @@
package docker package docker
import ( import (
"bytes"
"encoding/binary" "encoding/binary"
"errors" "errors"
"fmt" "fmt"
@ -30,40 +29,25 @@ func networkRange(network *net.IPNet) (net.IP, net.IP) {
} }
// Converts a 4 bytes IP into a 32 bit integer // Converts a 4 bytes IP into a 32 bit integer
func ipToInt(ip net.IP) (int32, error) { func ipToInt(ip net.IP) int32 {
buf := bytes.NewBuffer(ip.To4()) return int32(binary.BigEndian.Uint32(ip.To4()))
var n int32
if err := binary.Read(buf, binary.BigEndian, &n); err != nil {
return 0, err
}
return n, nil
} }
// Converts 32 bit integer into a 4 bytes IP address // Converts 32 bit integer into a 4 bytes IP address
func intToIp(n int32) (net.IP, error) { func intToIp(n int32) net.IP {
var buf bytes.Buffer b := make([]byte, 4)
if err := binary.Write(&buf, binary.BigEndian, &n); err != nil { binary.BigEndian.PutUint32(b, uint32(n))
return net.IP{}, err return net.IP(b)
}
ip := net.IPv4(0, 0, 0, 0).To4()
for i := 0; i < net.IPv4len; i++ {
ip[i] = buf.Bytes()[i]
}
return ip, nil
} }
// Given a netmask, calculates the number of available hosts // Given a netmask, calculates the number of available hosts
func networkSize(mask net.IPMask) (int32, error) { func networkSize(mask net.IPMask) int32 {
m := net.IPv4Mask(0, 0, 0, 0) m := net.IPv4Mask(0, 0, 0, 0)
for i := 0; i < net.IPv4len; i++ { for i := 0; i < net.IPv4len; i++ {
m[i] = ^mask[i] m[i] = ^mask[i]
} }
buf := bytes.NewBuffer(m)
var n int32 return int32(binary.BigEndian.Uint32(m)) + 1
if err := binary.Read(buf, binary.BigEndian, &n); err != nil {
return 0, err
}
return n + 1, nil
} }
// Wrapper around the iptables command // Wrapper around the iptables command
@ -211,66 +195,97 @@ func newPortAllocator(start, end int) (*PortAllocator, error) {
// IP allocator: Atomatically allocate and release networking ports // IP allocator: Atomatically allocate and release networking ports
type IPAllocator struct { type IPAllocator struct {
network *net.IPNet network *net.IPNet
queue chan (net.IP) queueAlloc chan allocatedIP
queueReleased chan net.IP
inUse map[int32]struct{}
} }
func (alloc *IPAllocator) populate() error { type allocatedIP struct {
ip net.IP
err error
}
func (alloc *IPAllocator) run() {
firstIP, _ := networkRange(alloc.network) firstIP, _ := networkRange(alloc.network)
size, err := networkSize(alloc.network.Mask) ipNum := ipToInt(firstIP)
if err != nil { ownIP := ipToInt(alloc.network.IP)
return err size := networkSize(alloc.network.Mask)
pos := int32(1)
max := size - 2 // -1 for the broadcast address, -1 for the gateway address
for {
var (
newNum int32
inUse bool
)
// Find first unused IP, give up after one whole round
for attempt := int32(0); attempt < max; attempt++ {
newNum = ipNum + pos
pos = pos%max + 1
// The network's IP is never okay to use
if newNum == ownIP {
continue
}
if _, inUse = alloc.inUse[newNum]; !inUse {
// We found an unused IP
break
}
}
ip := allocatedIP{ip: intToIp(newNum)}
if inUse {
ip.err = errors.New("No unallocated IP available")
}
select {
case alloc.queueAlloc <- ip:
alloc.inUse[newNum] = struct{}{}
case released := <-alloc.queueReleased:
r := ipToInt(released)
delete(alloc.inUse, r)
if inUse {
// If we couldn't allocate a new IP, the released one
// will be the only free one now, so instantly use it
// next time
pos = r - ipNum
} else {
// Use same IP as last time
if pos == 1 {
pos = max
} else {
pos--
}
}
}
} }
// The queue size should be the network size - 3
// -1 for the network address, -1 for the broadcast address and
// -1 for the gateway address
alloc.queue = make(chan net.IP, size-3)
for i := int32(1); i < size-1; i++ {
ipNum, err := ipToInt(firstIP)
if err != nil {
return err
}
ip, err := intToIp(ipNum + int32(i))
if err != nil {
return err
}
// Discard the network IP (that's the host IP address)
if ip.Equal(alloc.network.IP) {
continue
}
alloc.queue <- ip
}
return nil
} }
func (alloc *IPAllocator) Acquire() (net.IP, error) { func (alloc *IPAllocator) Acquire() (net.IP, error) {
select { ip := <-alloc.queueAlloc
case ip := <-alloc.queue: return ip.ip, ip.err
return ip, nil
default:
return net.IP{}, errors.New("No more IP addresses available")
}
return net.IP{}, nil
} }
func (alloc *IPAllocator) Release(ip net.IP) error { func (alloc *IPAllocator) Release(ip net.IP) {
select { alloc.queueReleased <- ip
case alloc.queue <- ip:
return nil
default:
return errors.New("Too many IP addresses have been released")
}
return nil
} }
func newIPAllocator(network *net.IPNet) (*IPAllocator, error) { func newIPAllocator(network *net.IPNet) *IPAllocator {
alloc := &IPAllocator{ alloc := &IPAllocator{
network: network, network: network,
queueAlloc: make(chan allocatedIP),
queueReleased: make(chan net.IP),
inUse: make(map[int32]struct{}),
} }
if err := alloc.populate(); err != nil {
return nil, err go alloc.run()
}
return alloc, nil return alloc
} }
// Network interface represents the networking stack of a container // Network interface represents the networking stack of a container
@ -297,7 +312,7 @@ func (iface *NetworkInterface) AllocatePort(port int) (int, error) {
} }
// Release: Network cleanup - release all resources // Release: Network cleanup - release all resources
func (iface *NetworkInterface) Release() error { func (iface *NetworkInterface) Release() {
for _, port := range iface.extPorts { for _, port := range iface.extPorts {
if err := iface.manager.portMapper.Unmap(port); err != nil { if err := iface.manager.portMapper.Unmap(port); err != nil {
log.Printf("Unable to unmap port %v: %v", port, err) log.Printf("Unable to unmap port %v: %v", port, err)
@ -307,7 +322,8 @@ func (iface *NetworkInterface) Release() error {
} }
} }
return iface.manager.ipAllocator.Release(iface.IPNet.IP)
iface.manager.ipAllocator.Release(iface.IPNet.IP)
} }
// Network Manager manages a set of network interfaces // Network Manager manages a set of network interfaces
@ -342,10 +358,7 @@ func newNetworkManager(bridgeIface string) (*NetworkManager, error) {
} }
network := addr.(*net.IPNet) network := addr.(*net.IPNet)
ipAllocator, err := newIPAllocator(network) ipAllocator := newIPAllocator(network)
if err != nil {
return nil, err
}
portAllocator, err := newPortAllocator(portRangeStart, portRangeEnd) portAllocator, err := newPortAllocator(portRangeStart, portRangeEnd)
if err != nil { if err != nil {

View file

@ -28,8 +28,8 @@ func TestNetworkRange(t *testing.T) {
if !last.Equal(net.ParseIP("192.168.0.255")) { if !last.Equal(net.ParseIP("192.168.0.255")) {
t.Error(last.String()) t.Error(last.String())
} }
if size, err := networkSize(network.Mask); err != nil || size != 256 { if size := networkSize(network.Mask); size != 256 {
t.Error(size, err) t.Error(size)
} }
// Class A test // Class A test
@ -41,8 +41,8 @@ func TestNetworkRange(t *testing.T) {
if !last.Equal(net.ParseIP("10.255.255.255")) { if !last.Equal(net.ParseIP("10.255.255.255")) {
t.Error(last.String()) t.Error(last.String())
} }
if size, err := networkSize(network.Mask); err != nil || size != 16777216 { if size := networkSize(network.Mask); size != 16777216 {
t.Error(size, err) t.Error(size)
} }
// Class A, random IP address // Class A, random IP address
@ -64,8 +64,8 @@ func TestNetworkRange(t *testing.T) {
if !last.Equal(net.ParseIP("10.1.2.3")) { if !last.Equal(net.ParseIP("10.1.2.3")) {
t.Error(last.String()) t.Error(last.String())
} }
if size, err := networkSize(network.Mask); err != nil || size != 1 { if size := networkSize(network.Mask); size != 1 {
t.Error(size, err) t.Error(size)
} }
// 31bit mask // 31bit mask
@ -77,8 +77,8 @@ func TestNetworkRange(t *testing.T) {
if !last.Equal(net.ParseIP("10.1.2.3")) { if !last.Equal(net.ParseIP("10.1.2.3")) {
t.Error(last.String()) t.Error(last.String())
} }
if size, err := networkSize(network.Mask); err != nil || size != 2 { if size := networkSize(network.Mask); size != 2 {
t.Error(size, err) t.Error(size)
} }
// 26bit mask // 26bit mask
@ -90,54 +90,130 @@ func TestNetworkRange(t *testing.T) {
if !last.Equal(net.ParseIP("10.1.2.63")) { if !last.Equal(net.ParseIP("10.1.2.63")) {
t.Error(last.String()) t.Error(last.String())
} }
if size, err := networkSize(network.Mask); err != nil || size != 64 { if size := networkSize(network.Mask); size != 64 {
t.Error(size, err) t.Error(size)
} }
} }
func TestConversion(t *testing.T) { func TestConversion(t *testing.T) {
ip := net.ParseIP("127.0.0.1") ip := net.ParseIP("127.0.0.1")
i, err := ipToInt(ip) i := ipToInt(ip)
if err != nil {
t.Fatal(err)
}
if i == 0 { if i == 0 {
t.Fatal("converted to zero") t.Fatal("converted to zero")
} }
conv, err := intToIp(i) conv := intToIp(i)
if err != nil {
t.Fatal(err)
}
if !ip.Equal(conv) { if !ip.Equal(conv) {
t.Error(conv.String()) t.Error(conv.String())
} }
} }
func TestIPAllocator(t *testing.T) { func TestIPAllocator(t *testing.T) {
gwIP, n, _ := net.ParseCIDR("127.0.0.1/29") expectedIPs := []net.IP{
alloc, err := newIPAllocator(&net.IPNet{IP: gwIP, Mask: n.Mask}) 0: net.IPv4(127, 0, 0, 2),
if err != nil { 1: net.IPv4(127, 0, 0, 3),
t.Fatal(err) 2: net.IPv4(127, 0, 0, 4),
3: net.IPv4(127, 0, 0, 5),
4: net.IPv4(127, 0, 0, 6),
} }
var lastIP net.IP
gwIP, n, _ := net.ParseCIDR("127.0.0.1/29")
alloc := newIPAllocator(&net.IPNet{IP: gwIP, Mask: n.Mask})
// Pool after initialisation (f = free, u = used)
// 2(f) - 3(f) - 4(f) - 5(f) - 6(f)
// ↑
// Check that we get 5 IPs, from 127.0.0.2127.0.0.6, in that
// order.
for i := 0; i < 5; i++ { for i := 0; i < 5; i++ {
ip, err := alloc.Acquire() ip, err := alloc.Acquire()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
lastIP = ip
assertIPEquals(t, expectedIPs[i], ip)
} }
ip, err := alloc.Acquire() // Before loop begin
// 2(f) - 3(f) - 4(f) - 5(f) - 6(f)
// ↑
// After i = 0
// 2(u) - 3(f) - 4(f) - 5(f) - 6(f)
// ↑
// After i = 1
// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
// ↑
// After i = 2
// 2(u) - 3(u) - 4(u) - 5(f) - 6(f)
// ↑
// After i = 3
// 2(u) - 3(u) - 4(u) - 5(u) - 6(f)
// ↑
// After i = 4
// 2(u) - 3(u) - 4(u) - 5(u) - 6(u)
// ↑
// Check that there are no more IPs
_, err := alloc.Acquire()
if err == nil { if err == nil {
t.Fatal("There shouldn't be any IP addresses at this point") t.Fatal("There shouldn't be any IP addresses at this point")
} }
// Release 1 IP
alloc.Release(lastIP) // Release some IPs in non-sequential order
ip, err = alloc.Acquire() alloc.Release(expectedIPs[3])
if err != nil { // 2(u) - 3(u) - 4(u) - 5(f) - 6(u)
t.Fatal(err) // ↑
alloc.Release(expectedIPs[2])
// 2(u) - 3(u) - 4(f) - 5(f) - 6(u)
// ↑
alloc.Release(expectedIPs[4])
// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
// ↑
// Make sure that IPs are reused in sequential order, starting
// with the first released IP
newIPs := make([]net.IP, 3)
for i := 0; i < 3; i++ {
ip, err := alloc.Acquire()
if err != nil {
t.Fatal(err)
}
newIPs[i] = ip
} }
if !ip.Equal(lastIP) { // Before loop begin
t.Fatal(ip.String()) // 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
// ↑
// After i = 0
// 2(u) - 3(u) - 4(f) - 5(u) - 6(f)
// ↑
// After i = 1
// 2(u) - 3(u) - 4(f) - 5(u) - 6(u)
// ↑
// After i = 2
// 2(u) - 3(u) - 4(u) - 5(u) - 6(u)
// ↑
assertIPEquals(t, expectedIPs[3], newIPs[0])
assertIPEquals(t, expectedIPs[4], newIPs[1])
assertIPEquals(t, expectedIPs[2], newIPs[2])
_, err = alloc.Acquire()
if err == nil {
t.Fatal("There shouldn't be any IP addresses at this point")
}
}
func assertIPEquals(t *testing.T, ip1, ip2 net.IP) {
if !ip1.Equal(ip2) {
t.Fatalf("Expected IP %s, got %s", ip1, ip2)
} }
} }