Browse Source

Make IP allocator lazy

Instead of allocating all possible IPs in advance, generate them as
needed.

A loop will cycle through all possible IPs in sequential order,
allocating them as needed and marking them as in use. Once the loop
exhausts all IPs, it will wrap back to the beginning. IPs that are
already in use will be skipped. When an IP is released, it will be
cleared and be available for allocation again.

Two decisions went into this design:

1) Minimize memory footprint by only allocating IPs that are actually
in use

2) Minimize reuse of released IP addresses to avoid sending traffic to
the wrong containers

As a side effect, the functions for IP/Mask<->int conversion have been
rewritten to never be able to fail in order to reduce the amount of
error returns.

Fixes gh-231
Dominik Honnef 12 năm trước cách đây
mục cha
commit
6f9a67a7c7
4 tập tin đã thay đổi với 200 bổ sung113 xóa
  1. 1 0
      AUTHORS
  2. 3 6
      container.go
  3. 88 75
      network.go
  4. 108 32
      network_test.go

+ 1 - 0
AUTHORS

@@ -7,6 +7,7 @@ Caleb Spare <cespare@gmail.com>
 Charles Hooper <charles.hooper@dotcloud.com>
 Daniel Mizyrycki <daniel.mizyrycki@dotcloud.com>
 Daniel Robinson <gottagetmac@gmail.com>
+Dominik Honnef <dominik@honnef.co>
 Don Spaulding <donspauldingii@gmail.com>
 ezbercih <cem.ezberci@gmail.com>
 Frederick F. Kautz IV <fkautz@alumni.cmu.edu>

+ 3 - 6
container.go

@@ -363,11 +363,10 @@ func (container *Container) allocateNetwork() error {
 	return nil
 }
 
-func (container *Container) releaseNetwork() error {
-	err := container.network.Release()
+func (container *Container) releaseNetwork()  {
+	container.network.Release()
 	container.network = nil
 	container.NetworkSettings = &NetworkSettings{}
-	return err
 }
 
 func (container *Container) monitor() {
@@ -382,9 +381,7 @@ func (container *Container) monitor() {
 	exitCode := container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
 
 	// Cleanup
-	if err := container.releaseNetwork(); err != nil {
-		log.Printf("%v: Failed to release network: %v", container.Id, err)
-	}
+	container.releaseNetwork()
 	if container.Config.OpenStdin {
 		if err := container.stdin.Close(); err != nil {
 			Debugf("%s: Error close stdin: %s", container.Id, err)

+ 88 - 75
network.go

@@ -1,7 +1,6 @@
 package docker
 
 import (
-	"bytes"
 	"encoding/binary"
 	"errors"
 	"fmt"
@@ -30,40 +29,25 @@ func networkRange(network *net.IPNet) (net.IP, net.IP) {
 }
 
 // Converts a 4 bytes IP into a 32 bit integer
-func ipToInt(ip net.IP) (int32, error) {
-	buf := bytes.NewBuffer(ip.To4())
-	var n int32
-	if err := binary.Read(buf, binary.BigEndian, &n); err != nil {
-		return 0, err
-	}
-	return n, nil
+func ipToInt(ip net.IP) int32 {
+	return int32(binary.BigEndian.Uint32(ip.To4()))
 }
 
 // Converts 32 bit integer into a 4 bytes IP address
-func intToIp(n int32) (net.IP, error) {
-	var buf bytes.Buffer
-	if err := binary.Write(&buf, binary.BigEndian, &n); err != nil {
-		return net.IP{}, err
-	}
-	ip := net.IPv4(0, 0, 0, 0).To4()
-	for i := 0; i < net.IPv4len; i++ {
-		ip[i] = buf.Bytes()[i]
-	}
-	return ip, nil
+func intToIp(n int32) net.IP {
+	b := make([]byte, 4)
+	binary.BigEndian.PutUint32(b, uint32(n))
+	return net.IP(b)
 }
 
 // Given a netmask, calculates the number of available hosts
-func networkSize(mask net.IPMask) (int32, error) {
+func networkSize(mask net.IPMask) int32 {
 	m := net.IPv4Mask(0, 0, 0, 0)
 	for i := 0; i < net.IPv4len; i++ {
 		m[i] = ^mask[i]
 	}
-	buf := bytes.NewBuffer(m)
-	var n int32
-	if err := binary.Read(buf, binary.BigEndian, &n); err != nil {
-		return 0, err
-	}
-	return n + 1, nil
+
+	return int32(binary.BigEndian.Uint32(m)) + 1
 }
 
 // Wrapper around the iptables command
@@ -211,66 +195,97 @@ func newPortAllocator(start, end int) (*PortAllocator, error) {
 
 // IP allocator: Atomatically allocate and release networking ports
 type IPAllocator struct {
-	network *net.IPNet
-	queue   chan (net.IP)
+	network       *net.IPNet
+	queueAlloc    chan allocatedIP
+	queueReleased chan net.IP
+	inUse         map[int32]struct{}
+}
+
+type allocatedIP struct {
+	ip  net.IP
+	err error
 }
 
-func (alloc *IPAllocator) populate() error {
+func (alloc *IPAllocator) run() {
 	firstIP, _ := networkRange(alloc.network)
-	size, err := networkSize(alloc.network.Mask)
-	if err != nil {
-		return err
-	}
-	// The queue size should be the network size - 3
-	// -1 for the network address, -1 for the broadcast address and
-	// -1 for the gateway address
-	alloc.queue = make(chan net.IP, size-3)
-	for i := int32(1); i < size-1; i++ {
-		ipNum, err := ipToInt(firstIP)
-		if err != nil {
-			return err
+	ipNum := ipToInt(firstIP)
+	ownIP := ipToInt(alloc.network.IP)
+	size := networkSize(alloc.network.Mask)
+
+	pos := int32(1)
+	max := size - 2 // -1 for the broadcast address, -1 for the gateway address
+	for {
+		var (
+			newNum int32
+			inUse  bool
+		)
+
+		// Find first unused IP, give up after one whole round
+		for attempt := int32(0); attempt < max; attempt++ {
+			newNum = ipNum + pos
+
+			pos = pos%max + 1
+
+			// The network's IP is never okay to use
+			if newNum == ownIP {
+				continue
+			}
+
+			if _, inUse = alloc.inUse[newNum]; !inUse {
+				// We found an unused IP
+				break
+			}
 		}
-		ip, err := intToIp(ipNum + int32(i))
-		if err != nil {
-			return err
+
+		ip := allocatedIP{ip: intToIp(newNum)}
+		if inUse {
+			ip.err = errors.New("No unallocated IP available")
 		}
-		// Discard the network IP (that's the host IP address)
-		if ip.Equal(alloc.network.IP) {
-			continue
+
+		select {
+		case alloc.queueAlloc <- ip:
+			alloc.inUse[newNum] = struct{}{}
+		case released := <-alloc.queueReleased:
+			r := ipToInt(released)
+			delete(alloc.inUse, r)
+
+			if inUse {
+				// If we couldn't allocate a new IP, the released one
+				// will be the only free one now, so instantly use it
+				// next time
+				pos = r - ipNum
+			} else {
+				// Use same IP as last time
+				if pos == 1 {
+					pos = max
+				} else {
+					pos--
+				}
+			}
 		}
-		alloc.queue <- ip
 	}
-	return nil
 }
 
 func (alloc *IPAllocator) Acquire() (net.IP, error) {
-	select {
-	case ip := <-alloc.queue:
-		return ip, nil
-	default:
-		return net.IP{}, errors.New("No more IP addresses available")
-	}
-	return net.IP{}, nil
+	ip := <-alloc.queueAlloc
+	return ip.ip, ip.err
 }
 
-func (alloc *IPAllocator) Release(ip net.IP) error {
-	select {
-	case alloc.queue <- ip:
-		return nil
-	default:
-		return errors.New("Too many IP addresses have been released")
-	}
-	return nil
+func (alloc *IPAllocator) Release(ip net.IP) {
+	alloc.queueReleased <- ip
 }
 
-func newIPAllocator(network *net.IPNet) (*IPAllocator, error) {
+func newIPAllocator(network *net.IPNet) *IPAllocator {
 	alloc := &IPAllocator{
-		network: network,
+		network:       network,
+		queueAlloc:    make(chan allocatedIP),
+		queueReleased: make(chan net.IP),
+		inUse:         make(map[int32]struct{}),
 	}
-	if err := alloc.populate(); err != nil {
-		return nil, err
-	}
-	return alloc, nil
+
+	go alloc.run()
+
+	return alloc
 }
 
 // Network interface represents the networking stack of a container
@@ -297,7 +312,7 @@ func (iface *NetworkInterface) AllocatePort(port int) (int, error) {
 }
 
 // Release: Network cleanup - release all resources
-func (iface *NetworkInterface) Release() error {
+func (iface *NetworkInterface) Release() {
 	for _, port := range iface.extPorts {
 		if err := iface.manager.portMapper.Unmap(port); err != nil {
 			log.Printf("Unable to unmap port %v: %v", port, err)
@@ -307,7 +322,8 @@ func (iface *NetworkInterface) Release() error {
 		}
 
 	}
-	return iface.manager.ipAllocator.Release(iface.IPNet.IP)
+
+	iface.manager.ipAllocator.Release(iface.IPNet.IP)
 }
 
 // Network Manager manages a set of network interfaces
@@ -342,10 +358,7 @@ func newNetworkManager(bridgeIface string) (*NetworkManager, error) {
 	}
 	network := addr.(*net.IPNet)
 
-	ipAllocator, err := newIPAllocator(network)
-	if err != nil {
-		return nil, err
-	}
+	ipAllocator := newIPAllocator(network)
 
 	portAllocator, err := newPortAllocator(portRangeStart, portRangeEnd)
 	if err != nil {

+ 108 - 32
network_test.go

@@ -28,8 +28,8 @@ func TestNetworkRange(t *testing.T) {
 	if !last.Equal(net.ParseIP("192.168.0.255")) {
 		t.Error(last.String())
 	}
-	if size, err := networkSize(network.Mask); err != nil || size != 256 {
-		t.Error(size, err)
+	if size := networkSize(network.Mask); size != 256 {
+		t.Error(size)
 	}
 
 	// Class A test
@@ -41,8 +41,8 @@ func TestNetworkRange(t *testing.T) {
 	if !last.Equal(net.ParseIP("10.255.255.255")) {
 		t.Error(last.String())
 	}
-	if size, err := networkSize(network.Mask); err != nil || size != 16777216 {
-		t.Error(size, err)
+	if size := networkSize(network.Mask); size != 16777216 {
+		t.Error(size)
 	}
 
 	// Class A, random IP address
@@ -64,8 +64,8 @@ func TestNetworkRange(t *testing.T) {
 	if !last.Equal(net.ParseIP("10.1.2.3")) {
 		t.Error(last.String())
 	}
-	if size, err := networkSize(network.Mask); err != nil || size != 1 {
-		t.Error(size, err)
+	if size := networkSize(network.Mask); size != 1 {
+		t.Error(size)
 	}
 
 	// 31bit mask
@@ -77,8 +77,8 @@ func TestNetworkRange(t *testing.T) {
 	if !last.Equal(net.ParseIP("10.1.2.3")) {
 		t.Error(last.String())
 	}
-	if size, err := networkSize(network.Mask); err != nil || size != 2 {
-		t.Error(size, err)
+	if size := networkSize(network.Mask); size != 2 {
+		t.Error(size)
 	}
 
 	// 26bit mask
@@ -90,54 +90,130 @@ func TestNetworkRange(t *testing.T) {
 	if !last.Equal(net.ParseIP("10.1.2.63")) {
 		t.Error(last.String())
 	}
-	if size, err := networkSize(network.Mask); err != nil || size != 64 {
-		t.Error(size, err)
+	if size := networkSize(network.Mask); size != 64 {
+		t.Error(size)
 	}
 }
 
 func TestConversion(t *testing.T) {
 	ip := net.ParseIP("127.0.0.1")
-	i, err := ipToInt(ip)
-	if err != nil {
-		t.Fatal(err)
-	}
+	i := ipToInt(ip)
 	if i == 0 {
 		t.Fatal("converted to zero")
 	}
-	conv, err := intToIp(i)
-	if err != nil {
-		t.Fatal(err)
-	}
+	conv := intToIp(i)
 	if !ip.Equal(conv) {
 		t.Error(conv.String())
 	}
 }
 
 func TestIPAllocator(t *testing.T) {
-	gwIP, n, _ := net.ParseCIDR("127.0.0.1/29")
-	alloc, err := newIPAllocator(&net.IPNet{IP: gwIP, Mask: n.Mask})
-	if err != nil {
-		t.Fatal(err)
+	expectedIPs := []net.IP{
+		0: net.IPv4(127, 0, 0, 2),
+		1: net.IPv4(127, 0, 0, 3),
+		2: net.IPv4(127, 0, 0, 4),
+		3: net.IPv4(127, 0, 0, 5),
+		4: net.IPv4(127, 0, 0, 6),
 	}
-	var lastIP net.IP
+
+	gwIP, n, _ := net.ParseCIDR("127.0.0.1/29")
+	alloc := newIPAllocator(&net.IPNet{IP: gwIP, Mask: n.Mask})
+	// Pool after initialisation (f = free, u = used)
+	// 2(f) - 3(f) - 4(f) - 5(f) - 6(f)
+	//  ↑
+
+	// Check that we get 5 IPs, from 127.0.0.2–127.0.0.6, in that
+	// order.
 	for i := 0; i < 5; i++ {
 		ip, err := alloc.Acquire()
 		if err != nil {
 			t.Fatal(err)
 		}
-		lastIP = ip
+
+		assertIPEquals(t, expectedIPs[i], ip)
 	}
-	ip, err := alloc.Acquire()
+	// Before loop begin
+	// 2(f) - 3(f) - 4(f) - 5(f) - 6(f)
+	//  ↑
+
+	// After i = 0
+	// 2(u) - 3(f) - 4(f) - 5(f) - 6(f)
+	//         ↑
+
+	// After i = 1
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
+	//                ↑
+
+	// After i = 2
+	// 2(u) - 3(u) - 4(u) - 5(f) - 6(f)
+	//                       ↑
+
+	// After i = 3
+	// 2(u) - 3(u) - 4(u) - 5(u) - 6(f)
+	//                              ↑
+
+	// After i = 4
+	// 2(u) - 3(u) - 4(u) - 5(u) - 6(u)
+	//  ↑
+
+	// Check that there are no more IPs
+	_, err := alloc.Acquire()
 	if err == nil {
 		t.Fatal("There shouldn't be any IP addresses at this point")
 	}
-	// Release 1 IP
-	alloc.Release(lastIP)
-	ip, err = alloc.Acquire()
-	if err != nil {
-		t.Fatal(err)
+
+	// Release some IPs in non-sequential order
+	alloc.Release(expectedIPs[3])
+	// 2(u) - 3(u) - 4(u) - 5(f) - 6(u)
+	//                       ↑
+
+	alloc.Release(expectedIPs[2])
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(u)
+	//                       ↑
+
+	alloc.Release(expectedIPs[4])
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
+	//                       ↑
+
+	// Make sure that IPs are reused in sequential order, starting
+	// with the first released IP
+	newIPs := make([]net.IP, 3)
+	for i := 0; i < 3; i++ {
+		ip, err := alloc.Acquire()
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		newIPs[i] = ip
+	}
+	// Before loop begin
+	// 2(u) - 3(u) - 4(f) - 5(f) - 6(f)
+	//                       ↑
+
+	// After i = 0
+	// 2(u) - 3(u) - 4(f) - 5(u) - 6(f)
+	//                              ↑
+
+	// After i = 1
+	// 2(u) - 3(u) - 4(f) - 5(u) - 6(u)
+	//                ↑
+
+	// After i = 2
+	// 2(u) - 3(u) - 4(u) - 5(u) - 6(u)
+	//                       ↑
+
+	assertIPEquals(t, expectedIPs[3], newIPs[0])
+	assertIPEquals(t, expectedIPs[4], newIPs[1])
+	assertIPEquals(t, expectedIPs[2], newIPs[2])
+
+	_, err = alloc.Acquire()
+	if err == nil {
+		t.Fatal("There shouldn't be any IP addresses at this point")
 	}
-	if !ip.Equal(lastIP) {
-		t.Fatal(ip.String())
+}
+
+func assertIPEquals(t *testing.T, ip1, ip2 net.IP) {
+	if !ip1.Equal(ip2) {
+		t.Fatalf("Expected IP %s, got %s", ip1, ip2)
 	}
 }