Merge pull request #23 from dotcloud/networking

Functional TCP networking: port allocation, inbound DNAT, outbound SNAT, multiple ports.
This commit is contained in:
Solomon Hykes 2013-02-28 16:32:55 -08:00
commit 76b66f73b3
7 changed files with 622 additions and 39 deletions

View file

@ -11,6 +11,7 @@ import (
"os"
"os/exec"
"path"
"strconv"
"strings"
"syscall"
"time"
@ -35,6 +36,10 @@ type Container struct {
Filesystem *Filesystem
State *State
network *NetworkInterface
networkManager *NetworkManager
NetworkSettings *NetworkSettings
SysInitPath string
lxcConfigPath string
cmd *exec.Cmd
@ -51,27 +56,36 @@ type Config struct {
Hostname string
User string
Ram int64
Ports []int
Tty bool // Attach standard streams to a tty, including stdin if it is not closed.
OpenStdin bool // Open stdin
}
func createContainer(id string, root string, command string, args []string, layers []string, config *Config) (*Container, error) {
container := &Container{
Id: id,
Root: root,
Created: time.Now(),
Path: command,
Args: args,
Config: config,
Filesystem: newFilesystem(path.Join(root, "rootfs"), path.Join(root, "rw"), layers),
State: newState(),
type NetworkSettings struct {
IpAddress string
IpPrefixLen int
Gateway string
PortMapping map[string]string
}
SysInitPath: sysInitPath,
lxcConfigPath: path.Join(root, "config.lxc"),
stdout: newWriteBroadcaster(),
stderr: newWriteBroadcaster(),
stdoutLog: new(bytes.Buffer),
stderrLog: new(bytes.Buffer),
func createContainer(id string, root string, command string, args []string, layers []string, config *Config, netManager *NetworkManager) (*Container, error) {
container := &Container{
Id: id,
Root: root,
Created: time.Now(),
Path: command,
Args: args,
Config: config,
Filesystem: newFilesystem(path.Join(root, "rootfs"), path.Join(root, "rw"), layers),
State: newState(),
networkManager: netManager,
NetworkSettings: &NetworkSettings{},
SysInitPath: sysInitPath,
lxcConfigPath: path.Join(root, "config.lxc"),
stdout: newWriteBroadcaster(),
stderr: newWriteBroadcaster(),
stdoutLog: new(bytes.Buffer),
stderrLog: new(bytes.Buffer),
}
if container.Config.OpenStdin {
container.stdin, container.stdinPipe = io.Pipe()
@ -93,17 +107,19 @@ func createContainer(id string, root string, command string, args []string, laye
return container, nil
}
func loadContainer(containerPath string) (*Container, error) {
func loadContainer(containerPath string, netManager *NetworkManager) (*Container, error) {
data, err := ioutil.ReadFile(path.Join(containerPath, "config.json"))
if err != nil {
return nil, err
}
container := &Container{
stdout: newWriteBroadcaster(),
stderr: newWriteBroadcaster(),
stdoutLog: new(bytes.Buffer),
stderrLog: new(bytes.Buffer),
lxcConfigPath: path.Join(containerPath, "config.lxc"),
stdout: newWriteBroadcaster(),
stderr: newWriteBroadcaster(),
stdoutLog: new(bytes.Buffer),
stderrLog: new(bytes.Buffer),
lxcConfigPath: path.Join(containerPath, "config.lxc"),
networkManager: netManager,
NetworkSettings: &NetworkSettings{},
}
if err := json.Unmarshal(data, container); err != nil {
return nil, err
@ -263,6 +279,9 @@ func (container *Container) Start() error {
if err := container.Filesystem.EnsureMounted(); err != nil {
return err
}
if err := container.allocateNetwork(); err != nil {
return err
}
if err := container.generateLXCConfig(); err != nil {
return err
}
@ -272,11 +291,19 @@ func (container *Container) Start() error {
"--",
"/sbin/init",
}
// Networking
params = append(params, "-g", container.network.Gateway.String())
// User
if container.Config.User != "" {
params = append(params, "-u", container.Config.User)
}
// Program
params = append(params, "--", container.Path)
params = append(params, container.Args...)
container.cmd = exec.Command("/usr/bin/lxc-start", params...)
var err error
@ -343,12 +370,43 @@ func (container *Container) StderrLog() io.Reader {
return strings.NewReader(container.stderrLog.String())
}
func (container *Container) allocateNetwork() error {
iface, err := container.networkManager.Allocate()
if err != nil {
return err
}
container.NetworkSettings.PortMapping = make(map[string]string)
for _, port := range container.Config.Ports {
if extPort, err := iface.AllocatePort(port); err != nil {
iface.Release()
return err
} else {
container.NetworkSettings.PortMapping[strconv.Itoa(port)] = strconv.Itoa(extPort)
}
}
container.network = iface
container.NetworkSettings.IpAddress = iface.IPNet.IP.String()
container.NetworkSettings.IpPrefixLen, _ = iface.IPNet.Mask.Size()
container.NetworkSettings.Gateway = iface.Gateway.String()
return nil
}
func (container *Container) releaseNetwork() error {
err := container.network.Release()
container.network = nil
container.NetworkSettings = &NetworkSettings{}
return err
}
func (container *Container) monitor() {
// Wait for the program to exit
container.cmd.Wait()
exitCode := container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
// Cleanup
if err := container.releaseNetwork(); err != nil {
log.Printf("%v: Failed to release network: %v", container.Id, err)
}
container.stdout.Close()
container.stderr.Close()
if err := container.Filesystem.Umount(); err != nil {

View file

@ -11,9 +11,10 @@ import (
)
type Docker struct {
root string
repository string
containers *list.List
root string
repository string
containers *list.List
networkManager *NetworkManager
}
func (docker *Docker) List() []*Container {
@ -51,7 +52,7 @@ func (docker *Docker) Create(id string, command string, args []string, layers []
return nil, fmt.Errorf("Container %v already exists", id)
}
root := path.Join(docker.repository, id)
container, err := createContainer(id, root, command, args, layers, config)
container, err := createContainer(id, root, command, args, layers, config, docker.networkManager)
if err != nil {
return nil, err
}
@ -86,7 +87,7 @@ func (docker *Docker) restore() error {
return err
}
for _, v := range dir {
container, err := loadContainer(path.Join(docker.repository, v.Name()))
container, err := loadContainer(path.Join(docker.repository, v.Name()), docker.networkManager)
if err != nil {
log.Printf("Failed to load container %v: %v", v.Name(), err)
continue
@ -101,10 +102,15 @@ func New() (*Docker, error) {
}
func NewFromDirectory(root string) (*Docker, error) {
netManager, err := newNetworkManager(networkBridgeIface)
if err != nil {
return nil, err
}
docker := &Docker{
root: root,
repository: path.Join(root, "containers"),
containers: list.New(),
root: root,
repository: path.Join(root, "containers"),
containers: list.New(),
networkManager: netManager,
}
if err := os.MkdirAll(docker.repository, 0700); err != nil && !os.IsExist(err) {

View file

@ -14,12 +14,12 @@ lxc.utsname = {{.Id}}
#lxc.aa_profile = unconfined
# network configuration
#lxc.network.type = veth
#lxc.network.flags = up
#lxc.network.link = br0
#lxc.network.name = eth0 # Internal container network interface name
#lxc.network.mtu = 1500
#lxc.network.ipv4 = {ip_address}/{ip_prefix_len}
lxc.network.type = veth
lxc.network.flags = up
lxc.network.link = lxcbr0
lxc.network.name = eth0
lxc.network.mtu = 1500
lxc.network.ipv4 = {{.NetworkSettings.IpAddress}}/{{.NetworkSettings.IpPrefixLen}}
# root filesystem
{{$ROOTFS := .Filesystem.RootFS}}

356
network.go Normal file
View file

@ -0,0 +1,356 @@
package docker
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"log"
"net"
"os/exec"
"strconv"
"strings"
)
const (
networkBridgeIface = "lxcbr0"
portRangeStart = 49153
portRangeEnd = 65535
)
// Calculates the first and last IP addresses in an IPNet
func networkRange(network *net.IPNet) (net.IP, net.IP) {
netIP := network.IP.To4()
firstIP := netIP.Mask(network.Mask)
lastIP := net.IPv4(0, 0, 0, 0).To4()
for i := 0; i < len(lastIP); i++ {
lastIP[i] = netIP[i] | ^network.Mask[i]
}
return firstIP, lastIP
}
// Converts a 4 bytes IP into a 32 bit integer
func ipToInt(ip net.IP) (int32, error) {
buf := bytes.NewBuffer(ip.To4())
var n int32
if err := binary.Read(buf, binary.BigEndian, &n); err != nil {
return 0, err
}
return n, nil
}
// Converts 32 bit integer into a 4 bytes IP address
func intToIp(n int32) (net.IP, error) {
var buf bytes.Buffer
if err := binary.Write(&buf, binary.BigEndian, &n); err != nil {
return net.IP{}, err
}
ip := net.IPv4(0, 0, 0, 0).To4()
for i := 0; i < net.IPv4len; i++ {
ip[i] = buf.Bytes()[i]
}
return ip, nil
}
// Given a netmask, calculates the number of available hosts
func networkSize(mask net.IPMask) (int32, error) {
m := net.IPv4Mask(0, 0, 0, 0)
for i := 0; i < net.IPv4len; i++ {
m[i] = ^mask[i]
}
buf := bytes.NewBuffer(m)
var n int32
if err := binary.Read(buf, binary.BigEndian, &n); err != nil {
return 0, err
}
return n + 1, nil
}
// Wrapper around the iptables command
func iptables(args ...string) error {
if err := exec.Command("/sbin/iptables", args...).Run(); err != nil {
return fmt.Errorf("iptables failed: iptables %v", strings.Join(args, " "))
}
return nil
}
// Return the IPv4 address of a network interface
func getIfaceAddr(name string) (net.Addr, error) {
iface, err := net.InterfaceByName(name)
if err != nil {
return nil, err
}
addrs, err := iface.Addrs()
if err != nil {
return nil, err
}
var addrs4 []net.Addr
for _, addr := range addrs {
ip := (addr.(*net.IPNet)).IP
if ip4 := ip.To4(); len(ip4) == net.IPv4len {
addrs4 = append(addrs4, addr)
}
}
switch {
case len(addrs4) == 0:
return nil, fmt.Errorf("Interface %v has no IP addresses", name)
case len(addrs4) > 1:
return nil, fmt.Errorf("Interface %v has more than 1 IPv4 address", name)
}
return addrs4[0], nil
}
// Port mapper takes care of mapping external ports to containers by setting
// up iptables rules.
// It keeps track of all mappings and is able to unmap at will
type PortMapper struct {
mapping map[int]net.TCPAddr
}
func (mapper *PortMapper) cleanup() error {
// Ignore errors - This could mean the chains were never set up
iptables("-t", "nat", "-D", "PREROUTING", "-j", "DOCKER")
iptables("-t", "nat", "-F", "DOCKER")
iptables("-t", "nat", "-X", "DOCKER")
mapper.mapping = make(map[int]net.TCPAddr)
return nil
}
func (mapper *PortMapper) setup() error {
if err := iptables("-t", "nat", "-N", "DOCKER"); err != nil {
return errors.New("Unable to setup port networking: Failed to create DOCKER chain")
}
if err := iptables("-t", "nat", "-A", "PREROUTING", "-j", "DOCKER"); err != nil {
return errors.New("Unable to setup port networking: Failed to inject docker in PREROUTING chain")
}
return nil
}
func (mapper *PortMapper) iptablesForward(rule string, port int, dest net.TCPAddr) error {
return iptables("-t", "nat", rule, "DOCKER", "-p", "tcp", "--dport", strconv.Itoa(port),
"-j", "DNAT", "--to-destination", net.JoinHostPort(dest.IP.String(), strconv.Itoa(dest.Port)))
}
func (mapper *PortMapper) Map(port int, dest net.TCPAddr) error {
if err := mapper.iptablesForward("-A", port, dest); err != nil {
return err
}
mapper.mapping[port] = dest
return nil
}
func (mapper *PortMapper) Unmap(port int) error {
dest, ok := mapper.mapping[port]
if !ok {
return errors.New("Port is not mapped")
}
if err := mapper.iptablesForward("-D", port, dest); err != nil {
return err
}
delete(mapper.mapping, port)
return nil
}
func newPortMapper() (*PortMapper, error) {
mapper := &PortMapper{}
if err := mapper.cleanup(); err != nil {
return nil, err
}
if err := mapper.setup(); err != nil {
return nil, err
}
return mapper, nil
}
// Port allocator: Atomatically allocate and release networking ports
type PortAllocator struct {
ports chan (int)
}
func (alloc *PortAllocator) populate(start, end int) {
alloc.ports = make(chan int, end-start)
for port := start; port < end; port++ {
alloc.ports <- port
}
}
func (alloc *PortAllocator) Acquire() (int, error) {
select {
case port := <-alloc.ports:
return port, nil
default:
return -1, errors.New("No more ports available")
}
return -1, nil
}
func (alloc *PortAllocator) Release(port int) error {
select {
case alloc.ports <- port:
return nil
default:
return errors.New("Too many ports have been released")
}
return nil
}
func newPortAllocator(start, end int) (*PortAllocator, error) {
allocator := &PortAllocator{}
allocator.populate(start, end)
return allocator, nil
}
// IP allocator: Atomatically allocate and release networking ports
type IPAllocator struct {
network *net.IPNet
queue chan (net.IP)
}
func (alloc *IPAllocator) populate() error {
firstIP, _ := networkRange(alloc.network)
size, err := networkSize(alloc.network.Mask)
if err != nil {
return err
}
// The queue size should be the network size - 3
// -1 for the network address, -1 for the broadcast address and
// -1 for the gateway address
alloc.queue = make(chan net.IP, size-3)
for i := int32(1); i < size-1; i++ {
ipNum, err := ipToInt(firstIP)
if err != nil {
return err
}
ip, err := intToIp(ipNum + int32(i))
if err != nil {
return err
}
// Discard the network IP (that's the host IP address)
if ip.Equal(alloc.network.IP) {
continue
}
alloc.queue <- ip
}
return nil
}
func (alloc *IPAllocator) Acquire() (net.IP, error) {
select {
case ip := <-alloc.queue:
return ip, nil
default:
return net.IP{}, errors.New("No more IP addresses available")
}
return net.IP{}, nil
}
func (alloc *IPAllocator) Release(ip net.IP) error {
select {
case alloc.queue <- ip:
return nil
default:
return errors.New("Too many IP addresses have been released")
}
return nil
}
func newIPAllocator(network *net.IPNet) (*IPAllocator, error) {
alloc := &IPAllocator{
network: network,
}
if err := alloc.populate(); err != nil {
return nil, err
}
return alloc, nil
}
// Network interface represents the networking stack of a container
type NetworkInterface struct {
IPNet net.IPNet
Gateway net.IP
manager *NetworkManager
extPorts []int
}
// Allocate an external TCP port and map it to the interface
func (iface *NetworkInterface) AllocatePort(port int) (int, error) {
extPort, err := iface.manager.portAllocator.Acquire()
if err != nil {
return -1, err
}
if err := iface.manager.portMapper.Map(extPort, net.TCPAddr{iface.IPNet.IP, port}); err != nil {
iface.manager.portAllocator.Release(extPort)
return -1, err
}
iface.extPorts = append(iface.extPorts, extPort)
return extPort, nil
}
// Release: Network cleanup - release all resources
func (iface *NetworkInterface) Release() error {
for _, port := range iface.extPorts {
if err := iface.manager.portMapper.Unmap(port); err != nil {
log.Printf("Unable to unmap port %v: %v", port, err)
}
if err := iface.manager.portAllocator.Release(port); err != nil {
log.Printf("Unable to release port %v: %v", port, err)
}
}
return iface.manager.ipAllocator.Release(iface.IPNet.IP)
}
// Network Manager manages a set of network interfaces
// Only *one* manager per host machine should be used
type NetworkManager struct {
bridgeIface string
bridgeNetwork *net.IPNet
ipAllocator *IPAllocator
portAllocator *PortAllocator
portMapper *PortMapper
}
// Allocate a network interface
func (manager *NetworkManager) Allocate() (*NetworkInterface, error) {
ip, err := manager.ipAllocator.Acquire()
if err != nil {
return nil, err
}
iface := &NetworkInterface{
IPNet: net.IPNet{ip, manager.bridgeNetwork.Mask},
Gateway: manager.bridgeNetwork.IP,
manager: manager,
}
return iface, nil
}
func newNetworkManager(bridgeIface string) (*NetworkManager, error) {
addr, err := getIfaceAddr(bridgeIface)
if err != nil {
return nil, err
}
network := addr.(*net.IPNet)
ipAllocator, err := newIPAllocator(network)
if err != nil {
return nil, err
}
portAllocator, err := newPortAllocator(portRangeStart, portRangeEnd)
if err != nil {
return nil, err
}
portMapper, err := newPortMapper()
manager := &NetworkManager{
bridgeIface: bridgeIface,
bridgeNetwork: network,
ipAllocator: ipAllocator,
portAllocator: portAllocator,
portMapper: portMapper,
}
return manager, nil
}

130
network_test.go Normal file
View file

@ -0,0 +1,130 @@
package docker
import (
"net"
"testing"
)
func TestNetworkRange(t *testing.T) {
// Simple class C test
_, network, _ := net.ParseCIDR("192.168.0.1/24")
first, last := networkRange(network)
if !first.Equal(net.ParseIP("192.168.0.0")) {
t.Error(first.String())
}
if !last.Equal(net.ParseIP("192.168.0.255")) {
t.Error(last.String())
}
if size, err := networkSize(network.Mask); err != nil || size != 256 {
t.Error(size, err)
}
// Class A test
_, network, _ = net.ParseCIDR("10.0.0.1/8")
first, last = networkRange(network)
if !first.Equal(net.ParseIP("10.0.0.0")) {
t.Error(first.String())
}
if !last.Equal(net.ParseIP("10.255.255.255")) {
t.Error(last.String())
}
if size, err := networkSize(network.Mask); err != nil || size != 16777216 {
t.Error(size, err)
}
// Class A, random IP address
_, network, _ = net.ParseCIDR("10.1.2.3/8")
first, last = networkRange(network)
if !first.Equal(net.ParseIP("10.0.0.0")) {
t.Error(first.String())
}
if !last.Equal(net.ParseIP("10.255.255.255")) {
t.Error(last.String())
}
// 32bit mask
_, network, _ = net.ParseCIDR("10.1.2.3/32")
first, last = networkRange(network)
if !first.Equal(net.ParseIP("10.1.2.3")) {
t.Error(first.String())
}
if !last.Equal(net.ParseIP("10.1.2.3")) {
t.Error(last.String())
}
if size, err := networkSize(network.Mask); err != nil || size != 1 {
t.Error(size, err)
}
// 31bit mask
_, network, _ = net.ParseCIDR("10.1.2.3/31")
first, last = networkRange(network)
if !first.Equal(net.ParseIP("10.1.2.2")) {
t.Error(first.String())
}
if !last.Equal(net.ParseIP("10.1.2.3")) {
t.Error(last.String())
}
if size, err := networkSize(network.Mask); err != nil || size != 2 {
t.Error(size, err)
}
// 26bit mask
_, network, _ = net.ParseCIDR("10.1.2.3/26")
first, last = networkRange(network)
if !first.Equal(net.ParseIP("10.1.2.0")) {
t.Error(first.String())
}
if !last.Equal(net.ParseIP("10.1.2.63")) {
t.Error(last.String())
}
if size, err := networkSize(network.Mask); err != nil || size != 64 {
t.Error(size, err)
}
}
func TestConversion(t *testing.T) {
ip := net.ParseIP("127.0.0.1")
i, err := ipToInt(ip)
if err != nil {
t.Fatal(err)
}
if i == 0 {
t.Fatal("converted to zero")
}
conv, err := intToIp(i)
if err != nil {
t.Fatal(err)
}
if !ip.Equal(conv) {
t.Error(conv.String())
}
}
func TestIPAllocator(t *testing.T) {
gwIP, n, _ := net.ParseCIDR("127.0.0.1/29")
alloc, err := newIPAllocator(&net.IPNet{gwIP, n.Mask})
if err != nil {
t.Fatal(err)
}
var lastIP net.IP
for i := 0; i < 5; i++ {
ip, err := alloc.Acquire()
if err != nil {
t.Fatal(err)
}
lastIP = ip
}
ip, err := alloc.Acquire()
if err == nil {
t.Fatal("There shouldn't be any IP addresses at this point")
}
// Release 1 IP
alloc.Release(lastIP)
ip, err = alloc.Acquire()
if err != nil {
t.Fatal(err)
}
if !ip.Equal(lastIP) {
t.Fatal(ip.String())
}
}

View file

@ -15,6 +15,7 @@ import (
"net/url"
"os"
"path"
"strconv"
"strings"
"sync"
"text/tabwriter"
@ -679,10 +680,10 @@ func (srv *Server) CmdLogs(stdin io.ReadCloser, stdout io.Writer, args ...string
return errors.New("No such container: " + cmd.Arg(0))
}
func (srv *Server) CreateContainer(img *image.Image, user string, tty bool, openStdin bool, comment string, cmd string, args ...string) (*docker.Container, error) {
func (srv *Server) CreateContainer(img *image.Image, ports []int, user string, tty bool, openStdin bool, comment string, cmd string, args ...string) (*docker.Container, error) {
id := future.RandomId()[:8]
container, err := srv.containers.Create(id, cmd, args, img.Layers,
&docker.Config{Hostname: id, User: user, Tty: tty, OpenStdin: openStdin})
&docker.Config{Hostname: id, Ports: ports, User: user, Tty: tty, OpenStdin: openStdin})
if err != nil {
return nil, err
}
@ -743,6 +744,22 @@ func (srv *Server) CmdAttach(stdin io.ReadCloser, stdout io.Writer, args ...stri
return nil
}
// Ports type - Used to parse multiple -p flags
type ports []int
func (p *ports) String() string {
return fmt.Sprint(*p)
}
func (p *ports) Set(value string) error {
port, err := strconv.Atoi(value)
if err != nil {
return fmt.Errorf("Invalid port: %v", value)
}
*p = append(*p, port)
return nil
}
func (srv *Server) CmdRun(stdin io.ReadCloser, stdout io.Writer, args ...string) error {
cmd := rcli.Subcmd(stdout, "run", "[OPTIONS] IMAGE COMMAND [ARG...]", "Run a command in a new container")
fl_user := cmd.String("u", "", "Username or UID")
@ -750,6 +767,8 @@ func (srv *Server) CmdRun(stdin io.ReadCloser, stdout io.Writer, args ...string)
fl_stdin := cmd.Bool("i", false, "Keep stdin open even if not attached")
fl_tty := cmd.Bool("t", false, "Allocate a pseudo-tty")
fl_comment := cmd.String("c", "", "Comment")
var fl_ports ports
cmd.Var(&fl_ports, "p", "Map a network port to the container")
if err := cmd.Parse(args); err != nil {
return nil
}
@ -775,7 +794,7 @@ func (srv *Server) CmdRun(stdin io.ReadCloser, stdout io.Writer, args ...string)
return errors.New("No such image: " + name)
}
// Create new container
container, err := srv.CreateContainer(img, *fl_user, *fl_tty, *fl_stdin, *fl_comment, cmdline[0], cmdline[1:]...)
container, err := srv.CreateContainer(img, fl_ports, *fl_user, *fl_tty, *fl_stdin, *fl_comment, cmdline[0], cmdline[1:]...)
if err != nil {
return errors.New("Error creating container: " + err.Error())
}

View file

@ -11,6 +11,17 @@ import (
"syscall"
)
// Setup networking
func setupNetworking(gw string) {
if gw == "" {
return
}
cmd := exec.Command("/sbin/route", "add", "default", "gw", gw)
if err := cmd.Run(); err != nil {
log.Fatalf("Unable to set up networking: %v", err)
}
}
// Takes care of dropping privileges to the desired user
func changeUser(u string) {
if u == "" {
@ -62,8 +73,11 @@ func SysInit() {
os.Exit(1)
}
var u = flag.String("u", "", "username or uid")
var gw = flag.String("g", "", "gateway address")
flag.Parse()
setupNetworking(*gw)
changeUser(*u)
executeProgram(flag.Arg(0), flag.Args())
}