Merge pull request #18486 from aboch/vnd

Vendoring libnetwork bbd6e6d8ca1e7c9b42f6f53277b0bde72847ff90
This commit is contained in:
Alexander Morozov 2015-12-14 13:32:01 -08:00
commit 260cbb19c9
34 changed files with 2211 additions and 173 deletions

View file

@ -22,14 +22,14 @@ clone git github.com/vdemeester/shakers 3c10293ce22b900c27acad7b28656196fcc2f73b
clone git golang.org/x/net 47990a1ba55743e6ef1affd3a14e5bac8553615d https://github.com/golang/net.git
#get libnetwork packages
clone git github.com/docker/libnetwork 04cc1fa0a89f8c407b7be8cab883d4b17531ea7d
clone git github.com/docker/libnetwork bbd6e6d8ca1e7c9b42f6f53277b0bde72847ff90
clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4
clone git github.com/hashicorp/serf 7151adcef72687bf95f451a2e0ba15cb19412bf2
clone git github.com/docker/libkv c2aac5dbbaa5c872211edea7c0f32b3bd67e7410
clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
clone git github.com/vishvananda/netlink 4b5dce31de6d42af5bb9811c6d265472199e0fec
clone git github.com/vishvananda/netlink 8e810149a2e531fed9b837c0c7d8a8922d2bedf7
clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d

View file

@ -569,6 +569,9 @@ func (s *DockerDaemonSuite) TestDockerNetworkNoDiscoveryDefaultBridgeNetwork(c *
out, err = s.d.Cmd("network", "connect", network, cid1)
c.Assert(err, check.IsNil, check.Commentf(out))
hosts, err = s.d.Cmd("exec", cid1, "cat", hostsFile)
c.Assert(err, checker.IsNil)
hostsPost, err = s.d.Cmd("exec", cid1, "cat", hostsFile)
c.Assert(err, checker.IsNil)
c.Assert(string(hosts), checker.Equals, string(hostsPost),
@ -631,6 +634,9 @@ func (s *DockerNetworkSuite) TestDockerNetworkAnonymousEndpoint(c *check.C) {
dockerCmd(c, "network", "connect", cstmBridgeNw1, cid2)
hosts2, err = readContainerFileWithExec(cid2, hostsFile)
c.Assert(err, checker.IsNil)
hosts1post, err = readContainerFileWithExec(cid1, hostsFile)
c.Assert(err, checker.IsNil)
c.Assert(string(hosts1), checker.Equals, string(hosts1post),

View file

@ -1,5 +1,52 @@
Alessandro Boch <aboch@docker.com> (@aboch)
Alexandr Morozov <lk4d4@docker.com> (@LK4D4)
Arnaud Porterie <arnaud@docker.com> (@icecrime)
Jana Radhakrishnan <mrjana@docker.com> (@mrjana)
Madhu Venugopal <madhu@docker.com> (@mavenugo)
# Libnetwork maintainers file
#
# This file describes who runs the docker/libnetwork project and how.
# This is a living document - if you see something out of date or missing, speak up!
#
# It is structured to be consumable by both humans and programs.
# To extract its contents programmatically, use any TOML-compliant parser.
#
# This file is compiled into the MAINTAINERS file in docker/opensource.
#
[Org]
[Org."Core maintainers"]
people = [
"aboch",
"LK4D4",
"icecrime",
"mrjana",
"mavenugo",
]
[people]
# A reference list of all people associated with the project.
# All other sections should refer to people by their canonical key
# in the people section.
# ADD YOURSELF HERE IN ALPHABETICAL ORDER
[people.aboch]
Name = "Alessandro Boch"
Email = "aboch@docker.com"
GitHub = "aboch"
[people.LK4D4]
Name = "Alexandr Morozov"
Email = "lk4d4@docker.com"
GitHub = "LK4D4"
[people.icecrime]
Name = "Arnaud Porterie"
Email = "arnaud@docker.com"
GitHub = "icecrime"
[people.mrjana]
Name = "Jana Radhakrishnan"
Email = "mrjana@docker.com"
GitHub = "mrjana"
[people.mavenugo]
Name = "Madhu Venugopal"
Email = "madhu@docker.com"
GitHub = "mavenugo"

View file

@ -6,8 +6,6 @@ Libnetwork provides a native Go implementation for connecting containers
The goal of libnetwork is to deliver a robust Container Network Model that provides a consistent programming interface and the required network abstractions for applications.
**NOTE**: libnetwork project is under heavy development and is not ready for general use.
#### Design
Please refer to the [design](docs/design.md) for more information.
@ -67,10 +65,6 @@ There are many networking solutions available to suit a broad range of use-cases
}
}
```
#### Current Status
Please watch this space for updates on the progress.
Currently libnetwork is nothing more than an attempt to modularize the Docker platform's networking subsystem by moving it into libnetwork as a library.
## Future
Please refer to [roadmap](ROADMAP.md) for more information.

View file

@ -24,7 +24,10 @@ const (
)
var (
errNoBitAvailable = fmt.Errorf("no bit available")
// ErrNoBitAvailable is returned when no more bits are available to set
ErrNoBitAvailable = fmt.Errorf("no bit available")
// ErrBitAllocated is returned when the specific bit requested is already set
ErrBitAllocated = fmt.Errorf("requested bit is already allocated")
)
// Handle contains the sequece representing the bitmask and its identifier
@ -94,7 +97,7 @@ func (s *sequence) toString() string {
// GetAvailableBit returns the position of the first unset bit in the bitmask represented by this sequence
func (s *sequence) getAvailableBit(from uint64) (uint64, uint64, error) {
if s.block == blockMAX || s.count == 0 {
return invalidPos, invalidPos, errNoBitAvailable
return invalidPos, invalidPos, ErrNoBitAvailable
}
bits := from
bitSel := blockFirstBit >> from
@ -197,7 +200,7 @@ func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) {
return invalidPos, fmt.Errorf("invalid bit range [%d, %d]", start, end)
}
if h.Unselected() == 0 {
return invalidPos, errNoBitAvailable
return invalidPos, ErrNoBitAvailable
}
return h.set(0, start, end, true, false)
}
@ -205,7 +208,7 @@ func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) {
// SetAny atomically sets the first unset bit in the sequence and returns the corresponding ordinal
func (h *Handle) SetAny() (uint64, error) {
if h.Unselected() == 0 {
return invalidPos, errNoBitAvailable
return invalidPos, ErrNoBitAvailable
}
return h.set(0, 0, h.bits-1, true, false)
}
@ -250,8 +253,12 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64
)
for {
if h.store != nil {
if err := h.store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
var store datastore.DataStore
h.Lock()
store = h.store
h.Unlock()
if store != nil {
if err := store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
return ret, err
}
}
@ -265,7 +272,7 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64
bytePos, bitPos, err = getFirstAvailable(h.head, start)
ret = posToOrdinal(bytePos, bitPos)
if end < ret {
err = errNoBitAvailable
err = ErrNoBitAvailable
}
} else {
bytePos, bitPos, err = checkIfAvailable(h.head, ordinal)
@ -445,7 +452,7 @@ func getFirstAvailable(head *sequence, start uint64) (uint64, uint64, error) {
byteOffset += current.count * blockBytes
current = current.next
}
return invalidPos, invalidPos, errNoBitAvailable
return invalidPos, invalidPos, ErrNoBitAvailable
}
// checkIfAvailable checks if the bit correspondent to the specified ordinal is unset
@ -463,7 +470,7 @@ func checkIfAvailable(head *sequence, ordinal uint64) (uint64, uint64, error) {
}
}
return invalidPos, invalidPos, fmt.Errorf("requested bit is not available")
return invalidPos, invalidPos, ErrBitAllocated
}
// Given the byte position and the sequences list head, return the pointer to the

View file

@ -4,7 +4,7 @@ machine:
dependencies:
override:
- sudo apt-get update; sudo apt-get install -y iptables zookeeperd
- sudo apt-get update; sudo apt-get install -y iptables zookeeperd
- go get golang.org/x/tools/cmd/vet
- go get golang.org/x/tools/cmd/goimports
- go get golang.org/x/tools/cmd/cover

View file

@ -315,17 +315,17 @@ func (c *controller) RegisterIpamDriver(name string, driver ipamapi.Ipam) error
_, ok := c.ipamDrivers[name]
c.Unlock()
if ok {
return driverapi.ErrActiveRegistration(name)
return types.ForbiddenErrorf("ipam driver %q already registered", name)
}
locAS, glbAS, err := driver.GetDefaultAddressSpaces()
if err != nil {
return fmt.Errorf("ipam driver %s failed to return default address spaces: %v", name, err)
return types.InternalErrorf("ipam driver %q failed to return default address spaces: %v", name, err)
}
c.Lock()
c.ipamDrivers[name] = &ipamData{driver: driver, defaultLocalAddressSpace: locAS, defaultGlobalAddressSpace: glbAS}
c.Unlock()
log.Debugf("Registering ipam provider: %s", name)
log.Debugf("Registering ipam driver: %q", name)
return nil
}
@ -667,7 +667,7 @@ func (c *controller) loadIpamDriver(name string) (*ipamData, error) {
id, ok := c.ipamDrivers[name]
c.Unlock()
if !ok {
return nil, ErrInvalidNetworkDriver(name)
return nil, types.BadRequestErrorf("invalid ipam driver: %q", name)
}
return id, nil
}

View file

@ -103,10 +103,18 @@ func (sb *sandbox) needDefaultGW() bool {
if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" {
continue
}
if ep.joinInfo.disableGatewayService {
return false
}
// TODO v6 needs to be handled.
if len(ep.Gateway()) > 0 {
return false
}
for _, r := range ep.StaticRoutes() {
if r.Destination.String() == "0.0.0.0/0" {
return false
}
}
needGW = true
}
return needGW

View file

@ -91,6 +91,9 @@ type JoinInfo interface {
// AddStaticRoute adds a routes to the sandbox.
// It may be used in addtion to or instead of a default gateway (as above).
AddStaticRoute(destination *net.IPNet, routeType int, nextHop net.IP) error
// DisableGatewayService tells libnetwork not to provide Default GW for the container
DisableGatewayService()
}
// DriverCallback provides a Callback interface for Drivers into LibNetwork

View file

@ -151,6 +151,10 @@ func (d *driver) processQuery(q *serf.Query) {
}
func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) {
if d.serfInstance == nil {
return nil, nil, nil, fmt.Errorf("could not resolve peer: serf instance not initialized")
}
qPayload := fmt.Sprintf("%s %s", string(nid), peerIP.String())
resp, err := d.serfInstance.Query("peerlookup", []byte(qPayload), nil)
if err != nil {

View file

@ -134,10 +134,11 @@ type StaticRoute struct {
// JoinResponse is the response to a JoinRequest.
type JoinResponse struct {
Response
InterfaceName *InterfaceName
Gateway string
GatewayIPv6 string
StaticRoutes []StaticRoute
InterfaceName *InterfaceName
Gateway string
GatewayIPv6 string
StaticRoutes []StaticRoute
DisableGatewayService bool
}
// LeaveRequest describes the API for detaching an endpoint from a sandbox.

View file

@ -231,6 +231,9 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
}
}
}
if res.DisableGatewayService {
jinfo.DisableGatewayService()
}
return nil
}

View file

@ -60,6 +60,8 @@ type endpoint struct {
anonymous bool
generic map[string]interface{}
joinLeaveDone chan struct{}
prefAddress net.IP
ipamOptions map[string]string
dbIndex uint64
dbExists bool
sync.Mutex
@ -386,6 +388,9 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
}
}()
// Watch for service records
network.getController().watchSvcRecord(ep)
address := ""
if ip := ep.getFirstInterfaceAddress(); ip != nil {
address = ip.String()
@ -394,9 +399,6 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
return err
}
// Watch for service records
network.getController().watchSvcRecord(ep)
if err = sb.updateDNS(network.enableIPv6); err != nil {
return err
}
@ -559,7 +561,7 @@ func (ep *endpoint) sbLeave(sbox Sandbox, options ...EndpointOption) error {
sb.deleteHostsEntries(n.getSvcRecords(ep))
if sb.needDefaultGW() {
if !sb.inDelete && sb.needDefaultGW() {
ep := sb.getEPwithoutGateway()
if ep == nil {
return fmt.Errorf("endpoint without GW expected, but not found")
@ -685,6 +687,14 @@ func EndpointOptionGeneric(generic map[string]interface{}) EndpointOption {
}
}
// CreateOptionIpam function returns an option setter for the ipam configuration for this endpoint
func CreateOptionIpam(prefAddress net.IP, ipamOptions map[string]string) EndpointOption {
return func(ep *endpoint) {
ep.prefAddress = prefAddress
ep.ipamOptions = ipamOptions
}
}
// CreateOptionExposedPorts function returns an option setter for the container exposed
// ports option to be passed to network.CreateEndpoint() method.
func CreateOptionExposedPorts(exposedPorts []types.TransportPort) EndpointOption {
@ -799,7 +809,7 @@ func (ep *endpoint) assignAddressVersion(ipVer int, ipam ipamapi.Ipam) error {
if *address != nil {
prefIP = (*address).IP
}
addr, _, err := ipam.RequestAddress(d.PoolID, prefIP, nil)
addr, _, err := ipam.RequestAddress(d.PoolID, prefIP, ep.ipamOptions)
if err == nil {
ep.Lock()
*address = addr

View file

@ -25,6 +25,10 @@ type EndpointInfo interface {
// This will only return a valid value if a container has joined the endpoint.
GatewayIPv6() net.IP
// StaticRoutes returns the list of static routes configured by the network
// driver when the container joins a network
StaticRoutes() []*types.StaticRoute
// Sandbox returns the attached sandbox if there, nil otherwise.
Sandbox() Sandbox
}
@ -136,9 +140,10 @@ func (epi *endpointInterface) CopyTo(dstEpi *endpointInterface) error {
}
type endpointJoinInfo struct {
gw net.IP
gw6 net.IP
StaticRoutes []*types.StaticRoute
gw net.IP
gw6 net.IP
StaticRoutes []*types.StaticRoute
disableGatewayService bool
}
func (ep *endpoint) Info() EndpointInfo {
@ -295,6 +300,17 @@ func (ep *endpoint) Sandbox() Sandbox {
return cnt
}
func (ep *endpoint) StaticRoutes() []*types.StaticRoute {
ep.Lock()
defer ep.Unlock()
if ep.joinInfo == nil {
return nil
}
return ep.joinInfo.StaticRoutes
}
func (ep *endpoint) Gateway() net.IP {
ep.Lock()
defer ep.Unlock()
@ -340,3 +356,10 @@ func (ep *endpoint) retrieveFromStore() (*endpoint, error) {
}
return n.getEndpointFromStore(ep.ID())
}
func (ep *endpoint) DisableGatewayService() {
ep.Lock()
defer ep.Unlock()
ep.joinInfo.disableGatewayService = true
}

View file

@ -1,6 +1,7 @@
package etchosts
import (
"bufio"
"bytes"
"fmt"
"io"
@ -138,19 +139,36 @@ func Delete(path string, recs []Record) error {
if len(recs) == 0 {
return nil
}
old, err := ioutil.ReadFile(path)
old, err := os.Open(path)
if err != nil {
return err
}
regexpStr := fmt.Sprintf("\\S*\\t%s\\n", regexp.QuoteMeta(recs[0].Hosts))
for _, r := range recs[1:] {
regexpStr = regexpStr + "|" + fmt.Sprintf("\\S*\\t%s\\n", regexp.QuoteMeta(r.Hosts))
}
var buf bytes.Buffer
var re = regexp.MustCompile(regexpStr)
return ioutil.WriteFile(path, re.ReplaceAll(old, []byte("")), 0644)
s := bufio.NewScanner(old)
eol := []byte{'\n'}
loop:
for s.Scan() {
b := s.Bytes()
if b[0] == '#' {
buf.Write(b)
buf.Write(eol)
continue
}
for _, r := range recs {
if bytes.HasSuffix(b, []byte("\t"+r.Hosts)) {
continue loop
}
}
buf.Write(b)
buf.Write(eol)
}
old.Close()
if err := s.Err(); err != nil {
return err
}
return ioutil.WriteFile(path, buf.Bytes(), 0644)
}
// Update all IP addresses where hostname matches.

View file

@ -76,8 +76,7 @@ func NewAllocator(lcDs, glDs datastore.DataStore) (*Allocator, error) {
func (a *Allocator) refresh(as string) error {
aSpace, err := a.getAddressSpaceFromStore(as)
if err != nil {
return fmt.Errorf("error getting pools config from store during init: %v",
err)
return types.InternalErrorf("error getting pools config from store: %v", err)
}
if aSpace == nil {
@ -239,7 +238,7 @@ func (a *Allocator) insertBitMask(key SubnetKey, pool *net.IPNet) error {
store := a.getStore(key.AddressSpace)
if store == nil {
return fmt.Errorf("could not find store for address space %s while inserting bit mask", key.AddressSpace)
return types.InternalErrorf("could not find store for address space %s while inserting bit mask", key.AddressSpace)
}
ipVer := getAddressVersion(pool.IP)
@ -279,7 +278,7 @@ func (a *Allocator) retrieveBitmask(k SubnetKey, n *net.IPNet) (*bitseq.Handle,
if !ok {
log.Debugf("Retrieving bitmask (%s, %s)", k.String(), n.String())
if err := a.insertBitMask(k, n); err != nil {
return nil, fmt.Errorf("could not find bitmask in datastore for %s", k.String())
return nil, types.InternalErrorf("could not find bitmask in datastore for %s", k.String())
}
a.Lock()
bm = a.addresses[k]
@ -306,7 +305,7 @@ func (a *Allocator) getPredefinedPool(as string, ipV6 bool) (*net.IPNet, error)
}
if as != localAddressSpace && as != globalAddressSpace {
return nil, fmt.Errorf("no default pool availbale for non-default addresss spaces")
return nil, types.NotImplementedErrorf("no default pool availbale for non-default addresss spaces")
}
aSpace, err := a.getAddrSpace(as)
@ -378,7 +377,7 @@ func (a *Allocator) RequestAddress(poolID string, prefAddress net.IP, opts map[s
bm, err := a.retrieveBitmask(k, c.Pool)
if err != nil {
return nil, nil, fmt.Errorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v",
return nil, nil, types.InternalErrorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v",
k.String(), prefAddress, poolID, err)
}
ip, err := a.getAddress(p.Pool, bm, prefAddress, p.Range)
@ -410,12 +409,12 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error {
p, ok := aSpace.subnets[k]
if !ok {
aSpace.Unlock()
return ipamapi.ErrBadPool
return types.NotFoundErrorf("cannot find address pool for poolID:%s", poolID)
}
if address == nil {
aSpace.Unlock()
return ipamapi.ErrInvalidRequest
return types.BadRequestErrorf("invalid address: nil")
}
if !p.Pool.Contains(address) {
@ -434,12 +433,12 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error {
h, err := types.GetHostPartIP(address, mask)
if err != nil {
return fmt.Errorf("failed to release address %s: %v", address.String(), err)
return types.InternalErrorf("failed to release address %s: %v", address.String(), err)
}
bm, err := a.retrieveBitmask(k, c.Pool)
if err != nil {
return fmt.Errorf("could not find bitmask in datastore for %s on address %v release from pool %s: %v",
return types.InternalErrorf("could not find bitmask in datastore for %s on address %v release from pool %s: %v",
k.String(), address, poolID, err)
}
@ -463,19 +462,25 @@ func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddres
} else if prefAddress != nil {
hostPart, e := types.GetHostPartIP(prefAddress, base.Mask)
if e != nil {
return nil, fmt.Errorf("failed to allocate preferred address %s: %v", prefAddress.String(), e)
return nil, types.InternalErrorf("failed to allocate preferred address %s: %v", prefAddress.String(), e)
}
ordinal = ipToUint64(types.GetMinimalIP(hostPart))
err = bitmask.Set(ordinal)
} else {
ordinal, err = bitmask.SetAnyInRange(ipr.Start, ipr.End)
}
if err != nil {
return nil, ipamapi.ErrNoAvailableIPs
}
// Convert IP ordinal for this subnet into IP address
return generateAddress(ordinal, base), nil
switch err {
case nil:
// Convert IP ordinal for this subnet into IP address
return generateAddress(ordinal, base), nil
case bitseq.ErrBitAllocated:
return nil, ipamapi.ErrIPAlreadyAllocated
case bitseq.ErrNoBitAvailable:
return nil, ipamapi.ErrNoAvailableIPs
default:
return nil, err
}
}
// DumpDatabase dumps the internal info

View file

@ -2,7 +2,6 @@ package ipam
import (
"encoding/json"
"fmt"
log "github.com/Sirupsen/logrus"
"github.com/docker/libnetwork/datastore"
@ -84,7 +83,7 @@ func (a *Allocator) getStore(as string) datastore.DataStore {
func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
store := a.getStore(as)
if store == nil {
return nil, fmt.Errorf("store for address space %s not found", as)
return nil, types.InternalErrorf("store for address space %s not found", as)
}
pc := &addrSpace{id: dsConfigKey + "/" + as, ds: store, alloc: a}
@ -93,7 +92,7 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
return nil, nil
}
return nil, fmt.Errorf("could not get pools config from store: %v", err)
return nil, types.InternalErrorf("could not get pools config from store: %v", err)
}
return pc, nil
@ -102,7 +101,7 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
func (a *Allocator) writeToStore(aSpace *addrSpace) error {
store := aSpace.store()
if store == nil {
return fmt.Errorf("invalid store while trying to write %s address space", aSpace.DataScope())
return types.InternalErrorf("invalid store while trying to write %s address space", aSpace.DataScope())
}
err := store.PutObjectAtomic(aSpace)
@ -116,7 +115,7 @@ func (a *Allocator) writeToStore(aSpace *addrSpace) error {
func (a *Allocator) deleteFromStore(aSpace *addrSpace) error {
store := aSpace.store()
if store == nil {
return fmt.Errorf("invalid store while trying to delete %s address space", aSpace.DataScope())
return types.InternalErrorf("invalid store while trying to delete %s address space", aSpace.DataScope())
}
return store.DeleteObjectAtomic(aSpace)

View file

@ -88,12 +88,12 @@ func (s *SubnetKey) String() string {
// FromString populate the SubnetKey object reading it from string
func (s *SubnetKey) FromString(str string) error {
if str == "" || !strings.Contains(str, "/") {
return fmt.Errorf("invalid string form for subnetkey: %s", str)
return types.BadRequestErrorf("invalid string form for subnetkey: %s", str)
}
p := strings.Split(str, "/")
if len(p) != 3 && len(p) != 5 {
return fmt.Errorf("invalid string form for subnetkey: %s", str)
return types.BadRequestErrorf("invalid string form for subnetkey: %s", str)
}
s.AddressSpace = p[0]
s.Subnet = fmt.Sprintf("%s/%s", p[1], p[2])
@ -317,7 +317,7 @@ func (aSpace *addrSpace) updatePoolDBOnRemoval(k SubnetKey) (func() error, error
return func() error {
bm, err := aSpace.alloc.retrieveBitmask(k, c.Pool)
if err != nil {
return fmt.Errorf("could not find bitmask in datastore for pool %s removal: %v", k.String(), err)
return types.InternalErrorf("could not find bitmask in datastore for pool %s removal: %v", k.String(), err)
}
return bm.Destroy()
}, nil

View file

@ -2,8 +2,9 @@
package ipamapi
import (
"errors"
"net"
"github.com/docker/libnetwork/types"
)
/********************
@ -15,6 +16,8 @@ const (
DefaultIPAM = "default"
// PluginEndpointType represents the Endpoint Type used by Plugin system
PluginEndpointType = "IpamDriver"
// RequestAddressType represents the Address Type used when requesting an address
RequestAddressType = "RequestAddressType"
)
// Callback provides a Callback interface for registering an IPAM instance into LibNetwork
@ -29,22 +32,19 @@ type Callback interface {
// Weel-known errors returned by IPAM
var (
ErrInvalidIpamService = errors.New("Invalid IPAM Service")
ErrInvalidIpamConfigService = errors.New("Invalid IPAM Config Service")
ErrIpamNotAvailable = errors.New("IPAM Service not available")
ErrIpamInternalError = errors.New("IPAM Internal Error")
ErrInvalidAddressSpace = errors.New("Invalid Address Space")
ErrInvalidPool = errors.New("Invalid Address Pool")
ErrInvalidSubPool = errors.New("Invalid Address SubPool")
ErrInvalidRequest = errors.New("Invalid Request")
ErrPoolNotFound = errors.New("Address Pool not found")
ErrOverlapPool = errors.New("Address pool overlaps with existing pool on this address space")
ErrNoAvailablePool = errors.New("No available pool")
ErrNoAvailableIPs = errors.New("No available addresses on this pool")
ErrIPAlreadyAllocated = errors.New("Address already in use")
ErrIPOutOfRange = errors.New("Requested address is out of range")
ErrPoolOverlap = errors.New("Pool overlaps with other one on this address space")
ErrBadPool = errors.New("Address space does not contain specified address pool")
ErrIpamInternalError = types.InternalErrorf("IPAM Internal Error")
ErrInvalidAddressSpace = types.BadRequestErrorf("Invalid Address Space")
ErrInvalidPool = types.BadRequestErrorf("Invalid Address Pool")
ErrInvalidSubPool = types.BadRequestErrorf("Invalid Address SubPool")
ErrInvalidRequest = types.BadRequestErrorf("Invalid Request")
ErrPoolNotFound = types.BadRequestErrorf("Address Pool not found")
ErrOverlapPool = types.ForbiddenErrorf("Address pool overlaps with existing pool on this address space")
ErrNoAvailablePool = types.NoServiceErrorf("No available pool")
ErrNoAvailableIPs = types.NoServiceErrorf("No available addresses on this pool")
ErrIPAlreadyAllocated = types.ForbiddenErrorf("Address already in use")
ErrIPOutOfRange = types.BadRequestErrorf("Requested address is out of range")
ErrPoolOverlap = types.ForbiddenErrorf("Pool overlaps with other one on this address space")
ErrBadPool = types.BadRequestErrorf("Address space does not contain specified address pool")
)
/*******************************

View file

@ -603,12 +603,13 @@ func (n *network) Delete() error {
if err = n.getController().deleteFromStore(n.getEpCnt()); err != nil {
return fmt.Errorf("error deleting network endpoint count from store: %v", err)
}
n.ipamRelease()
if err = n.getController().deleteFromStore(n); err != nil {
return fmt.Errorf("error deleting network from store: %v", err)
}
n.ipamRelease()
return nil
}
@ -970,7 +971,10 @@ func (n *network) ipamAllocateVersion(ipVer int, ipam ipamapi.Ipam) error {
// irrespective of whether ipam driver returned a gateway already.
// If none of the above is true, libnetwork will allocate one.
if cfg.Gateway != "" || d.Gateway == nil {
if d.Gateway, _, err = ipam.RequestAddress(d.PoolID, net.ParseIP(cfg.Gateway), nil); err != nil {
var gatewayOpts = map[string]string{
ipamapi.RequestAddressType: netlabel.Gateway,
}
if d.Gateway, _, err = ipam.RequestAddress(d.PoolID, net.ParseIP(cfg.Gateway), gatewayOpts); err != nil {
return types.InternalErrorf("failed to allocate gateway (%v): %v", cfg.Gateway, err)
}
}

View file

@ -62,7 +62,6 @@ type sandbox struct {
osSbox osl.Sandbox
controller *controller
refCnt int
hostsOnce sync.Once
endpoints epHeap
epPriority map[string]int
joinLeaveDone chan struct{}
@ -601,41 +600,21 @@ func (sb *sandbox) buildHostsFile() error {
}
func (sb *sandbox) updateHostsFile(ifaceIP string, svcRecords []etchosts.Record) error {
var err error
var mhost string
if sb.config.originHostsPath != "" {
return nil
}
max := func(a, b int) int {
if a < b {
return b
}
return a
if sb.config.domainName != "" {
mhost = fmt.Sprintf("%s.%s %s", sb.config.hostName, sb.config.domainName,
sb.config.hostName)
} else {
mhost = sb.config.hostName
}
extraContent := make([]etchosts.Record, 0,
max(len(sb.config.extraHosts), len(svcRecords)))
sb.hostsOnce.Do(func() {
// Rebuild the hosts file accounting for the passed
// interface IP and service records
for _, extraHost := range sb.config.extraHosts {
extraContent = append(extraContent,
etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP})
}
err = etchosts.Build(sb.config.hostsPath, ifaceIP,
sb.config.hostName, sb.config.domainName, extraContent)
})
if err != nil {
return err
}
extraContent = extraContent[:0]
extraContent := make([]etchosts.Record, 0, len(svcRecords)+1)
extraContent = append(extraContent, etchosts.Record{Hosts: mhost, IP: ifaceIP})
for _, svc := range svcRecords {
extraContent = append(extraContent, svc)
}

View file

@ -0,0 +1,110 @@
package netlink
import (
"fmt"
)
type Class interface {
Attrs() *ClassAttrs
Type() string
}
// Class represents a netlink class. A filter is associated with a link,
// has a handle and a parent. The root filter of a device should have a
// parent == HANDLE_ROOT.
type ClassAttrs struct {
LinkIndex int
Handle uint32
Parent uint32
Leaf uint32
}
func (q ClassAttrs) String() string {
return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf)
}
type HtbClassAttrs struct {
// TODO handle all attributes
Rate uint64
Ceil uint64
Buffer uint32
Cbuffer uint32
Quantum uint32
Level uint32
Prio uint32
}
func (q HtbClassAttrs) String() string {
return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
}
// Htb class
type HtbClass struct {
ClassAttrs
Rate uint64
Ceil uint64
Buffer uint32
Cbuffer uint32
Quantum uint32
Level uint32
Prio uint32
}
func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
mtu := 1600
rate := cattrs.Rate / 8
ceil := cattrs.Ceil / 8
buffer := cattrs.Buffer
cbuffer := cattrs.Cbuffer
if ceil == 0 {
ceil = rate
}
if buffer == 0 {
buffer = uint32(float64(rate)/Hz() + float64(mtu))
}
buffer = uint32(Xmittime(rate, buffer))
if cbuffer == 0 {
cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
}
cbuffer = uint32(Xmittime(ceil, cbuffer))
return &HtbClass{
ClassAttrs: attrs,
Rate: rate,
Ceil: ceil,
Buffer: buffer,
Cbuffer: cbuffer,
Quantum: 10,
Level: 0,
Prio: 0,
}
}
func (q HtbClass) String() string {
return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
}
func (class *HtbClass) Attrs() *ClassAttrs {
return &class.ClassAttrs
}
func (class *HtbClass) Type() string {
return "htb"
}
// GenericClass classes represent types that are not currently understood
// by this netlink library.
type GenericClass struct {
ClassAttrs
ClassType string
}
func (class *GenericClass) Attrs() *ClassAttrs {
return &class.ClassAttrs
}
func (class *GenericClass) Type() string {
return class.ClassType
}

View file

@ -0,0 +1,144 @@
package netlink
import (
"syscall"
"github.com/vishvananda/netlink/nl"
)
// ClassDel will delete a class from the system.
// Equivalent to: `tc class del $class`
func ClassDel(class Class) error {
req := nl.NewNetlinkRequest(syscall.RTM_DELTCLASS, syscall.NLM_F_ACK)
base := class.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
}
req.AddData(msg)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// ClassAdd will add a class to the system.
// Equivalent to: `tc class add $class`
func ClassAdd(class Class) error {
req := nl.NewNetlinkRequest(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
base := class.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
}
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if htb, ok := class.(*HtbClass); ok {
opt := nl.TcHtbCopt{}
opt.Rate.Rate = uint32(htb.Rate)
opt.Ceil.Rate = uint32(htb.Ceil)
opt.Buffer = htb.Buffer
opt.Cbuffer = htb.Cbuffer
opt.Quantum = htb.Quantum
opt.Level = htb.Level
opt.Prio = htb.Prio
// TODO: Handle Debug properly. For now default to 0
nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize())
}
req.AddData(options)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// ClassList gets a list of classes in the system.
// Equivalent to: `tc class show`.
// Generally retunrs nothing if link and parent are not specified.
func ClassList(link Link, parent uint32) ([]Class, error) {
req := nl.NewNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP)
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: parent,
}
if link != nil {
base := link.Attrs()
ensureIndex(base)
msg.Ifindex = int32(base.Index)
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTCLASS)
if err != nil {
return nil, err
}
var res []Class
for _, m := range msgs {
msg := nl.DeserializeTcMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return nil, err
}
base := ClassAttrs{
LinkIndex: int(msg.Ifindex),
Handle: msg.Handle,
Parent: msg.Parent,
}
var class Class
classType := ""
for _, attr := range attrs {
switch attr.Attr.Type {
case nl.TCA_KIND:
classType = string(attr.Value[:len(attr.Value)-1])
switch classType {
case "htb":
class = &HtbClass{}
default:
class = &GenericClass{ClassType: classType}
}
case nl.TCA_OPTIONS:
switch classType {
case "htb":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
_, err = parseHtbClassData(class, data)
if err != nil {
return nil, err
}
}
}
}
*class.Attrs() = base
res = append(res, class)
}
return res, nil
}
func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) {
htb := class.(*HtbClass)
detailed := false
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_HTB_PARMS:
opt := nl.DeserializeTcHtbCopt(datum.Value)
htb.Rate = uint64(opt.Rate.Rate)
htb.Ceil = uint64(opt.Ceil.Rate)
htb.Buffer = opt.Buffer
htb.Cbuffer = opt.Cbuffer
htb.Quantum = opt.Quantum
htb.Level = opt.Level
htb.Prio = opt.Prio
}
}
return detailed, nil
}

View file

@ -0,0 +1,140 @@
package netlink
import (
"errors"
"fmt"
"github.com/vishvananda/netlink/nl"
)
type Filter interface {
Attrs() *FilterAttrs
Type() string
}
// Filter represents a netlink filter. A filter is associated with a link,
// has a handle and a parent. The root filter of a device should have a
// parent == HANDLE_ROOT.
type FilterAttrs struct {
LinkIndex int
Handle uint32
Parent uint32
Priority uint16 // lower is higher priority
Protocol uint16 // syscall.ETH_P_*
}
func (q FilterAttrs) String() string {
return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol)
}
// U32 filters on many packet related properties
type U32 struct {
FilterAttrs
// Currently only supports redirecting to another interface
RedirIndex int
}
func (filter *U32) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
func (filter *U32) Type() string {
return "u32"
}
type FilterFwAttrs struct {
ClassId uint32
InDev string
Mask uint32
Index uint32
Buffer uint32
Mtu uint32
Mpu uint16
Rate uint32
AvRate uint32
PeakRate uint32
Action int
Overhead uint16
LinkLayer int
}
// FwFilter filters on firewall marks
type Fw struct {
FilterAttrs
ClassId uint32
Police nl.TcPolice
InDev string
// TODO Action
Mask uint32
AvRate uint32
Rtab [256]uint32
Ptab [256]uint32
}
func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
var rtab [256]uint32
var ptab [256]uint32
rcell_log := -1
pcell_log := -1
avrate := fattrs.AvRate / 8
police := nl.TcPolice{}
police.Rate.Rate = fattrs.Rate / 8
police.PeakRate.Rate = fattrs.PeakRate / 8
buffer := fattrs.Buffer
linklayer := nl.LINKLAYER_ETHERNET
if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
linklayer = fattrs.LinkLayer
}
police.Action = int32(fattrs.Action)
if police.Rate.Rate != 0 {
police.Rate.Mpu = fattrs.Mpu
police.Rate.Overhead = fattrs.Overhead
if CalcRtable(&police.Rate, rtab, rcell_log, fattrs.Mtu, linklayer) < 0 {
return nil, errors.New("TBF: failed to calculate rate table.")
}
police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
}
police.Mtu = fattrs.Mtu
if police.PeakRate.Rate != 0 {
police.PeakRate.Mpu = fattrs.Mpu
police.PeakRate.Overhead = fattrs.Overhead
if CalcRtable(&police.PeakRate, ptab, pcell_log, fattrs.Mtu, linklayer) < 0 {
return nil, errors.New("POLICE: failed to calculate peak rate table.")
}
}
return &Fw{
FilterAttrs: attrs,
ClassId: fattrs.ClassId,
InDev: fattrs.InDev,
Mask: fattrs.Mask,
Police: police,
AvRate: avrate,
Rtab: rtab,
Ptab: ptab,
}, nil
}
func (filter *Fw) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
func (filter *Fw) Type() string {
return "fw"
}
// GenericFilter filters represent types that are not currently understood
// by this netlink library.
type GenericFilter struct {
FilterAttrs
FilterType string
}
func (filter *GenericFilter) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
func (filter *GenericFilter) Type() string {
return filter.FilterType
}

View file

@ -0,0 +1,322 @@
package netlink
import (
"bytes"
"encoding/binary"
"fmt"
"syscall"
"github.com/vishvananda/netlink/nl"
)
// FilterDel will delete a filter from the system.
// Equivalent to: `tc filter del $filter`
func FilterDel(filter Filter) error {
req := nl.NewNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
}
req.AddData(msg)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// FilterAdd will add a filter to the system.
// Equivalent to: `tc filter add $filter`
func FilterAdd(filter Filter) error {
native = nl.NativeEndian()
req := nl.NewNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
}
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if u32, ok := filter.(*U32); ok {
// match all
sel := nl.TcU32Sel{
Nkeys: 1,
Flags: nl.TC_U32_TERMINAL,
}
sel.Keys = append(sel.Keys, nl.TcU32Key{})
nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
actions := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil)
table := nl.NewRtAttrChild(actions, nl.TCA_ACT_TAB, nil)
nl.NewRtAttrChild(table, nl.TCA_KIND, nl.ZeroTerminated("mirred"))
// redirect to other interface
mir := nl.TcMirred{
Action: nl.TC_ACT_STOLEN,
Eaction: nl.TCA_EGRESS_REDIR,
Ifindex: uint32(u32.RedirIndex),
}
aopts := nl.NewRtAttrChild(table, nl.TCA_OPTIONS, nil)
nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mir.Serialize())
} else if fw, ok := filter.(*Fw); ok {
if fw.Mask != 0 {
b := make([]byte, 4)
native.PutUint32(b, fw.Mask)
nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b)
}
if fw.InDev != "" {
nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev))
}
if (fw.Police != nl.TcPolice{}) {
police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil)
nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize())
if (fw.Police.Rate != nl.TcRateSpec{}) {
payload := SerializeRtab(fw.Rtab)
nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload)
}
if (fw.Police.PeakRate != nl.TcRateSpec{}) {
payload := SerializeRtab(fw.Ptab)
nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload)
}
}
if fw.ClassId != 0 {
b := make([]byte, 4)
native.PutUint32(b, fw.ClassId)
nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b)
}
}
req.AddData(options)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// FilterList gets a list of filters in the system.
// Equivalent to: `tc filter show`.
// Generally retunrs nothing if link and parent are not specified.
func FilterList(link Link, parent uint32) ([]Filter, error) {
req := nl.NewNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: parent,
}
if link != nil {
base := link.Attrs()
ensureIndex(base)
msg.Ifindex = int32(base.Index)
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER)
if err != nil {
return nil, err
}
var res []Filter
for _, m := range msgs {
msg := nl.DeserializeTcMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return nil, err
}
base := FilterAttrs{
LinkIndex: int(msg.Ifindex),
Handle: msg.Handle,
Parent: msg.Parent,
}
base.Priority, base.Protocol = MajorMinor(msg.Info)
base.Protocol = nl.Swap16(base.Protocol)
var filter Filter
filterType := ""
detailed := false
for _, attr := range attrs {
switch attr.Attr.Type {
case nl.TCA_KIND:
filterType = string(attr.Value[:len(attr.Value)-1])
switch filterType {
case "u32":
filter = &U32{}
case "fw":
filter = &Fw{}
default:
filter = &GenericFilter{FilterType: filterType}
}
case nl.TCA_OPTIONS:
switch filterType {
case "u32":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
detailed, err = parseU32Data(filter, data)
if err != nil {
return nil, err
}
case "fw":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
detailed, err = parseFwData(filter, data)
if err != nil {
return nil, err
}
}
}
}
// only return the detailed version of the filter
if detailed {
*filter.Attrs() = base
res = append(res, filter)
}
}
return res, nil
}
func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
u32 := filter.(*U32)
detailed := false
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_U32_SEL:
detailed = true
sel := nl.DeserializeTcU32Sel(datum.Value)
// only parse if we have a very basic redirect
if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 {
return detailed, nil
}
case nl.TCA_U32_ACT:
table, err := nl.ParseRouteAttr(datum.Value)
if err != nil {
return detailed, err
}
if len(table) != 1 || table[0].Attr.Type != nl.TCA_ACT_TAB {
return detailed, fmt.Errorf("Action table not formed properly")
}
aattrs, err := nl.ParseRouteAttr(table[0].Value)
for _, aattr := range aattrs {
switch aattr.Attr.Type {
case nl.TCA_KIND:
actionType := string(aattr.Value[:len(aattr.Value)-1])
// only parse if the action is mirred
if actionType != "mirred" {
return detailed, nil
}
case nl.TCA_OPTIONS:
adata, err := nl.ParseRouteAttr(aattr.Value)
if err != nil {
return detailed, err
}
for _, adatum := range adata {
switch adatum.Attr.Type {
case nl.TCA_MIRRED_PARMS:
mir := nl.DeserializeTcMirred(adatum.Value)
u32.RedirIndex = int(mir.Ifindex)
}
}
}
}
}
}
return detailed, nil
}
func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
fw := filter.(*Fw)
detailed := true
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_FW_MASK:
fw.Mask = native.Uint32(datum.Value[0:4])
case nl.TCA_FW_CLASSID:
fw.ClassId = native.Uint32(datum.Value[0:4])
case nl.TCA_FW_INDEV:
fw.InDev = string(datum.Value[:len(datum.Value)-1])
case nl.TCA_FW_POLICE:
adata, _ := nl.ParseRouteAttr(datum.Value)
for _, aattr := range adata {
switch aattr.Attr.Type {
case nl.TCA_POLICE_TBF:
fw.Police = *nl.DeserializeTcPolice(aattr.Value)
case nl.TCA_POLICE_RATE:
fw.Rtab = DeserializeRtab(aattr.Value)
case nl.TCA_POLICE_PEAKRATE:
fw.Ptab = DeserializeRtab(aattr.Value)
}
}
}
}
return detailed, nil
}
func AlignToAtm(size uint) uint {
var linksize, cells int
cells = int(size / nl.ATM_CELL_PAYLOAD)
if (size % nl.ATM_CELL_PAYLOAD) > 0 {
cells++
}
linksize = cells * nl.ATM_CELL_SIZE
return uint(linksize)
}
func AdjustSize(sz uint, mpu uint, linklayer int) uint {
if sz < mpu {
sz = mpu
}
switch linklayer {
case nl.LINKLAYER_ATM:
return AlignToAtm(sz)
default:
return sz
}
}
func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cell_log int, mtu uint32, linklayer int) int {
bps := rate.Rate
mpu := rate.Mpu
var sz uint
if mtu == 0 {
mtu = 2047
}
if cell_log < 0 {
cell_log = 0
for (mtu >> uint(cell_log)) > 255 {
cell_log++
}
}
for i := 0; i < 256; i++ {
sz = AdjustSize(uint((i+1)<<uint32(cell_log)), uint(mpu), linklayer)
rtab[i] = uint32(Xmittime(uint64(bps), uint32(sz)))
}
rate.CellAlign = -1
rate.CellLog = uint8(cell_log)
rate.Linklayer = uint8(linklayer & nl.TC_LINKLAYER_MASK)
return cell_log
}
func DeserializeRtab(b []byte) [256]uint32 {
var rtab [256]uint32
native := nl.NativeEndian()
r := bytes.NewReader(b)
_ = binary.Read(r, native, &rtab)
return rtab
}
func SerializeRtab(rtab [256]uint32) []byte {
native := nl.NativeEndian()
var w bytes.Buffer
_ = binary.Write(&w, native, rtab)
return w.Bytes()
}

View file

@ -1,6 +1,9 @@
package netlink
import "net"
import (
"net"
"syscall"
)
// Link represents a link device from netlink. Shared link attributes
// like name may be retrieved using the Attrs() method. Unique data
@ -62,6 +65,19 @@ func (dummy *Dummy) Type() string {
return "dummy"
}
// Ifb links are advanced dummy devices for packet filtering
type Ifb struct {
LinkAttrs
}
func (ifb *Ifb) Attrs() *LinkAttrs {
return &ifb.LinkAttrs
}
func (ifb *Ifb) Type() string {
return "ifb"
}
// Bridge links are simple linux bridges
type Bridge struct {
LinkAttrs
@ -114,6 +130,36 @@ func (macvlan *Macvlan) Type() string {
return "macvlan"
}
// Macvtap - macvtap is a virtual interfaces based on macvlan
type Macvtap struct {
Macvlan
}
func (macvtap Macvtap) Type() string {
return "macvtap"
}
type TuntapMode uint16
const (
TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN
TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP
)
// Tuntap links created via /dev/tun/tap, but can be destroyed via netlink
type Tuntap struct {
LinkAttrs
Mode TuntapMode
}
func (tuntap *Tuntap) Attrs() *LinkAttrs {
return &tuntap.LinkAttrs
}
func (tuntap *Tuntap) Type() string {
return "tuntap"
}
// Veth devices must specify PeerName on create
type Veth struct {
LinkAttrs
@ -128,18 +174,18 @@ func (veth *Veth) Type() string {
return "veth"
}
// Generic links represent types that are not currently understood
// GenericLink links represent types that are not currently understood
// by this netlink library.
type Generic struct {
type GenericLink struct {
LinkAttrs
LinkType string
}
func (generic *Generic) Attrs() *LinkAttrs {
func (generic *GenericLink) Attrs() *LinkAttrs {
return &generic.LinkAttrs
}
func (generic *Generic) Type() string {
func (generic *GenericLink) Type() string {
return generic.LinkType
}

View file

@ -5,7 +5,9 @@ import (
"encoding/binary"
"fmt"
"net"
"os"
"syscall"
"unsafe"
"github.com/vishvananda/netlink/nl"
)
@ -285,6 +287,44 @@ func LinkAdd(link Link) error {
return fmt.Errorf("LinkAttrs.Name cannot be empty!")
}
if tuntap, ok := link.(*Tuntap); ok {
// TODO: support user
// TODO: support group
// TODO: support non- one_queue
// TODO: support pi | vnet_hdr | multi_queue
// TODO: support non- exclusive
// TODO: support non- persistent
if tuntap.Mode < syscall.IFF_TUN || tuntap.Mode > syscall.IFF_TAP {
return fmt.Errorf("Tuntap.Mode %v unknown!", tuntap.Mode)
}
file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
if err != nil {
return err
}
defer file.Close()
var req ifReq
req.Flags |= syscall.IFF_ONE_QUEUE
req.Flags |= syscall.IFF_TUN_EXCL
copy(req.Name[:15], base.Name)
req.Flags |= uint16(tuntap.Mode)
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETIFF), uintptr(unsafe.Pointer(&req)))
if errno != 0 {
return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed, errno %v", errno)
}
_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETPERSIST), 1)
if errno != 0 {
return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno)
}
ensureIndex(base)
// can't set master during create, so set it afterwards
if base.MasterIndex != 0 {
// TODO: verify MasterIndex is actually a bridge?
return LinkSetMasterByIndex(link, base.MasterIndex)
}
return nil
}
req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
@ -493,6 +533,8 @@ func linkDeserialize(m []byte) (Link, error) {
switch linkType {
case "dummy":
link = &Dummy{}
case "ifb":
link = &Ifb{}
case "bridge":
link = &Bridge{}
case "vlan":
@ -505,8 +547,10 @@ func linkDeserialize(m []byte) (Link, error) {
link = &IPVlan{}
case "macvlan":
link = &Macvlan{}
case "macvtap":
link = &Macvtap{}
default:
link = &Generic{LinkType: linkType}
link = &GenericLink{LinkType: linkType}
}
case nl.IFLA_INFO_DATA:
data, err := nl.ParseRouteAttr(info.Value)
@ -522,6 +566,8 @@ func linkDeserialize(m []byte) (Link, error) {
parseIPVlanData(link, data)
case "macvlan":
parseMacvlanData(link, data)
case "macvtap":
parseMacvtapData(link, data)
}
}
}
@ -583,6 +629,46 @@ func LinkList() ([]Link, error) {
return res, nil
}
// LinkUpdate is used to pass information back from LinkSubscribe()
type LinkUpdate struct {
nl.IfInfomsg
Link
}
// LinkSubscribe takes a chan down which notifications will be sent
// when links change. Close the 'done' chan to stop subscription.
func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error {
s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK)
if err != nil {
return err
}
if done != nil {
go func() {
<-done
s.Close()
}()
}
go func() {
defer close(ch)
for {
msgs, err := s.Receive()
if err != nil {
return
}
for _, m := range msgs {
ifmsg := nl.DeserializeIfInfomsg(m.Data)
link, err := linkDeserialize(m.Data)
if err != nil {
return
}
ch <- LinkUpdate{IfInfomsg: *ifmsg, Link: link}
}
}
}()
return nil
}
func LinkSetHairpin(link Link, mode bool) error {
return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE)
}
@ -696,6 +782,11 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
}
}
func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) {
macv := link.(*Macvtap)
parseMacvlanData(&macv.Macvlan, data)
}
func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) {
macv := link.(*Macvlan)
for _, datum := range data {

View file

@ -0,0 +1,14 @@
package netlink
// ideally golang.org/x/sys/unix would define IfReq but it only has
// IFNAMSIZ, hence this minimalistic implementation
const (
SizeOfIfReq = 40
IFNAMSIZ = 16
)
type ifReq struct {
Name [IFNAMSIZ]byte
Flags uint16
pad [SizeOfIfReq - IFNAMSIZ - 2]byte
}

View file

@ -142,7 +142,7 @@ func (a *RtAttr) Len() int {
}
// Serialize the RtAttr into a byte array
// This can't ust unsafe.cast because it must iterate through children.
// This can't just unsafe.cast because it must iterate through children.
func (a *RtAttr) Serialize() []byte {
native := NativeEndian()

View file

@ -0,0 +1,508 @@
package nl
import (
"unsafe"
)
// LinkLayer
const (
LINKLAYER_UNSPEC = iota
LINKLAYER_ETHERNET
LINKLAYER_ATM
)
// ATM
const (
ATM_CELL_PAYLOAD = 48
ATM_CELL_SIZE = 53
)
const TC_LINKLAYER_MASK = 0x0F
// Police
const (
TCA_POLICE_UNSPEC = iota
TCA_POLICE_TBF
TCA_POLICE_RATE
TCA_POLICE_PEAKRATE
TCA_POLICE_AVRATE
TCA_POLICE_RESULT
TCA_POLICE_MAX = TCA_POLICE_RESULT
)
// Message types
const (
TCA_UNSPEC = iota
TCA_KIND
TCA_OPTIONS
TCA_STATS
TCA_XSTATS
TCA_RATE
TCA_FCNT
TCA_STATS2
TCA_STAB
TCA_MAX = TCA_STAB
)
const (
TCA_ACT_TAB = 1
TCAA_MAX = 1
)
const (
TCA_PRIO_UNSPEC = iota
TCA_PRIO_MQ
TCA_PRIO_MAX = TCA_PRIO_MQ
)
const (
SizeofTcMsg = 0x14
SizeofTcActionMsg = 0x04
SizeofTcPrioMap = 0x14
SizeofTcRateSpec = 0x0c
SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c
SizeofTcHtbCopt = 2*SizeofTcRateSpec + 0x14
SizeofTcHtbGlob = 0x14
SizeofTcU32Key = 0x10
SizeofTcU32Sel = 0x10 // without keys
SizeofTcMirred = 0x1c
SizeofTcPolice = 2*SizeofTcRateSpec + 0x20
)
// struct tcmsg {
// unsigned char tcm_family;
// unsigned char tcm__pad1;
// unsigned short tcm__pad2;
// int tcm_ifindex;
// __u32 tcm_handle;
// __u32 tcm_parent;
// __u32 tcm_info;
// };
type TcMsg struct {
Family uint8
Pad [3]byte
Ifindex int32
Handle uint32
Parent uint32
Info uint32
}
func (msg *TcMsg) Len() int {
return SizeofTcMsg
}
func DeserializeTcMsg(b []byte) *TcMsg {
return (*TcMsg)(unsafe.Pointer(&b[0:SizeofTcMsg][0]))
}
func (x *TcMsg) Serialize() []byte {
return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:]
}
// struct tcamsg {
// unsigned char tca_family;
// unsigned char tca__pad1;
// unsigned short tca__pad2;
// };
type TcActionMsg struct {
Family uint8
Pad [3]byte
}
func (msg *TcActionMsg) Len() int {
return SizeofTcActionMsg
}
func DeserializeTcActionMsg(b []byte) *TcActionMsg {
return (*TcActionMsg)(unsafe.Pointer(&b[0:SizeofTcActionMsg][0]))
}
func (x *TcActionMsg) Serialize() []byte {
return (*(*[SizeofTcActionMsg]byte)(unsafe.Pointer(x)))[:]
}
const (
TC_PRIO_MAX = 15
)
// struct tc_prio_qopt {
// int bands; /* Number of bands */
// __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
// };
type TcPrioMap struct {
Bands int32
Priomap [TC_PRIO_MAX + 1]uint8
}
func (msg *TcPrioMap) Len() int {
return SizeofTcPrioMap
}
func DeserializeTcPrioMap(b []byte) *TcPrioMap {
return (*TcPrioMap)(unsafe.Pointer(&b[0:SizeofTcPrioMap][0]))
}
func (x *TcPrioMap) Serialize() []byte {
return (*(*[SizeofTcPrioMap]byte)(unsafe.Pointer(x)))[:]
}
const (
TCA_TBF_UNSPEC = iota
TCA_TBF_PARMS
TCA_TBF_RTAB
TCA_TBF_PTAB
TCA_TBF_RATE64
TCA_TBF_PRATE64
TCA_TBF_BURST
TCA_TBF_PBURST
TCA_TBF_MAX = TCA_TBF_PBURST
)
// struct tc_ratespec {
// unsigned char cell_log;
// __u8 linklayer; /* lower 4 bits */
// unsigned short overhead;
// short cell_align;
// unsigned short mpu;
// __u32 rate;
// };
type TcRateSpec struct {
CellLog uint8
Linklayer uint8
Overhead uint16
CellAlign int16
Mpu uint16
Rate uint32
}
func (msg *TcRateSpec) Len() int {
return SizeofTcRateSpec
}
func DeserializeTcRateSpec(b []byte) *TcRateSpec {
return (*TcRateSpec)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0]))
}
func (x *TcRateSpec) Serialize() []byte {
return (*(*[SizeofTcRateSpec]byte)(unsafe.Pointer(x)))[:]
}
// struct tc_tbf_qopt {
// struct tc_ratespec rate;
// struct tc_ratespec peakrate;
// __u32 limit;
// __u32 buffer;
// __u32 mtu;
// };
type TcTbfQopt struct {
Rate TcRateSpec
Peakrate TcRateSpec
Limit uint32
Buffer uint32
Mtu uint32
}
func (msg *TcTbfQopt) Len() int {
return SizeofTcTbfQopt
}
func DeserializeTcTbfQopt(b []byte) *TcTbfQopt {
return (*TcTbfQopt)(unsafe.Pointer(&b[0:SizeofTcTbfQopt][0]))
}
func (x *TcTbfQopt) Serialize() []byte {
return (*(*[SizeofTcTbfQopt]byte)(unsafe.Pointer(x)))[:]
}
const (
TCA_HTB_UNSPEC = iota
TCA_HTB_PARMS
TCA_HTB_INIT
TCA_HTB_CTAB
TCA_HTB_RTAB
TCA_HTB_DIRECT_QLEN
TCA_HTB_RATE64
TCA_HTB_CEIL64
TCA_HTB_MAX = TCA_HTB_CEIL64
)
//struct tc_htb_opt {
// struct tc_ratespec rate;
// struct tc_ratespec ceil;
// __u32 buffer;
// __u32 cbuffer;
// __u32 quantum;
// __u32 level; /* out only */
// __u32 prio;
//};
type TcHtbCopt struct {
Rate TcRateSpec
Ceil TcRateSpec
Buffer uint32
Cbuffer uint32
Quantum uint32
Level uint32
Prio uint32
}
func (msg *TcHtbCopt) Len() int {
return SizeofTcHtbCopt
}
func DeserializeTcHtbCopt(b []byte) *TcHtbCopt {
return (*TcHtbCopt)(unsafe.Pointer(&b[0:SizeofTcHtbCopt][0]))
}
func (x *TcHtbCopt) Serialize() []byte {
return (*(*[SizeofTcHtbCopt]byte)(unsafe.Pointer(x)))[:]
}
type TcHtbGlob struct {
Version uint32
Rate2Quantum uint32
Defcls uint32
Debug uint32
DirectPkts uint32
}
func (msg *TcHtbGlob) Len() int {
return SizeofTcHtbGlob
}
func DeserializeTcHtbGlob(b []byte) *TcHtbGlob {
return (*TcHtbGlob)(unsafe.Pointer(&b[0:SizeofTcHtbGlob][0]))
}
func (x *TcHtbGlob) Serialize() []byte {
return (*(*[SizeofTcHtbGlob]byte)(unsafe.Pointer(x)))[:]
}
const (
TCA_U32_UNSPEC = iota
TCA_U32_CLASSID
TCA_U32_HASH
TCA_U32_LINK
TCA_U32_DIVISOR
TCA_U32_SEL
TCA_U32_POLICE
TCA_U32_ACT
TCA_U32_INDEV
TCA_U32_PCNT
TCA_U32_MARK
TCA_U32_MAX = TCA_U32_MARK
)
// struct tc_u32_key {
// __be32 mask;
// __be32 val;
// int off;
// int offmask;
// };
type TcU32Key struct {
Mask uint32 // big endian
Val uint32 // big endian
Off int32
OffMask int32
}
func (msg *TcU32Key) Len() int {
return SizeofTcU32Key
}
func DeserializeTcU32Key(b []byte) *TcU32Key {
return (*TcU32Key)(unsafe.Pointer(&b[0:SizeofTcU32Key][0]))
}
func (x *TcU32Key) Serialize() []byte {
return (*(*[SizeofTcU32Key]byte)(unsafe.Pointer(x)))[:]
}
// struct tc_u32_sel {
// unsigned char flags;
// unsigned char offshift;
// unsigned char nkeys;
//
// __be16 offmask;
// __u16 off;
// short offoff;
//
// short hoff;
// __be32 hmask;
// struct tc_u32_key keys[0];
// };
const (
TC_U32_TERMINAL = 1 << iota
TC_U32_OFFSET = 1 << iota
TC_U32_VAROFFSET = 1 << iota
TC_U32_EAT = 1 << iota
)
type TcU32Sel struct {
Flags uint8
Offshift uint8
Nkeys uint8
Pad uint8
Offmask uint16 // big endian
Off uint16
Offoff int16
Hoff int16
Hmask uint32 // big endian
Keys []TcU32Key
}
func (msg *TcU32Sel) Len() int {
return SizeofTcU32Sel + int(msg.Nkeys)*SizeofTcU32Key
}
func DeserializeTcU32Sel(b []byte) *TcU32Sel {
x := &TcU32Sel{}
copy((*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:], b)
next := SizeofTcU32Sel
var i uint8
for i = 0; i < x.Nkeys; i++ {
x.Keys = append(x.Keys, *DeserializeTcU32Key(b[next:]))
next += SizeofTcU32Key
}
return x
}
func (x *TcU32Sel) Serialize() []byte {
// This can't just unsafe.cast because it must iterate through keys.
buf := make([]byte, x.Len())
copy(buf, (*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:])
next := SizeofTcU32Sel
for _, key := range x.Keys {
keyBuf := key.Serialize()
copy(buf[next:], keyBuf)
next += SizeofTcU32Key
}
return buf
}
const (
TCA_ACT_MIRRED = 8
)
const (
TCA_MIRRED_UNSPEC = iota
TCA_MIRRED_TM
TCA_MIRRED_PARMS
TCA_MIRRED_MAX = TCA_MIRRED_PARMS
)
const (
TCA_EGRESS_REDIR = 1 /* packet redirect to EGRESS*/
TCA_EGRESS_MIRROR = 2 /* mirror packet to EGRESS */
TCA_INGRESS_REDIR = 3 /* packet redirect to INGRESS*/
TCA_INGRESS_MIRROR = 4 /* mirror packet to INGRESS */
)
const (
TC_ACT_UNSPEC = int32(-1)
TC_ACT_OK = 0
TC_ACT_RECLASSIFY = 1
TC_ACT_SHOT = 2
TC_ACT_PIPE = 3
TC_ACT_STOLEN = 4
TC_ACT_QUEUED = 5
TC_ACT_REPEAT = 6
TC_ACT_JUMP = 0x10000000
)
// #define tc_gen \
// __u32 index; \
// __u32 capab; \
// int action; \
// int refcnt; \
// int bindcnt
// struct tc_mirred {
// tc_gen;
// int eaction; /* one of IN/EGRESS_MIRROR/REDIR */
// __u32 ifindex; /* ifindex of egress port */
// };
type TcMirred struct {
Index uint32
Capab uint32
Action int32
Refcnt int32
Bindcnt int32
Eaction int32
Ifindex uint32
}
func (msg *TcMirred) Len() int {
return SizeofTcMirred
}
func DeserializeTcMirred(b []byte) *TcMirred {
return (*TcMirred)(unsafe.Pointer(&b[0:SizeofTcMirred][0]))
}
func (x *TcMirred) Serialize() []byte {
return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:]
}
const (
TC_POLICE_UNSPEC = TC_ACT_UNSPEC
TC_POLICE_OK = TC_ACT_OK
TC_POLICE_RECLASSIFY = TC_ACT_RECLASSIFY
TC_POLICE_SHOT = TC_ACT_SHOT
TC_POLICE_PIPE = TC_ACT_PIPE
)
// struct tc_police {
// __u32 index;
// int action;
// __u32 limit;
// __u32 burst;
// __u32 mtu;
// struct tc_ratespec rate;
// struct tc_ratespec peakrate;
// int refcnt;
// int bindcnt;
// __u32 capab;
// };
type TcPolice struct {
Index uint32
Action int32
Limit uint32
Burst uint32
Mtu uint32
Rate TcRateSpec
PeakRate TcRateSpec
Refcnt int32
Bindcnt int32
Capab uint32
}
func (msg *TcPolice) Len() int {
return SizeofTcPolice
}
func DeserializeTcPolice(b []byte) *TcPolice {
return (*TcPolice)(unsafe.Pointer(&b[0:SizeofTcPolice][0]))
}
func (x *TcPolice) Serialize() []byte {
return (*(*[SizeofTcPolice]byte)(unsafe.Pointer(x)))[:]
}
const (
TCA_FW_UNSPEC = iota
TCA_FW_CLASSID
TCA_FW_POLICE
TCA_FW_INDEV
TCA_FW_ACT
TCA_FW_MASK
TCA_FW_MAX = TCA_FW_MASK
)

View file

@ -0,0 +1,167 @@
package netlink
import (
"fmt"
)
const (
HANDLE_NONE = 0
HANDLE_INGRESS = 0xFFFFFFF1
HANDLE_ROOT = 0xFFFFFFFF
PRIORITY_MAP_LEN = 16
)
type Qdisc interface {
Attrs() *QdiscAttrs
Type() string
}
// Qdisc represents a netlink qdisc. A qdisc is associated with a link,
// has a handle, a parent and a refcnt. The root qdisc of a device should
// have parent == HANDLE_ROOT.
type QdiscAttrs struct {
LinkIndex int
Handle uint32
Parent uint32
Refcnt uint32 // read only
}
func (q QdiscAttrs) String() string {
return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt)
}
func MakeHandle(major, minor uint16) uint32 {
return (uint32(major) << 16) | uint32(minor)
}
func MajorMinor(handle uint32) (uint16, uint16) {
return uint16((handle & 0xFFFF0000) >> 16), uint16(handle & 0x0000FFFFF)
}
func HandleStr(handle uint32) string {
switch handle {
case HANDLE_NONE:
return "none"
case HANDLE_INGRESS:
return "ingress"
case HANDLE_ROOT:
return "root"
default:
major, minor := MajorMinor(handle)
return fmt.Sprintf("%x:%x", major, minor)
}
}
// PfifoFast is the default qdisc created by the kernel if one has not
// been defined for the interface
type PfifoFast struct {
QdiscAttrs
Bands uint8
PriorityMap [PRIORITY_MAP_LEN]uint8
}
func (qdisc *PfifoFast) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *PfifoFast) Type() string {
return "pfifo_fast"
}
// Prio is a basic qdisc that works just like PfifoFast
type Prio struct {
QdiscAttrs
Bands uint8
PriorityMap [PRIORITY_MAP_LEN]uint8
}
func NewPrio(attrs QdiscAttrs) *Prio {
return &Prio{
QdiscAttrs: attrs,
Bands: 3,
PriorityMap: [PRIORITY_MAP_LEN]uint8{1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
}
}
func (qdisc *Prio) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *Prio) Type() string {
return "prio"
}
// Htb is a classful qdisc that rate limits based on tokens
type Htb struct {
QdiscAttrs
Version uint32
Rate2Quantum uint32
Defcls uint32
Debug uint32
DirectPkts uint32
}
func NewHtb(attrs QdiscAttrs) *Htb {
return &Htb{
QdiscAttrs: attrs,
Version: 3,
Defcls: 0,
Rate2Quantum: 10,
Debug: 0,
DirectPkts: 0,
}
}
func (qdisc *Htb) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *Htb) Type() string {
return "htb"
}
// Tbf is a classless qdisc that rate limits based on tokens
type Tbf struct {
QdiscAttrs
// TODO: handle 64bit rate properly
Rate uint64
Limit uint32
Buffer uint32
// TODO: handle other settings
}
func (qdisc *Tbf) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *Tbf) Type() string {
return "tbf"
}
// Ingress is a qdisc for adding ingress filters
type Ingress struct {
QdiscAttrs
}
func (qdisc *Ingress) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *Ingress) Type() string {
return "ingress"
}
// GenericQdisc qdiscs represent types that are not currently understood
// by this netlink library.
type GenericQdisc struct {
QdiscAttrs
QdiscType string
}
func (qdisc *GenericQdisc) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *GenericQdisc) Type() string {
return qdisc.QdiscType
}

View file

@ -0,0 +1,316 @@
package netlink
import (
"fmt"
"io/ioutil"
"strconv"
"strings"
"syscall"
"github.com/vishvananda/netlink/nl"
)
// QdiscDel will delete a qdisc from the system.
// Equivalent to: `tc qdisc del $qdisc`
func QdiscDel(qdisc Qdisc) error {
req := nl.NewNetlinkRequest(syscall.RTM_DELQDISC, syscall.NLM_F_ACK)
base := qdisc.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
}
req.AddData(msg)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// QdiscAdd will add a qdisc to the system.
// Equivalent to: `tc qdisc add $qdisc`
func QdiscAdd(qdisc Qdisc) error {
req := nl.NewNetlinkRequest(syscall.RTM_NEWQDISC, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
base := qdisc.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
}
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if prio, ok := qdisc.(*Prio); ok {
tcmap := nl.TcPrioMap{
Bands: int32(prio.Bands),
Priomap: prio.PriorityMap,
}
options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
} else if tbf, ok := qdisc.(*Tbf); ok {
opt := nl.TcTbfQopt{}
// TODO: handle rate > uint32
opt.Rate.Rate = uint32(tbf.Rate)
opt.Limit = tbf.Limit
opt.Buffer = tbf.Buffer
nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
} else if htb, ok := qdisc.(*Htb); ok {
opt := nl.TcHtbGlob{}
opt.Version = htb.Version
opt.Rate2Quantum = htb.Rate2Quantum
opt.Defcls = htb.Defcls
// TODO: Handle Debug properly. For now default to 0
opt.Debug = htb.Debug
opt.DirectPkts = htb.DirectPkts
nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
// nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
} else if _, ok := qdisc.(*Ingress); ok {
// ingress filters must use the proper handle
if msg.Parent != HANDLE_INGRESS {
return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
}
}
req.AddData(options)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// QdiscList gets a list of qdiscs in the system.
// Equivalent to: `tc qdisc show`.
// The list can be filtered by link.
func QdiscList(link Link) ([]Qdisc, error) {
req := nl.NewNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
index := int32(0)
if link != nil {
base := link.Attrs()
ensureIndex(base)
index = int32(base.Index)
}
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: index,
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC)
if err != nil {
return nil, err
}
var res []Qdisc
for _, m := range msgs {
msg := nl.DeserializeTcMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return nil, err
}
// skip qdiscs from other interfaces
if link != nil && msg.Ifindex != index {
continue
}
base := QdiscAttrs{
LinkIndex: int(msg.Ifindex),
Handle: msg.Handle,
Parent: msg.Parent,
Refcnt: msg.Info,
}
var qdisc Qdisc
qdiscType := ""
for _, attr := range attrs {
switch attr.Attr.Type {
case nl.TCA_KIND:
qdiscType = string(attr.Value[:len(attr.Value)-1])
switch qdiscType {
case "pfifo_fast":
qdisc = &PfifoFast{}
case "prio":
qdisc = &Prio{}
case "tbf":
qdisc = &Tbf{}
case "ingress":
qdisc = &Ingress{}
case "htb":
qdisc = &Htb{}
default:
qdisc = &GenericQdisc{QdiscType: qdiscType}
}
case nl.TCA_OPTIONS:
switch qdiscType {
case "pfifo_fast":
// pfifo returns TcPrioMap directly without wrapping it in rtattr
if err := parsePfifoFastData(qdisc, attr.Value); err != nil {
return nil, err
}
case "prio":
// prio returns TcPrioMap directly without wrapping it in rtattr
if err := parsePrioData(qdisc, attr.Value); err != nil {
return nil, err
}
case "tbf":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
if err := parseTbfData(qdisc, data); err != nil {
return nil, err
}
case "htb":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
if err := parseHtbData(qdisc, data); err != nil {
return nil, err
}
// no options for ingress
}
}
}
*qdisc.Attrs() = base
res = append(res, qdisc)
}
return res, nil
}
func parsePfifoFastData(qdisc Qdisc, value []byte) error {
pfifo := qdisc.(*PfifoFast)
tcmap := nl.DeserializeTcPrioMap(value)
pfifo.PriorityMap = tcmap.Priomap
pfifo.Bands = uint8(tcmap.Bands)
return nil
}
func parsePrioData(qdisc Qdisc, value []byte) error {
prio := qdisc.(*Prio)
tcmap := nl.DeserializeTcPrioMap(value)
prio.PriorityMap = tcmap.Priomap
prio.Bands = uint8(tcmap.Bands)
return nil
}
func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
native = nl.NativeEndian()
htb := qdisc.(*Htb)
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_HTB_INIT:
opt := nl.DeserializeTcHtbGlob(datum.Value)
htb.Version = opt.Version
htb.Rate2Quantum = opt.Rate2Quantum
htb.Defcls = opt.Defcls
htb.Debug = opt.Debug
htb.DirectPkts = opt.DirectPkts
case nl.TCA_HTB_DIRECT_QLEN:
// TODO
//htb.DirectQlen = native.uint32(datum.Value)
}
}
return nil
}
func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
native = nl.NativeEndian()
tbf := qdisc.(*Tbf)
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_TBF_PARMS:
opt := nl.DeserializeTcTbfQopt(datum.Value)
tbf.Rate = uint64(opt.Rate.Rate)
tbf.Limit = opt.Limit
tbf.Buffer = opt.Buffer
case nl.TCA_TBF_RATE64:
tbf.Rate = native.Uint64(datum.Value[0:4])
}
}
return nil
}
const (
TIME_UNITS_PER_SEC = 1000000
)
var (
tickInUsec float64 = 0.0
clockFactor float64 = 0.0
hz float64 = 0.0
)
func initClock() {
data, err := ioutil.ReadFile("/proc/net/psched")
if err != nil {
return
}
parts := strings.Split(strings.TrimSpace(string(data)), " ")
if len(parts) < 3 {
return
}
var vals [3]uint64
for i := range vals {
val, err := strconv.ParseUint(parts[i], 16, 32)
if err != nil {
return
}
vals[i] = val
}
// compatibility
if vals[2] == 1000000000 {
vals[0] = vals[1]
}
clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC
tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor
hz = float64(vals[0])
}
func TickInUsec() float64 {
if tickInUsec == 0.0 {
initClock()
}
return tickInUsec
}
func ClockFactor() float64 {
if clockFactor == 0.0 {
initClock()
}
return clockFactor
}
func Hz() float64 {
if hz == 0.0 {
initClock()
}
return hz
}
func time2Tick(time uint32) uint32 {
return uint32(float64(time) * TickInUsec())
}
func tick2Time(tick uint32) uint32 {
return uint32(float64(tick) / TickInUsec())
}
func time2Ktime(time uint32) uint32 {
return uint32(float64(time) * ClockFactor())
}
func ktime2Time(ktime uint32) uint32 {
return uint32(float64(ktime) / ClockFactor())
}
func burst(rate uint64, buffer uint32) uint32 {
return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC)
}
func latency(rate uint64, limit, buffer uint32) float64 {
return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer))
}
func Xmittime(rate uint64, size uint32) float64 {
return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate))
}

View file

@ -17,6 +17,13 @@ const (
SCOPE_NOWHERE Scope = syscall.RT_SCOPE_NOWHERE
)
type NextHopFlag int
const (
FLAG_ONLINK NextHopFlag = syscall.RTNH_F_ONLINK
FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
)
// Route represents a netlink route. A route is associated with a link,
// has a destination network, an optional source ip, and optional
// gateway. Advanced route parameters and non-main routing tables are
@ -27,9 +34,44 @@ type Route struct {
Dst *net.IPNet
Src net.IP
Gw net.IP
Flags int
}
func (r Route) String() string {
return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s}", r.LinkIndex, r.Dst,
r.Src, r.Gw)
return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s}", r.LinkIndex, r.Dst,
r.Src, r.Gw, r.ListFlags())
}
func (r *Route) SetFlag(flag NextHopFlag) {
r.Flags |= int(flag)
}
func (r *Route) ClearFlag(flag NextHopFlag) {
r.Flags &^= int(flag)
}
type flagString struct {
f NextHopFlag
s string
}
var testFlags = []flagString{
flagString{f: FLAG_ONLINK, s: "onlink"},
flagString{f: FLAG_PERVASIVE, s: "pervasive"},
}
func (r *Route) ListFlags() []string {
var flags []string
for _, tf := range testFlags {
if r.Flags&int(tf.f) != 0 {
flags = append(flags, tf.s)
}
}
return flags
}
// RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE
type RouteUpdate struct {
Type uint16
Route
}

View file

@ -17,7 +17,7 @@ func RouteAdd(route *Route) error {
return routeHandle(route, req, nl.NewRtMsg())
}
// RouteAdd will delete a route from the system.
// RouteDel will delete a route from the system.
// Equivalent to: `ip route del $route`
func RouteDel(route *Route) error {
req := nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK)
@ -30,6 +30,7 @@ func routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error {
}
msg.Scope = uint8(route.Scope)
msg.Flags = uint32(route.Flags)
family := -1
var rtAttrs []*nl.RtAttr
@ -117,7 +118,6 @@ func RouteList(link Link, family int) ([]Route, error) {
index = base.Index
}
native := nl.NativeEndian()
var res []Route
for _, m := range msgs {
msg := nl.DeserializeRtMsg(m)
@ -132,31 +132,14 @@ func RouteList(link Link, family int) ([]Route, error) {
continue
}
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
route, err := deserializeRoute(m)
if err != nil {
return nil, err
}
route := Route{Scope: Scope(msg.Scope)}
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.RTA_GATEWAY:
route.Gw = net.IP(attr.Value)
case syscall.RTA_PREFSRC:
route.Src = net.IP(attr.Value)
case syscall.RTA_DST:
route.Dst = &net.IPNet{
IP: attr.Value,
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
}
case syscall.RTA_OIF:
routeIndex := int(native.Uint32(attr.Value[0:4]))
if link != nil && routeIndex != index {
// Ignore routes from other interfaces
continue
}
route.LinkIndex = routeIndex
}
if link != nil && route.LinkIndex != index {
// Ignore routes from other interfaces
continue
}
res = append(res, route)
}
@ -164,6 +147,37 @@ func RouteList(link Link, family int) ([]Route, error) {
return res, nil
}
// deserializeRoute decodes a binary netlink message into a Route struct
func deserializeRoute(m []byte) (Route, error) {
route := Route{}
msg := nl.DeserializeRtMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return route, err
}
route.Scope = Scope(msg.Scope)
route.Flags = int(msg.Flags)
native := nl.NativeEndian()
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.RTA_GATEWAY:
route.Gw = net.IP(attr.Value)
case syscall.RTA_PREFSRC:
route.Src = net.IP(attr.Value)
case syscall.RTA_DST:
route.Dst = &net.IPNet{
IP: attr.Value,
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
}
case syscall.RTA_OIF:
routeIndex := int(native.Uint32(attr.Value[0:4]))
route.LinkIndex = routeIndex
}
}
return route, nil
}
// RouteGet gets a route to a specific destination from the host system.
// Equivalent to: 'ip route get'.
func RouteGet(destination net.IP) ([]Route, error) {
@ -191,34 +205,47 @@ func RouteGet(destination net.IP) ([]Route, error) {
return nil, err
}
native := nl.NativeEndian()
var res []Route
for _, m := range msgs {
msg := nl.DeserializeRtMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
route, err := deserializeRoute(m)
if err != nil {
return nil, err
}
route := Route{}
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.RTA_GATEWAY:
route.Gw = net.IP(attr.Value)
case syscall.RTA_PREFSRC:
route.Src = net.IP(attr.Value)
case syscall.RTA_DST:
route.Dst = &net.IPNet{
IP: attr.Value,
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
}
case syscall.RTA_OIF:
routeIndex := int(native.Uint32(attr.Value[0:4]))
route.LinkIndex = routeIndex
}
}
res = append(res, route)
}
return res, nil
}
// RouteSubscribe takes a chan down which notifications will be sent
// when routes are added or deleted. Close the 'done' chan to stop subscription.
func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error {
s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE)
if err != nil {
return err
}
if done != nil {
go func() {
<-done
s.Close()
}()
}
go func() {
defer close(ch)
for {
msgs, err := s.Receive()
if err != nil {
return
}
for _, m := range msgs {
route, err := deserializeRoute(m.Data)
if err != nil {
return
}
ch <- RouteUpdate{Type: m.Header.Type, Route: route}
}
}
}()
return nil
}