Merge pull request #18486 from aboch/vnd

Vendoring libnetwork bbd6e6d8ca1e7c9b42f6f53277b0bde72847ff90
This commit is contained in:
Alexander Morozov 2015-12-14 13:32:01 -08:00
commit 260cbb19c9
34 changed files with 2211 additions and 173 deletions

View file

@ -22,14 +22,14 @@ clone git github.com/vdemeester/shakers 3c10293ce22b900c27acad7b28656196fcc2f73b
clone git golang.org/x/net 47990a1ba55743e6ef1affd3a14e5bac8553615d https://github.com/golang/net.git clone git golang.org/x/net 47990a1ba55743e6ef1affd3a14e5bac8553615d https://github.com/golang/net.git
#get libnetwork packages #get libnetwork packages
clone git github.com/docker/libnetwork 04cc1fa0a89f8c407b7be8cab883d4b17531ea7d clone git github.com/docker/libnetwork bbd6e6d8ca1e7c9b42f6f53277b0bde72847ff90
clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4 clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4
clone git github.com/hashicorp/serf 7151adcef72687bf95f451a2e0ba15cb19412bf2 clone git github.com/hashicorp/serf 7151adcef72687bf95f451a2e0ba15cb19412bf2
clone git github.com/docker/libkv c2aac5dbbaa5c872211edea7c0f32b3bd67e7410 clone git github.com/docker/libkv c2aac5dbbaa5c872211edea7c0f32b3bd67e7410
clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25 clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
clone git github.com/vishvananda/netlink 4b5dce31de6d42af5bb9811c6d265472199e0fec clone git github.com/vishvananda/netlink 8e810149a2e531fed9b837c0c7d8a8922d2bedf7
clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060 clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374 clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d

View file

@ -569,6 +569,9 @@ func (s *DockerDaemonSuite) TestDockerNetworkNoDiscoveryDefaultBridgeNetwork(c *
out, err = s.d.Cmd("network", "connect", network, cid1) out, err = s.d.Cmd("network", "connect", network, cid1)
c.Assert(err, check.IsNil, check.Commentf(out)) c.Assert(err, check.IsNil, check.Commentf(out))
hosts, err = s.d.Cmd("exec", cid1, "cat", hostsFile)
c.Assert(err, checker.IsNil)
hostsPost, err = s.d.Cmd("exec", cid1, "cat", hostsFile) hostsPost, err = s.d.Cmd("exec", cid1, "cat", hostsFile)
c.Assert(err, checker.IsNil) c.Assert(err, checker.IsNil)
c.Assert(string(hosts), checker.Equals, string(hostsPost), c.Assert(string(hosts), checker.Equals, string(hostsPost),
@ -631,6 +634,9 @@ func (s *DockerNetworkSuite) TestDockerNetworkAnonymousEndpoint(c *check.C) {
dockerCmd(c, "network", "connect", cstmBridgeNw1, cid2) dockerCmd(c, "network", "connect", cstmBridgeNw1, cid2)
hosts2, err = readContainerFileWithExec(cid2, hostsFile)
c.Assert(err, checker.IsNil)
hosts1post, err = readContainerFileWithExec(cid1, hostsFile) hosts1post, err = readContainerFileWithExec(cid1, hostsFile)
c.Assert(err, checker.IsNil) c.Assert(err, checker.IsNil)
c.Assert(string(hosts1), checker.Equals, string(hosts1post), c.Assert(string(hosts1), checker.Equals, string(hosts1post),

View file

@ -1,5 +1,52 @@
Alessandro Boch <aboch@docker.com> (@aboch) # Libnetwork maintainers file
Alexandr Morozov <lk4d4@docker.com> (@LK4D4) #
Arnaud Porterie <arnaud@docker.com> (@icecrime) # This file describes who runs the docker/libnetwork project and how.
Jana Radhakrishnan <mrjana@docker.com> (@mrjana) # This is a living document - if you see something out of date or missing, speak up!
Madhu Venugopal <madhu@docker.com> (@mavenugo) #
# It is structured to be consumable by both humans and programs.
# To extract its contents programmatically, use any TOML-compliant parser.
#
# This file is compiled into the MAINTAINERS file in docker/opensource.
#
[Org]
[Org."Core maintainers"]
people = [
"aboch",
"LK4D4",
"icecrime",
"mrjana",
"mavenugo",
]
[people]
# A reference list of all people associated with the project.
# All other sections should refer to people by their canonical key
# in the people section.
# ADD YOURSELF HERE IN ALPHABETICAL ORDER
[people.aboch]
Name = "Alessandro Boch"
Email = "aboch@docker.com"
GitHub = "aboch"
[people.LK4D4]
Name = "Alexandr Morozov"
Email = "lk4d4@docker.com"
GitHub = "LK4D4"
[people.icecrime]
Name = "Arnaud Porterie"
Email = "arnaud@docker.com"
GitHub = "icecrime"
[people.mrjana]
Name = "Jana Radhakrishnan"
Email = "mrjana@docker.com"
GitHub = "mrjana"
[people.mavenugo]
Name = "Madhu Venugopal"
Email = "madhu@docker.com"
GitHub = "mavenugo"

View file

@ -6,8 +6,6 @@ Libnetwork provides a native Go implementation for connecting containers
The goal of libnetwork is to deliver a robust Container Network Model that provides a consistent programming interface and the required network abstractions for applications. The goal of libnetwork is to deliver a robust Container Network Model that provides a consistent programming interface and the required network abstractions for applications.
**NOTE**: libnetwork project is under heavy development and is not ready for general use.
#### Design #### Design
Please refer to the [design](docs/design.md) for more information. Please refer to the [design](docs/design.md) for more information.
@ -67,10 +65,6 @@ There are many networking solutions available to suit a broad range of use-cases
} }
} }
``` ```
#### Current Status
Please watch this space for updates on the progress.
Currently libnetwork is nothing more than an attempt to modularize the Docker platform's networking subsystem by moving it into libnetwork as a library.
## Future ## Future
Please refer to [roadmap](ROADMAP.md) for more information. Please refer to [roadmap](ROADMAP.md) for more information.

View file

@ -24,7 +24,10 @@ const (
) )
var ( var (
errNoBitAvailable = fmt.Errorf("no bit available") // ErrNoBitAvailable is returned when no more bits are available to set
ErrNoBitAvailable = fmt.Errorf("no bit available")
// ErrBitAllocated is returned when the specific bit requested is already set
ErrBitAllocated = fmt.Errorf("requested bit is already allocated")
) )
// Handle contains the sequece representing the bitmask and its identifier // Handle contains the sequece representing the bitmask and its identifier
@ -94,7 +97,7 @@ func (s *sequence) toString() string {
// GetAvailableBit returns the position of the first unset bit in the bitmask represented by this sequence // GetAvailableBit returns the position of the first unset bit in the bitmask represented by this sequence
func (s *sequence) getAvailableBit(from uint64) (uint64, uint64, error) { func (s *sequence) getAvailableBit(from uint64) (uint64, uint64, error) {
if s.block == blockMAX || s.count == 0 { if s.block == blockMAX || s.count == 0 {
return invalidPos, invalidPos, errNoBitAvailable return invalidPos, invalidPos, ErrNoBitAvailable
} }
bits := from bits := from
bitSel := blockFirstBit >> from bitSel := blockFirstBit >> from
@ -197,7 +200,7 @@ func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) {
return invalidPos, fmt.Errorf("invalid bit range [%d, %d]", start, end) return invalidPos, fmt.Errorf("invalid bit range [%d, %d]", start, end)
} }
if h.Unselected() == 0 { if h.Unselected() == 0 {
return invalidPos, errNoBitAvailable return invalidPos, ErrNoBitAvailable
} }
return h.set(0, start, end, true, false) return h.set(0, start, end, true, false)
} }
@ -205,7 +208,7 @@ func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) {
// SetAny atomically sets the first unset bit in the sequence and returns the corresponding ordinal // SetAny atomically sets the first unset bit in the sequence and returns the corresponding ordinal
func (h *Handle) SetAny() (uint64, error) { func (h *Handle) SetAny() (uint64, error) {
if h.Unselected() == 0 { if h.Unselected() == 0 {
return invalidPos, errNoBitAvailable return invalidPos, ErrNoBitAvailable
} }
return h.set(0, 0, h.bits-1, true, false) return h.set(0, 0, h.bits-1, true, false)
} }
@ -250,8 +253,12 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64
) )
for { for {
if h.store != nil { var store datastore.DataStore
if err := h.store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound { h.Lock()
store = h.store
h.Unlock()
if store != nil {
if err := store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
return ret, err return ret, err
} }
} }
@ -265,7 +272,7 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64
bytePos, bitPos, err = getFirstAvailable(h.head, start) bytePos, bitPos, err = getFirstAvailable(h.head, start)
ret = posToOrdinal(bytePos, bitPos) ret = posToOrdinal(bytePos, bitPos)
if end < ret { if end < ret {
err = errNoBitAvailable err = ErrNoBitAvailable
} }
} else { } else {
bytePos, bitPos, err = checkIfAvailable(h.head, ordinal) bytePos, bitPos, err = checkIfAvailable(h.head, ordinal)
@ -445,7 +452,7 @@ func getFirstAvailable(head *sequence, start uint64) (uint64, uint64, error) {
byteOffset += current.count * blockBytes byteOffset += current.count * blockBytes
current = current.next current = current.next
} }
return invalidPos, invalidPos, errNoBitAvailable return invalidPos, invalidPos, ErrNoBitAvailable
} }
// checkIfAvailable checks if the bit correspondent to the specified ordinal is unset // checkIfAvailable checks if the bit correspondent to the specified ordinal is unset
@ -463,7 +470,7 @@ func checkIfAvailable(head *sequence, ordinal uint64) (uint64, uint64, error) {
} }
} }
return invalidPos, invalidPos, fmt.Errorf("requested bit is not available") return invalidPos, invalidPos, ErrBitAllocated
} }
// Given the byte position and the sequences list head, return the pointer to the // Given the byte position and the sequences list head, return the pointer to the

View file

@ -4,7 +4,7 @@ machine:
dependencies: dependencies:
override: override:
- sudo apt-get update; sudo apt-get install -y iptables zookeeperd - sudo apt-get update; sudo apt-get install -y iptables zookeeperd
- go get golang.org/x/tools/cmd/vet - go get golang.org/x/tools/cmd/vet
- go get golang.org/x/tools/cmd/goimports - go get golang.org/x/tools/cmd/goimports
- go get golang.org/x/tools/cmd/cover - go get golang.org/x/tools/cmd/cover

View file

@ -315,17 +315,17 @@ func (c *controller) RegisterIpamDriver(name string, driver ipamapi.Ipam) error
_, ok := c.ipamDrivers[name] _, ok := c.ipamDrivers[name]
c.Unlock() c.Unlock()
if ok { if ok {
return driverapi.ErrActiveRegistration(name) return types.ForbiddenErrorf("ipam driver %q already registered", name)
} }
locAS, glbAS, err := driver.GetDefaultAddressSpaces() locAS, glbAS, err := driver.GetDefaultAddressSpaces()
if err != nil { if err != nil {
return fmt.Errorf("ipam driver %s failed to return default address spaces: %v", name, err) return types.InternalErrorf("ipam driver %q failed to return default address spaces: %v", name, err)
} }
c.Lock() c.Lock()
c.ipamDrivers[name] = &ipamData{driver: driver, defaultLocalAddressSpace: locAS, defaultGlobalAddressSpace: glbAS} c.ipamDrivers[name] = &ipamData{driver: driver, defaultLocalAddressSpace: locAS, defaultGlobalAddressSpace: glbAS}
c.Unlock() c.Unlock()
log.Debugf("Registering ipam provider: %s", name) log.Debugf("Registering ipam driver: %q", name)
return nil return nil
} }
@ -667,7 +667,7 @@ func (c *controller) loadIpamDriver(name string) (*ipamData, error) {
id, ok := c.ipamDrivers[name] id, ok := c.ipamDrivers[name]
c.Unlock() c.Unlock()
if !ok { if !ok {
return nil, ErrInvalidNetworkDriver(name) return nil, types.BadRequestErrorf("invalid ipam driver: %q", name)
} }
return id, nil return id, nil
} }

View file

@ -103,10 +103,18 @@ func (sb *sandbox) needDefaultGW() bool {
if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" { if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" {
continue continue
} }
if ep.joinInfo.disableGatewayService {
return false
}
// TODO v6 needs to be handled. // TODO v6 needs to be handled.
if len(ep.Gateway()) > 0 { if len(ep.Gateway()) > 0 {
return false return false
} }
for _, r := range ep.StaticRoutes() {
if r.Destination.String() == "0.0.0.0/0" {
return false
}
}
needGW = true needGW = true
} }
return needGW return needGW

View file

@ -91,6 +91,9 @@ type JoinInfo interface {
// AddStaticRoute adds a routes to the sandbox. // AddStaticRoute adds a routes to the sandbox.
// It may be used in addtion to or instead of a default gateway (as above). // It may be used in addtion to or instead of a default gateway (as above).
AddStaticRoute(destination *net.IPNet, routeType int, nextHop net.IP) error AddStaticRoute(destination *net.IPNet, routeType int, nextHop net.IP) error
// DisableGatewayService tells libnetwork not to provide Default GW for the container
DisableGatewayService()
} }
// DriverCallback provides a Callback interface for Drivers into LibNetwork // DriverCallback provides a Callback interface for Drivers into LibNetwork

View file

@ -151,6 +151,10 @@ func (d *driver) processQuery(q *serf.Query) {
} }
func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) { func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) {
if d.serfInstance == nil {
return nil, nil, nil, fmt.Errorf("could not resolve peer: serf instance not initialized")
}
qPayload := fmt.Sprintf("%s %s", string(nid), peerIP.String()) qPayload := fmt.Sprintf("%s %s", string(nid), peerIP.String())
resp, err := d.serfInstance.Query("peerlookup", []byte(qPayload), nil) resp, err := d.serfInstance.Query("peerlookup", []byte(qPayload), nil)
if err != nil { if err != nil {

View file

@ -134,10 +134,11 @@ type StaticRoute struct {
// JoinResponse is the response to a JoinRequest. // JoinResponse is the response to a JoinRequest.
type JoinResponse struct { type JoinResponse struct {
Response Response
InterfaceName *InterfaceName InterfaceName *InterfaceName
Gateway string Gateway string
GatewayIPv6 string GatewayIPv6 string
StaticRoutes []StaticRoute StaticRoutes []StaticRoute
DisableGatewayService bool
} }
// LeaveRequest describes the API for detaching an endpoint from a sandbox. // LeaveRequest describes the API for detaching an endpoint from a sandbox.

View file

@ -231,6 +231,9 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
} }
} }
} }
if res.DisableGatewayService {
jinfo.DisableGatewayService()
}
return nil return nil
} }

View file

@ -60,6 +60,8 @@ type endpoint struct {
anonymous bool anonymous bool
generic map[string]interface{} generic map[string]interface{}
joinLeaveDone chan struct{} joinLeaveDone chan struct{}
prefAddress net.IP
ipamOptions map[string]string
dbIndex uint64 dbIndex uint64
dbExists bool dbExists bool
sync.Mutex sync.Mutex
@ -386,6 +388,9 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
} }
}() }()
// Watch for service records
network.getController().watchSvcRecord(ep)
address := "" address := ""
if ip := ep.getFirstInterfaceAddress(); ip != nil { if ip := ep.getFirstInterfaceAddress(); ip != nil {
address = ip.String() address = ip.String()
@ -394,9 +399,6 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
return err return err
} }
// Watch for service records
network.getController().watchSvcRecord(ep)
if err = sb.updateDNS(network.enableIPv6); err != nil { if err = sb.updateDNS(network.enableIPv6); err != nil {
return err return err
} }
@ -559,7 +561,7 @@ func (ep *endpoint) sbLeave(sbox Sandbox, options ...EndpointOption) error {
sb.deleteHostsEntries(n.getSvcRecords(ep)) sb.deleteHostsEntries(n.getSvcRecords(ep))
if sb.needDefaultGW() { if !sb.inDelete && sb.needDefaultGW() {
ep := sb.getEPwithoutGateway() ep := sb.getEPwithoutGateway()
if ep == nil { if ep == nil {
return fmt.Errorf("endpoint without GW expected, but not found") return fmt.Errorf("endpoint without GW expected, but not found")
@ -685,6 +687,14 @@ func EndpointOptionGeneric(generic map[string]interface{}) EndpointOption {
} }
} }
// CreateOptionIpam function returns an option setter for the ipam configuration for this endpoint
func CreateOptionIpam(prefAddress net.IP, ipamOptions map[string]string) EndpointOption {
return func(ep *endpoint) {
ep.prefAddress = prefAddress
ep.ipamOptions = ipamOptions
}
}
// CreateOptionExposedPorts function returns an option setter for the container exposed // CreateOptionExposedPorts function returns an option setter for the container exposed
// ports option to be passed to network.CreateEndpoint() method. // ports option to be passed to network.CreateEndpoint() method.
func CreateOptionExposedPorts(exposedPorts []types.TransportPort) EndpointOption { func CreateOptionExposedPorts(exposedPorts []types.TransportPort) EndpointOption {
@ -799,7 +809,7 @@ func (ep *endpoint) assignAddressVersion(ipVer int, ipam ipamapi.Ipam) error {
if *address != nil { if *address != nil {
prefIP = (*address).IP prefIP = (*address).IP
} }
addr, _, err := ipam.RequestAddress(d.PoolID, prefIP, nil) addr, _, err := ipam.RequestAddress(d.PoolID, prefIP, ep.ipamOptions)
if err == nil { if err == nil {
ep.Lock() ep.Lock()
*address = addr *address = addr

View file

@ -25,6 +25,10 @@ type EndpointInfo interface {
// This will only return a valid value if a container has joined the endpoint. // This will only return a valid value if a container has joined the endpoint.
GatewayIPv6() net.IP GatewayIPv6() net.IP
// StaticRoutes returns the list of static routes configured by the network
// driver when the container joins a network
StaticRoutes() []*types.StaticRoute
// Sandbox returns the attached sandbox if there, nil otherwise. // Sandbox returns the attached sandbox if there, nil otherwise.
Sandbox() Sandbox Sandbox() Sandbox
} }
@ -136,9 +140,10 @@ func (epi *endpointInterface) CopyTo(dstEpi *endpointInterface) error {
} }
type endpointJoinInfo struct { type endpointJoinInfo struct {
gw net.IP gw net.IP
gw6 net.IP gw6 net.IP
StaticRoutes []*types.StaticRoute StaticRoutes []*types.StaticRoute
disableGatewayService bool
} }
func (ep *endpoint) Info() EndpointInfo { func (ep *endpoint) Info() EndpointInfo {
@ -295,6 +300,17 @@ func (ep *endpoint) Sandbox() Sandbox {
return cnt return cnt
} }
func (ep *endpoint) StaticRoutes() []*types.StaticRoute {
ep.Lock()
defer ep.Unlock()
if ep.joinInfo == nil {
return nil
}
return ep.joinInfo.StaticRoutes
}
func (ep *endpoint) Gateway() net.IP { func (ep *endpoint) Gateway() net.IP {
ep.Lock() ep.Lock()
defer ep.Unlock() defer ep.Unlock()
@ -340,3 +356,10 @@ func (ep *endpoint) retrieveFromStore() (*endpoint, error) {
} }
return n.getEndpointFromStore(ep.ID()) return n.getEndpointFromStore(ep.ID())
} }
func (ep *endpoint) DisableGatewayService() {
ep.Lock()
defer ep.Unlock()
ep.joinInfo.disableGatewayService = true
}

View file

@ -1,6 +1,7 @@
package etchosts package etchosts
import ( import (
"bufio"
"bytes" "bytes"
"fmt" "fmt"
"io" "io"
@ -138,19 +139,36 @@ func Delete(path string, recs []Record) error {
if len(recs) == 0 { if len(recs) == 0 {
return nil return nil
} }
old, err := os.Open(path)
old, err := ioutil.ReadFile(path)
if err != nil { if err != nil {
return err return err
} }
regexpStr := fmt.Sprintf("\\S*\\t%s\\n", regexp.QuoteMeta(recs[0].Hosts)) var buf bytes.Buffer
for _, r := range recs[1:] {
regexpStr = regexpStr + "|" + fmt.Sprintf("\\S*\\t%s\\n", regexp.QuoteMeta(r.Hosts))
}
var re = regexp.MustCompile(regexpStr) s := bufio.NewScanner(old)
return ioutil.WriteFile(path, re.ReplaceAll(old, []byte("")), 0644) eol := []byte{'\n'}
loop:
for s.Scan() {
b := s.Bytes()
if b[0] == '#' {
buf.Write(b)
buf.Write(eol)
continue
}
for _, r := range recs {
if bytes.HasSuffix(b, []byte("\t"+r.Hosts)) {
continue loop
}
}
buf.Write(b)
buf.Write(eol)
}
old.Close()
if err := s.Err(); err != nil {
return err
}
return ioutil.WriteFile(path, buf.Bytes(), 0644)
} }
// Update all IP addresses where hostname matches. // Update all IP addresses where hostname matches.

View file

@ -76,8 +76,7 @@ func NewAllocator(lcDs, glDs datastore.DataStore) (*Allocator, error) {
func (a *Allocator) refresh(as string) error { func (a *Allocator) refresh(as string) error {
aSpace, err := a.getAddressSpaceFromStore(as) aSpace, err := a.getAddressSpaceFromStore(as)
if err != nil { if err != nil {
return fmt.Errorf("error getting pools config from store during init: %v", return types.InternalErrorf("error getting pools config from store: %v", err)
err)
} }
if aSpace == nil { if aSpace == nil {
@ -239,7 +238,7 @@ func (a *Allocator) insertBitMask(key SubnetKey, pool *net.IPNet) error {
store := a.getStore(key.AddressSpace) store := a.getStore(key.AddressSpace)
if store == nil { if store == nil {
return fmt.Errorf("could not find store for address space %s while inserting bit mask", key.AddressSpace) return types.InternalErrorf("could not find store for address space %s while inserting bit mask", key.AddressSpace)
} }
ipVer := getAddressVersion(pool.IP) ipVer := getAddressVersion(pool.IP)
@ -279,7 +278,7 @@ func (a *Allocator) retrieveBitmask(k SubnetKey, n *net.IPNet) (*bitseq.Handle,
if !ok { if !ok {
log.Debugf("Retrieving bitmask (%s, %s)", k.String(), n.String()) log.Debugf("Retrieving bitmask (%s, %s)", k.String(), n.String())
if err := a.insertBitMask(k, n); err != nil { if err := a.insertBitMask(k, n); err != nil {
return nil, fmt.Errorf("could not find bitmask in datastore for %s", k.String()) return nil, types.InternalErrorf("could not find bitmask in datastore for %s", k.String())
} }
a.Lock() a.Lock()
bm = a.addresses[k] bm = a.addresses[k]
@ -306,7 +305,7 @@ func (a *Allocator) getPredefinedPool(as string, ipV6 bool) (*net.IPNet, error)
} }
if as != localAddressSpace && as != globalAddressSpace { if as != localAddressSpace && as != globalAddressSpace {
return nil, fmt.Errorf("no default pool availbale for non-default addresss spaces") return nil, types.NotImplementedErrorf("no default pool availbale for non-default addresss spaces")
} }
aSpace, err := a.getAddrSpace(as) aSpace, err := a.getAddrSpace(as)
@ -378,7 +377,7 @@ func (a *Allocator) RequestAddress(poolID string, prefAddress net.IP, opts map[s
bm, err := a.retrieveBitmask(k, c.Pool) bm, err := a.retrieveBitmask(k, c.Pool)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v", return nil, nil, types.InternalErrorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v",
k.String(), prefAddress, poolID, err) k.String(), prefAddress, poolID, err)
} }
ip, err := a.getAddress(p.Pool, bm, prefAddress, p.Range) ip, err := a.getAddress(p.Pool, bm, prefAddress, p.Range)
@ -410,12 +409,12 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error {
p, ok := aSpace.subnets[k] p, ok := aSpace.subnets[k]
if !ok { if !ok {
aSpace.Unlock() aSpace.Unlock()
return ipamapi.ErrBadPool return types.NotFoundErrorf("cannot find address pool for poolID:%s", poolID)
} }
if address == nil { if address == nil {
aSpace.Unlock() aSpace.Unlock()
return ipamapi.ErrInvalidRequest return types.BadRequestErrorf("invalid address: nil")
} }
if !p.Pool.Contains(address) { if !p.Pool.Contains(address) {
@ -434,12 +433,12 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error {
h, err := types.GetHostPartIP(address, mask) h, err := types.GetHostPartIP(address, mask)
if err != nil { if err != nil {
return fmt.Errorf("failed to release address %s: %v", address.String(), err) return types.InternalErrorf("failed to release address %s: %v", address.String(), err)
} }
bm, err := a.retrieveBitmask(k, c.Pool) bm, err := a.retrieveBitmask(k, c.Pool)
if err != nil { if err != nil {
return fmt.Errorf("could not find bitmask in datastore for %s on address %v release from pool %s: %v", return types.InternalErrorf("could not find bitmask in datastore for %s on address %v release from pool %s: %v",
k.String(), address, poolID, err) k.String(), address, poolID, err)
} }
@ -463,19 +462,25 @@ func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddres
} else if prefAddress != nil { } else if prefAddress != nil {
hostPart, e := types.GetHostPartIP(prefAddress, base.Mask) hostPart, e := types.GetHostPartIP(prefAddress, base.Mask)
if e != nil { if e != nil {
return nil, fmt.Errorf("failed to allocate preferred address %s: %v", prefAddress.String(), e) return nil, types.InternalErrorf("failed to allocate preferred address %s: %v", prefAddress.String(), e)
} }
ordinal = ipToUint64(types.GetMinimalIP(hostPart)) ordinal = ipToUint64(types.GetMinimalIP(hostPart))
err = bitmask.Set(ordinal) err = bitmask.Set(ordinal)
} else { } else {
ordinal, err = bitmask.SetAnyInRange(ipr.Start, ipr.End) ordinal, err = bitmask.SetAnyInRange(ipr.Start, ipr.End)
} }
if err != nil {
return nil, ipamapi.ErrNoAvailableIPs
}
// Convert IP ordinal for this subnet into IP address switch err {
return generateAddress(ordinal, base), nil case nil:
// Convert IP ordinal for this subnet into IP address
return generateAddress(ordinal, base), nil
case bitseq.ErrBitAllocated:
return nil, ipamapi.ErrIPAlreadyAllocated
case bitseq.ErrNoBitAvailable:
return nil, ipamapi.ErrNoAvailableIPs
default:
return nil, err
}
} }
// DumpDatabase dumps the internal info // DumpDatabase dumps the internal info

View file

@ -2,7 +2,6 @@ package ipam
import ( import (
"encoding/json" "encoding/json"
"fmt"
log "github.com/Sirupsen/logrus" log "github.com/Sirupsen/logrus"
"github.com/docker/libnetwork/datastore" "github.com/docker/libnetwork/datastore"
@ -84,7 +83,7 @@ func (a *Allocator) getStore(as string) datastore.DataStore {
func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) { func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
store := a.getStore(as) store := a.getStore(as)
if store == nil { if store == nil {
return nil, fmt.Errorf("store for address space %s not found", as) return nil, types.InternalErrorf("store for address space %s not found", as)
} }
pc := &addrSpace{id: dsConfigKey + "/" + as, ds: store, alloc: a} pc := &addrSpace{id: dsConfigKey + "/" + as, ds: store, alloc: a}
@ -93,7 +92,7 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
return nil, nil return nil, nil
} }
return nil, fmt.Errorf("could not get pools config from store: %v", err) return nil, types.InternalErrorf("could not get pools config from store: %v", err)
} }
return pc, nil return pc, nil
@ -102,7 +101,7 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
func (a *Allocator) writeToStore(aSpace *addrSpace) error { func (a *Allocator) writeToStore(aSpace *addrSpace) error {
store := aSpace.store() store := aSpace.store()
if store == nil { if store == nil {
return fmt.Errorf("invalid store while trying to write %s address space", aSpace.DataScope()) return types.InternalErrorf("invalid store while trying to write %s address space", aSpace.DataScope())
} }
err := store.PutObjectAtomic(aSpace) err := store.PutObjectAtomic(aSpace)
@ -116,7 +115,7 @@ func (a *Allocator) writeToStore(aSpace *addrSpace) error {
func (a *Allocator) deleteFromStore(aSpace *addrSpace) error { func (a *Allocator) deleteFromStore(aSpace *addrSpace) error {
store := aSpace.store() store := aSpace.store()
if store == nil { if store == nil {
return fmt.Errorf("invalid store while trying to delete %s address space", aSpace.DataScope()) return types.InternalErrorf("invalid store while trying to delete %s address space", aSpace.DataScope())
} }
return store.DeleteObjectAtomic(aSpace) return store.DeleteObjectAtomic(aSpace)

View file

@ -88,12 +88,12 @@ func (s *SubnetKey) String() string {
// FromString populate the SubnetKey object reading it from string // FromString populate the SubnetKey object reading it from string
func (s *SubnetKey) FromString(str string) error { func (s *SubnetKey) FromString(str string) error {
if str == "" || !strings.Contains(str, "/") { if str == "" || !strings.Contains(str, "/") {
return fmt.Errorf("invalid string form for subnetkey: %s", str) return types.BadRequestErrorf("invalid string form for subnetkey: %s", str)
} }
p := strings.Split(str, "/") p := strings.Split(str, "/")
if len(p) != 3 && len(p) != 5 { if len(p) != 3 && len(p) != 5 {
return fmt.Errorf("invalid string form for subnetkey: %s", str) return types.BadRequestErrorf("invalid string form for subnetkey: %s", str)
} }
s.AddressSpace = p[0] s.AddressSpace = p[0]
s.Subnet = fmt.Sprintf("%s/%s", p[1], p[2]) s.Subnet = fmt.Sprintf("%s/%s", p[1], p[2])
@ -317,7 +317,7 @@ func (aSpace *addrSpace) updatePoolDBOnRemoval(k SubnetKey) (func() error, error
return func() error { return func() error {
bm, err := aSpace.alloc.retrieveBitmask(k, c.Pool) bm, err := aSpace.alloc.retrieveBitmask(k, c.Pool)
if err != nil { if err != nil {
return fmt.Errorf("could not find bitmask in datastore for pool %s removal: %v", k.String(), err) return types.InternalErrorf("could not find bitmask in datastore for pool %s removal: %v", k.String(), err)
} }
return bm.Destroy() return bm.Destroy()
}, nil }, nil

View file

@ -2,8 +2,9 @@
package ipamapi package ipamapi
import ( import (
"errors"
"net" "net"
"github.com/docker/libnetwork/types"
) )
/******************** /********************
@ -15,6 +16,8 @@ const (
DefaultIPAM = "default" DefaultIPAM = "default"
// PluginEndpointType represents the Endpoint Type used by Plugin system // PluginEndpointType represents the Endpoint Type used by Plugin system
PluginEndpointType = "IpamDriver" PluginEndpointType = "IpamDriver"
// RequestAddressType represents the Address Type used when requesting an address
RequestAddressType = "RequestAddressType"
) )
// Callback provides a Callback interface for registering an IPAM instance into LibNetwork // Callback provides a Callback interface for registering an IPAM instance into LibNetwork
@ -29,22 +32,19 @@ type Callback interface {
// Weel-known errors returned by IPAM // Weel-known errors returned by IPAM
var ( var (
ErrInvalidIpamService = errors.New("Invalid IPAM Service") ErrIpamInternalError = types.InternalErrorf("IPAM Internal Error")
ErrInvalidIpamConfigService = errors.New("Invalid IPAM Config Service") ErrInvalidAddressSpace = types.BadRequestErrorf("Invalid Address Space")
ErrIpamNotAvailable = errors.New("IPAM Service not available") ErrInvalidPool = types.BadRequestErrorf("Invalid Address Pool")
ErrIpamInternalError = errors.New("IPAM Internal Error") ErrInvalidSubPool = types.BadRequestErrorf("Invalid Address SubPool")
ErrInvalidAddressSpace = errors.New("Invalid Address Space") ErrInvalidRequest = types.BadRequestErrorf("Invalid Request")
ErrInvalidPool = errors.New("Invalid Address Pool") ErrPoolNotFound = types.BadRequestErrorf("Address Pool not found")
ErrInvalidSubPool = errors.New("Invalid Address SubPool") ErrOverlapPool = types.ForbiddenErrorf("Address pool overlaps with existing pool on this address space")
ErrInvalidRequest = errors.New("Invalid Request") ErrNoAvailablePool = types.NoServiceErrorf("No available pool")
ErrPoolNotFound = errors.New("Address Pool not found") ErrNoAvailableIPs = types.NoServiceErrorf("No available addresses on this pool")
ErrOverlapPool = errors.New("Address pool overlaps with existing pool on this address space") ErrIPAlreadyAllocated = types.ForbiddenErrorf("Address already in use")
ErrNoAvailablePool = errors.New("No available pool") ErrIPOutOfRange = types.BadRequestErrorf("Requested address is out of range")
ErrNoAvailableIPs = errors.New("No available addresses on this pool") ErrPoolOverlap = types.ForbiddenErrorf("Pool overlaps with other one on this address space")
ErrIPAlreadyAllocated = errors.New("Address already in use") ErrBadPool = types.BadRequestErrorf("Address space does not contain specified address pool")
ErrIPOutOfRange = errors.New("Requested address is out of range")
ErrPoolOverlap = errors.New("Pool overlaps with other one on this address space")
ErrBadPool = errors.New("Address space does not contain specified address pool")
) )
/******************************* /*******************************

View file

@ -603,12 +603,13 @@ func (n *network) Delete() error {
if err = n.getController().deleteFromStore(n.getEpCnt()); err != nil { if err = n.getController().deleteFromStore(n.getEpCnt()); err != nil {
return fmt.Errorf("error deleting network endpoint count from store: %v", err) return fmt.Errorf("error deleting network endpoint count from store: %v", err)
} }
n.ipamRelease()
if err = n.getController().deleteFromStore(n); err != nil { if err = n.getController().deleteFromStore(n); err != nil {
return fmt.Errorf("error deleting network from store: %v", err) return fmt.Errorf("error deleting network from store: %v", err)
} }
n.ipamRelease()
return nil return nil
} }
@ -970,7 +971,10 @@ func (n *network) ipamAllocateVersion(ipVer int, ipam ipamapi.Ipam) error {
// irrespective of whether ipam driver returned a gateway already. // irrespective of whether ipam driver returned a gateway already.
// If none of the above is true, libnetwork will allocate one. // If none of the above is true, libnetwork will allocate one.
if cfg.Gateway != "" || d.Gateway == nil { if cfg.Gateway != "" || d.Gateway == nil {
if d.Gateway, _, err = ipam.RequestAddress(d.PoolID, net.ParseIP(cfg.Gateway), nil); err != nil { var gatewayOpts = map[string]string{
ipamapi.RequestAddressType: netlabel.Gateway,
}
if d.Gateway, _, err = ipam.RequestAddress(d.PoolID, net.ParseIP(cfg.Gateway), gatewayOpts); err != nil {
return types.InternalErrorf("failed to allocate gateway (%v): %v", cfg.Gateway, err) return types.InternalErrorf("failed to allocate gateway (%v): %v", cfg.Gateway, err)
} }
} }

View file

@ -62,7 +62,6 @@ type sandbox struct {
osSbox osl.Sandbox osSbox osl.Sandbox
controller *controller controller *controller
refCnt int refCnt int
hostsOnce sync.Once
endpoints epHeap endpoints epHeap
epPriority map[string]int epPriority map[string]int
joinLeaveDone chan struct{} joinLeaveDone chan struct{}
@ -601,41 +600,21 @@ func (sb *sandbox) buildHostsFile() error {
} }
func (sb *sandbox) updateHostsFile(ifaceIP string, svcRecords []etchosts.Record) error { func (sb *sandbox) updateHostsFile(ifaceIP string, svcRecords []etchosts.Record) error {
var err error var mhost string
if sb.config.originHostsPath != "" { if sb.config.originHostsPath != "" {
return nil return nil
} }
max := func(a, b int) int { if sb.config.domainName != "" {
if a < b { mhost = fmt.Sprintf("%s.%s %s", sb.config.hostName, sb.config.domainName,
return b sb.config.hostName)
} } else {
mhost = sb.config.hostName
return a
} }
extraContent := make([]etchosts.Record, 0, extraContent := make([]etchosts.Record, 0, len(svcRecords)+1)
max(len(sb.config.extraHosts), len(svcRecords))) extraContent = append(extraContent, etchosts.Record{Hosts: mhost, IP: ifaceIP})
sb.hostsOnce.Do(func() {
// Rebuild the hosts file accounting for the passed
// interface IP and service records
for _, extraHost := range sb.config.extraHosts {
extraContent = append(extraContent,
etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP})
}
err = etchosts.Build(sb.config.hostsPath, ifaceIP,
sb.config.hostName, sb.config.domainName, extraContent)
})
if err != nil {
return err
}
extraContent = extraContent[:0]
for _, svc := range svcRecords { for _, svc := range svcRecords {
extraContent = append(extraContent, svc) extraContent = append(extraContent, svc)
} }

View file

@ -0,0 +1,110 @@
package netlink
import (
"fmt"
)
type Class interface {
Attrs() *ClassAttrs
Type() string
}
// Class represents a netlink class. A filter is associated with a link,
// has a handle and a parent. The root filter of a device should have a
// parent == HANDLE_ROOT.
type ClassAttrs struct {
LinkIndex int
Handle uint32
Parent uint32
Leaf uint32
}
func (q ClassAttrs) String() string {
return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf)
}
type HtbClassAttrs struct {
// TODO handle all attributes
Rate uint64
Ceil uint64
Buffer uint32
Cbuffer uint32
Quantum uint32
Level uint32
Prio uint32
}
func (q HtbClassAttrs) String() string {
return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
}
// Htb class
type HtbClass struct {
ClassAttrs
Rate uint64
Ceil uint64
Buffer uint32
Cbuffer uint32
Quantum uint32
Level uint32
Prio uint32
}
func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
mtu := 1600
rate := cattrs.Rate / 8
ceil := cattrs.Ceil / 8
buffer := cattrs.Buffer
cbuffer := cattrs.Cbuffer
if ceil == 0 {
ceil = rate
}
if buffer == 0 {
buffer = uint32(float64(rate)/Hz() + float64(mtu))
}
buffer = uint32(Xmittime(rate, buffer))
if cbuffer == 0 {
cbuffer = uint32(float64(ceil)/Hz() + float64(mtu))
}
cbuffer = uint32(Xmittime(ceil, cbuffer))
return &HtbClass{
ClassAttrs: attrs,
Rate: rate,
Ceil: ceil,
Buffer: buffer,
Cbuffer: cbuffer,
Quantum: 10,
Level: 0,
Prio: 0,
}
}
func (q HtbClass) String() string {
return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer)
}
func (class *HtbClass) Attrs() *ClassAttrs {
return &class.ClassAttrs
}
func (class *HtbClass) Type() string {
return "htb"
}
// GenericClass classes represent types that are not currently understood
// by this netlink library.
type GenericClass struct {
ClassAttrs
ClassType string
}
func (class *GenericClass) Attrs() *ClassAttrs {
return &class.ClassAttrs
}
func (class *GenericClass) Type() string {
return class.ClassType
}

View file

@ -0,0 +1,144 @@
package netlink
import (
"syscall"
"github.com/vishvananda/netlink/nl"
)
// ClassDel will delete a class from the system.
// Equivalent to: `tc class del $class`
func ClassDel(class Class) error {
req := nl.NewNetlinkRequest(syscall.RTM_DELTCLASS, syscall.NLM_F_ACK)
base := class.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
}
req.AddData(msg)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// ClassAdd will add a class to the system.
// Equivalent to: `tc class add $class`
func ClassAdd(class Class) error {
req := nl.NewNetlinkRequest(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
base := class.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
}
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if htb, ok := class.(*HtbClass); ok {
opt := nl.TcHtbCopt{}
opt.Rate.Rate = uint32(htb.Rate)
opt.Ceil.Rate = uint32(htb.Ceil)
opt.Buffer = htb.Buffer
opt.Cbuffer = htb.Cbuffer
opt.Quantum = htb.Quantum
opt.Level = htb.Level
opt.Prio = htb.Prio
// TODO: Handle Debug properly. For now default to 0
nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize())
}
req.AddData(options)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// ClassList gets a list of classes in the system.
// Equivalent to: `tc class show`.
// Generally retunrs nothing if link and parent are not specified.
func ClassList(link Link, parent uint32) ([]Class, error) {
req := nl.NewNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP)
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: parent,
}
if link != nil {
base := link.Attrs()
ensureIndex(base)
msg.Ifindex = int32(base.Index)
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTCLASS)
if err != nil {
return nil, err
}
var res []Class
for _, m := range msgs {
msg := nl.DeserializeTcMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return nil, err
}
base := ClassAttrs{
LinkIndex: int(msg.Ifindex),
Handle: msg.Handle,
Parent: msg.Parent,
}
var class Class
classType := ""
for _, attr := range attrs {
switch attr.Attr.Type {
case nl.TCA_KIND:
classType = string(attr.Value[:len(attr.Value)-1])
switch classType {
case "htb":
class = &HtbClass{}
default:
class = &GenericClass{ClassType: classType}
}
case nl.TCA_OPTIONS:
switch classType {
case "htb":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
_, err = parseHtbClassData(class, data)
if err != nil {
return nil, err
}
}
}
}
*class.Attrs() = base
res = append(res, class)
}
return res, nil
}
func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) {
htb := class.(*HtbClass)
detailed := false
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_HTB_PARMS:
opt := nl.DeserializeTcHtbCopt(datum.Value)
htb.Rate = uint64(opt.Rate.Rate)
htb.Ceil = uint64(opt.Ceil.Rate)
htb.Buffer = opt.Buffer
htb.Cbuffer = opt.Cbuffer
htb.Quantum = opt.Quantum
htb.Level = opt.Level
htb.Prio = opt.Prio
}
}
return detailed, nil
}

View file

@ -0,0 +1,140 @@
package netlink
import (
"errors"
"fmt"
"github.com/vishvananda/netlink/nl"
)
type Filter interface {
Attrs() *FilterAttrs
Type() string
}
// Filter represents a netlink filter. A filter is associated with a link,
// has a handle and a parent. The root filter of a device should have a
// parent == HANDLE_ROOT.
type FilterAttrs struct {
LinkIndex int
Handle uint32
Parent uint32
Priority uint16 // lower is higher priority
Protocol uint16 // syscall.ETH_P_*
}
func (q FilterAttrs) String() string {
return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol)
}
// U32 filters on many packet related properties
type U32 struct {
FilterAttrs
// Currently only supports redirecting to another interface
RedirIndex int
}
func (filter *U32) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
func (filter *U32) Type() string {
return "u32"
}
type FilterFwAttrs struct {
ClassId uint32
InDev string
Mask uint32
Index uint32
Buffer uint32
Mtu uint32
Mpu uint16
Rate uint32
AvRate uint32
PeakRate uint32
Action int
Overhead uint16
LinkLayer int
}
// FwFilter filters on firewall marks
type Fw struct {
FilterAttrs
ClassId uint32
Police nl.TcPolice
InDev string
// TODO Action
Mask uint32
AvRate uint32
Rtab [256]uint32
Ptab [256]uint32
}
func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
var rtab [256]uint32
var ptab [256]uint32
rcell_log := -1
pcell_log := -1
avrate := fattrs.AvRate / 8
police := nl.TcPolice{}
police.Rate.Rate = fattrs.Rate / 8
police.PeakRate.Rate = fattrs.PeakRate / 8
buffer := fattrs.Buffer
linklayer := nl.LINKLAYER_ETHERNET
if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC {
linklayer = fattrs.LinkLayer
}
police.Action = int32(fattrs.Action)
if police.Rate.Rate != 0 {
police.Rate.Mpu = fattrs.Mpu
police.Rate.Overhead = fattrs.Overhead
if CalcRtable(&police.Rate, rtab, rcell_log, fattrs.Mtu, linklayer) < 0 {
return nil, errors.New("TBF: failed to calculate rate table.")
}
police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
}
police.Mtu = fattrs.Mtu
if police.PeakRate.Rate != 0 {
police.PeakRate.Mpu = fattrs.Mpu
police.PeakRate.Overhead = fattrs.Overhead
if CalcRtable(&police.PeakRate, ptab, pcell_log, fattrs.Mtu, linklayer) < 0 {
return nil, errors.New("POLICE: failed to calculate peak rate table.")
}
}
return &Fw{
FilterAttrs: attrs,
ClassId: fattrs.ClassId,
InDev: fattrs.InDev,
Mask: fattrs.Mask,
Police: police,
AvRate: avrate,
Rtab: rtab,
Ptab: ptab,
}, nil
}
func (filter *Fw) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
func (filter *Fw) Type() string {
return "fw"
}
// GenericFilter filters represent types that are not currently understood
// by this netlink library.
type GenericFilter struct {
FilterAttrs
FilterType string
}
func (filter *GenericFilter) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
func (filter *GenericFilter) Type() string {
return filter.FilterType
}

View file

@ -0,0 +1,322 @@
package netlink
import (
"bytes"
"encoding/binary"
"fmt"
"syscall"
"github.com/vishvananda/netlink/nl"
)
// FilterDel will delete a filter from the system.
// Equivalent to: `tc filter del $filter`
func FilterDel(filter Filter) error {
req := nl.NewNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
}
req.AddData(msg)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// FilterAdd will add a filter to the system.
// Equivalent to: `tc filter add $filter`
func FilterAdd(filter Filter) error {
native = nl.NativeEndian()
req := nl.NewNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)),
}
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if u32, ok := filter.(*U32); ok {
// match all
sel := nl.TcU32Sel{
Nkeys: 1,
Flags: nl.TC_U32_TERMINAL,
}
sel.Keys = append(sel.Keys, nl.TcU32Key{})
nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
actions := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil)
table := nl.NewRtAttrChild(actions, nl.TCA_ACT_TAB, nil)
nl.NewRtAttrChild(table, nl.TCA_KIND, nl.ZeroTerminated("mirred"))
// redirect to other interface
mir := nl.TcMirred{
Action: nl.TC_ACT_STOLEN,
Eaction: nl.TCA_EGRESS_REDIR,
Ifindex: uint32(u32.RedirIndex),
}
aopts := nl.NewRtAttrChild(table, nl.TCA_OPTIONS, nil)
nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mir.Serialize())
} else if fw, ok := filter.(*Fw); ok {
if fw.Mask != 0 {
b := make([]byte, 4)
native.PutUint32(b, fw.Mask)
nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b)
}
if fw.InDev != "" {
nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev))
}
if (fw.Police != nl.TcPolice{}) {
police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil)
nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize())
if (fw.Police.Rate != nl.TcRateSpec{}) {
payload := SerializeRtab(fw.Rtab)
nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload)
}
if (fw.Police.PeakRate != nl.TcRateSpec{}) {
payload := SerializeRtab(fw.Ptab)
nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload)
}
}
if fw.ClassId != 0 {
b := make([]byte, 4)
native.PutUint32(b, fw.ClassId)
nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b)
}
}
req.AddData(options)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// FilterList gets a list of filters in the system.
// Equivalent to: `tc filter show`.
// Generally retunrs nothing if link and parent are not specified.
func FilterList(link Link, parent uint32) ([]Filter, error) {
req := nl.NewNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: parent,
}
if link != nil {
base := link.Attrs()
ensureIndex(base)
msg.Ifindex = int32(base.Index)
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER)
if err != nil {
return nil, err
}
var res []Filter
for _, m := range msgs {
msg := nl.DeserializeTcMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return nil, err
}
base := FilterAttrs{
LinkIndex: int(msg.Ifindex),
Handle: msg.Handle,
Parent: msg.Parent,
}
base.Priority, base.Protocol = MajorMinor(msg.Info)
base.Protocol = nl.Swap16(base.Protocol)
var filter Filter
filterType := ""
detailed := false
for _, attr := range attrs {
switch attr.Attr.Type {
case nl.TCA_KIND:
filterType = string(attr.Value[:len(attr.Value)-1])
switch filterType {
case "u32":
filter = &U32{}
case "fw":
filter = &Fw{}
default:
filter = &GenericFilter{FilterType: filterType}
}
case nl.TCA_OPTIONS:
switch filterType {
case "u32":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
detailed, err = parseU32Data(filter, data)
if err != nil {
return nil, err
}
case "fw":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
detailed, err = parseFwData(filter, data)
if err != nil {
return nil, err
}
}
}
}
// only return the detailed version of the filter
if detailed {
*filter.Attrs() = base
res = append(res, filter)
}
}
return res, nil
}
func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
u32 := filter.(*U32)
detailed := false
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_U32_SEL:
detailed = true
sel := nl.DeserializeTcU32Sel(datum.Value)
// only parse if we have a very basic redirect
if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 {
return detailed, nil
}
case nl.TCA_U32_ACT:
table, err := nl.ParseRouteAttr(datum.Value)
if err != nil {
return detailed, err
}
if len(table) != 1 || table[0].Attr.Type != nl.TCA_ACT_TAB {
return detailed, fmt.Errorf("Action table not formed properly")
}
aattrs, err := nl.ParseRouteAttr(table[0].Value)
for _, aattr := range aattrs {
switch aattr.Attr.Type {
case nl.TCA_KIND:
actionType := string(aattr.Value[:len(aattr.Value)-1])
// only parse if the action is mirred
if actionType != "mirred" {
return detailed, nil
}
case nl.TCA_OPTIONS:
adata, err := nl.ParseRouteAttr(aattr.Value)
if err != nil {
return detailed, err
}
for _, adatum := range adata {
switch adatum.Attr.Type {
case nl.TCA_MIRRED_PARMS:
mir := nl.DeserializeTcMirred(adatum.Value)
u32.RedirIndex = int(mir.Ifindex)
}
}
}
}
}
}
return detailed, nil
}
func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
fw := filter.(*Fw)
detailed := true
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_FW_MASK:
fw.Mask = native.Uint32(datum.Value[0:4])
case nl.TCA_FW_CLASSID:
fw.ClassId = native.Uint32(datum.Value[0:4])
case nl.TCA_FW_INDEV:
fw.InDev = string(datum.Value[:len(datum.Value)-1])
case nl.TCA_FW_POLICE:
adata, _ := nl.ParseRouteAttr(datum.Value)
for _, aattr := range adata {
switch aattr.Attr.Type {
case nl.TCA_POLICE_TBF:
fw.Police = *nl.DeserializeTcPolice(aattr.Value)
case nl.TCA_POLICE_RATE:
fw.Rtab = DeserializeRtab(aattr.Value)
case nl.TCA_POLICE_PEAKRATE:
fw.Ptab = DeserializeRtab(aattr.Value)
}
}
}
}
return detailed, nil
}
func AlignToAtm(size uint) uint {
var linksize, cells int
cells = int(size / nl.ATM_CELL_PAYLOAD)
if (size % nl.ATM_CELL_PAYLOAD) > 0 {
cells++
}
linksize = cells * nl.ATM_CELL_SIZE
return uint(linksize)
}
func AdjustSize(sz uint, mpu uint, linklayer int) uint {
if sz < mpu {
sz = mpu
}
switch linklayer {
case nl.LINKLAYER_ATM:
return AlignToAtm(sz)
default:
return sz
}
}
func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cell_log int, mtu uint32, linklayer int) int {
bps := rate.Rate
mpu := rate.Mpu
var sz uint
if mtu == 0 {
mtu = 2047
}
if cell_log < 0 {
cell_log = 0
for (mtu >> uint(cell_log)) > 255 {
cell_log++
}
}
for i := 0; i < 256; i++ {
sz = AdjustSize(uint((i+1)<<uint32(cell_log)), uint(mpu), linklayer)
rtab[i] = uint32(Xmittime(uint64(bps), uint32(sz)))
}
rate.CellAlign = -1
rate.CellLog = uint8(cell_log)
rate.Linklayer = uint8(linklayer & nl.TC_LINKLAYER_MASK)
return cell_log
}
func DeserializeRtab(b []byte) [256]uint32 {
var rtab [256]uint32
native := nl.NativeEndian()
r := bytes.NewReader(b)
_ = binary.Read(r, native, &rtab)
return rtab
}
func SerializeRtab(rtab [256]uint32) []byte {
native := nl.NativeEndian()
var w bytes.Buffer
_ = binary.Write(&w, native, rtab)
return w.Bytes()
}

View file

@ -1,6 +1,9 @@
package netlink package netlink
import "net" import (
"net"
"syscall"
)
// Link represents a link device from netlink. Shared link attributes // Link represents a link device from netlink. Shared link attributes
// like name may be retrieved using the Attrs() method. Unique data // like name may be retrieved using the Attrs() method. Unique data
@ -62,6 +65,19 @@ func (dummy *Dummy) Type() string {
return "dummy" return "dummy"
} }
// Ifb links are advanced dummy devices for packet filtering
type Ifb struct {
LinkAttrs
}
func (ifb *Ifb) Attrs() *LinkAttrs {
return &ifb.LinkAttrs
}
func (ifb *Ifb) Type() string {
return "ifb"
}
// Bridge links are simple linux bridges // Bridge links are simple linux bridges
type Bridge struct { type Bridge struct {
LinkAttrs LinkAttrs
@ -114,6 +130,36 @@ func (macvlan *Macvlan) Type() string {
return "macvlan" return "macvlan"
} }
// Macvtap - macvtap is a virtual interfaces based on macvlan
type Macvtap struct {
Macvlan
}
func (macvtap Macvtap) Type() string {
return "macvtap"
}
type TuntapMode uint16
const (
TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN
TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP
)
// Tuntap links created via /dev/tun/tap, but can be destroyed via netlink
type Tuntap struct {
LinkAttrs
Mode TuntapMode
}
func (tuntap *Tuntap) Attrs() *LinkAttrs {
return &tuntap.LinkAttrs
}
func (tuntap *Tuntap) Type() string {
return "tuntap"
}
// Veth devices must specify PeerName on create // Veth devices must specify PeerName on create
type Veth struct { type Veth struct {
LinkAttrs LinkAttrs
@ -128,18 +174,18 @@ func (veth *Veth) Type() string {
return "veth" return "veth"
} }
// Generic links represent types that are not currently understood // GenericLink links represent types that are not currently understood
// by this netlink library. // by this netlink library.
type Generic struct { type GenericLink struct {
LinkAttrs LinkAttrs
LinkType string LinkType string
} }
func (generic *Generic) Attrs() *LinkAttrs { func (generic *GenericLink) Attrs() *LinkAttrs {
return &generic.LinkAttrs return &generic.LinkAttrs
} }
func (generic *Generic) Type() string { func (generic *GenericLink) Type() string {
return generic.LinkType return generic.LinkType
} }

View file

@ -5,7 +5,9 @@ import (
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"net" "net"
"os"
"syscall" "syscall"
"unsafe"
"github.com/vishvananda/netlink/nl" "github.com/vishvananda/netlink/nl"
) )
@ -285,6 +287,44 @@ func LinkAdd(link Link) error {
return fmt.Errorf("LinkAttrs.Name cannot be empty!") return fmt.Errorf("LinkAttrs.Name cannot be empty!")
} }
if tuntap, ok := link.(*Tuntap); ok {
// TODO: support user
// TODO: support group
// TODO: support non- one_queue
// TODO: support pi | vnet_hdr | multi_queue
// TODO: support non- exclusive
// TODO: support non- persistent
if tuntap.Mode < syscall.IFF_TUN || tuntap.Mode > syscall.IFF_TAP {
return fmt.Errorf("Tuntap.Mode %v unknown!", tuntap.Mode)
}
file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0)
if err != nil {
return err
}
defer file.Close()
var req ifReq
req.Flags |= syscall.IFF_ONE_QUEUE
req.Flags |= syscall.IFF_TUN_EXCL
copy(req.Name[:15], base.Name)
req.Flags |= uint16(tuntap.Mode)
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETIFF), uintptr(unsafe.Pointer(&req)))
if errno != 0 {
return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed, errno %v", errno)
}
_, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETPERSIST), 1)
if errno != 0 {
return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno)
}
ensureIndex(base)
// can't set master during create, so set it afterwards
if base.MasterIndex != 0 {
// TODO: verify MasterIndex is actually a bridge?
return LinkSetMasterByIndex(link, base.MasterIndex)
}
return nil
}
req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
@ -493,6 +533,8 @@ func linkDeserialize(m []byte) (Link, error) {
switch linkType { switch linkType {
case "dummy": case "dummy":
link = &Dummy{} link = &Dummy{}
case "ifb":
link = &Ifb{}
case "bridge": case "bridge":
link = &Bridge{} link = &Bridge{}
case "vlan": case "vlan":
@ -505,8 +547,10 @@ func linkDeserialize(m []byte) (Link, error) {
link = &IPVlan{} link = &IPVlan{}
case "macvlan": case "macvlan":
link = &Macvlan{} link = &Macvlan{}
case "macvtap":
link = &Macvtap{}
default: default:
link = &Generic{LinkType: linkType} link = &GenericLink{LinkType: linkType}
} }
case nl.IFLA_INFO_DATA: case nl.IFLA_INFO_DATA:
data, err := nl.ParseRouteAttr(info.Value) data, err := nl.ParseRouteAttr(info.Value)
@ -522,6 +566,8 @@ func linkDeserialize(m []byte) (Link, error) {
parseIPVlanData(link, data) parseIPVlanData(link, data)
case "macvlan": case "macvlan":
parseMacvlanData(link, data) parseMacvlanData(link, data)
case "macvtap":
parseMacvtapData(link, data)
} }
} }
} }
@ -583,6 +629,46 @@ func LinkList() ([]Link, error) {
return res, nil return res, nil
} }
// LinkUpdate is used to pass information back from LinkSubscribe()
type LinkUpdate struct {
nl.IfInfomsg
Link
}
// LinkSubscribe takes a chan down which notifications will be sent
// when links change. Close the 'done' chan to stop subscription.
func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error {
s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK)
if err != nil {
return err
}
if done != nil {
go func() {
<-done
s.Close()
}()
}
go func() {
defer close(ch)
for {
msgs, err := s.Receive()
if err != nil {
return
}
for _, m := range msgs {
ifmsg := nl.DeserializeIfInfomsg(m.Data)
link, err := linkDeserialize(m.Data)
if err != nil {
return
}
ch <- LinkUpdate{IfInfomsg: *ifmsg, Link: link}
}
}
}()
return nil
}
func LinkSetHairpin(link Link, mode bool) error { func LinkSetHairpin(link Link, mode bool) error {
return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE) return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE)
} }
@ -696,6 +782,11 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
} }
} }
func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) {
macv := link.(*Macvtap)
parseMacvlanData(&macv.Macvlan, data)
}
func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) {
macv := link.(*Macvlan) macv := link.(*Macvlan)
for _, datum := range data { for _, datum := range data {

View file

@ -0,0 +1,14 @@
package netlink
// ideally golang.org/x/sys/unix would define IfReq but it only has
// IFNAMSIZ, hence this minimalistic implementation
const (
SizeOfIfReq = 40
IFNAMSIZ = 16
)
type ifReq struct {
Name [IFNAMSIZ]byte
Flags uint16
pad [SizeOfIfReq - IFNAMSIZ - 2]byte
}

View file

@ -142,7 +142,7 @@ func (a *RtAttr) Len() int {
} }
// Serialize the RtAttr into a byte array // Serialize the RtAttr into a byte array
// This can't ust unsafe.cast because it must iterate through children. // This can't just unsafe.cast because it must iterate through children.
func (a *RtAttr) Serialize() []byte { func (a *RtAttr) Serialize() []byte {
native := NativeEndian() native := NativeEndian()

View file

@ -0,0 +1,508 @@
package nl
import (
"unsafe"
)
// LinkLayer
const (
LINKLAYER_UNSPEC = iota
LINKLAYER_ETHERNET
LINKLAYER_ATM
)
// ATM
const (
ATM_CELL_PAYLOAD = 48
ATM_CELL_SIZE = 53
)
const TC_LINKLAYER_MASK = 0x0F
// Police
const (
TCA_POLICE_UNSPEC = iota
TCA_POLICE_TBF
TCA_POLICE_RATE
TCA_POLICE_PEAKRATE
TCA_POLICE_AVRATE
TCA_POLICE_RESULT
TCA_POLICE_MAX = TCA_POLICE_RESULT
)
// Message types
const (
TCA_UNSPEC = iota
TCA_KIND
TCA_OPTIONS
TCA_STATS
TCA_XSTATS
TCA_RATE
TCA_FCNT
TCA_STATS2
TCA_STAB
TCA_MAX = TCA_STAB
)
const (
TCA_ACT_TAB = 1
TCAA_MAX = 1
)
const (
TCA_PRIO_UNSPEC = iota
TCA_PRIO_MQ
TCA_PRIO_MAX = TCA_PRIO_MQ
)
const (
SizeofTcMsg = 0x14
SizeofTcActionMsg = 0x04
SizeofTcPrioMap = 0x14
SizeofTcRateSpec = 0x0c
SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c
SizeofTcHtbCopt = 2*SizeofTcRateSpec + 0x14
SizeofTcHtbGlob = 0x14
SizeofTcU32Key = 0x10
SizeofTcU32Sel = 0x10 // without keys
SizeofTcMirred = 0x1c
SizeofTcPolice = 2*SizeofTcRateSpec + 0x20
)
// struct tcmsg {
// unsigned char tcm_family;
// unsigned char tcm__pad1;
// unsigned short tcm__pad2;
// int tcm_ifindex;
// __u32 tcm_handle;
// __u32 tcm_parent;
// __u32 tcm_info;
// };
type TcMsg struct {
Family uint8
Pad [3]byte
Ifindex int32
Handle uint32
Parent uint32
Info uint32
}
func (msg *TcMsg) Len() int {
return SizeofTcMsg
}
func DeserializeTcMsg(b []byte) *TcMsg {
return (*TcMsg)(unsafe.Pointer(&b[0:SizeofTcMsg][0]))
}
func (x *TcMsg) Serialize() []byte {
return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:]
}
// struct tcamsg {
// unsigned char tca_family;
// unsigned char tca__pad1;
// unsigned short tca__pad2;
// };
type TcActionMsg struct {
Family uint8
Pad [3]byte
}
func (msg *TcActionMsg) Len() int {
return SizeofTcActionMsg
}
func DeserializeTcActionMsg(b []byte) *TcActionMsg {
return (*TcActionMsg)(unsafe.Pointer(&b[0:SizeofTcActionMsg][0]))
}
func (x *TcActionMsg) Serialize() []byte {
return (*(*[SizeofTcActionMsg]byte)(unsafe.Pointer(x)))[:]
}
const (
TC_PRIO_MAX = 15
)
// struct tc_prio_qopt {
// int bands; /* Number of bands */
// __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
// };
type TcPrioMap struct {
Bands int32
Priomap [TC_PRIO_MAX + 1]uint8
}
func (msg *TcPrioMap) Len() int {
return SizeofTcPrioMap
}
func DeserializeTcPrioMap(b []byte) *TcPrioMap {
return (*TcPrioMap)(unsafe.Pointer(&b[0:SizeofTcPrioMap][0]))
}
func (x *TcPrioMap) Serialize() []byte {
return (*(*[SizeofTcPrioMap]byte)(unsafe.Pointer(x)))[:]
}
const (
TCA_TBF_UNSPEC = iota
TCA_TBF_PARMS
TCA_TBF_RTAB
TCA_TBF_PTAB
TCA_TBF_RATE64
TCA_TBF_PRATE64
TCA_TBF_BURST
TCA_TBF_PBURST
TCA_TBF_MAX = TCA_TBF_PBURST
)
// struct tc_ratespec {
// unsigned char cell_log;
// __u8 linklayer; /* lower 4 bits */
// unsigned short overhead;
// short cell_align;
// unsigned short mpu;
// __u32 rate;
// };
type TcRateSpec struct {
CellLog uint8
Linklayer uint8
Overhead uint16
CellAlign int16
Mpu uint16
Rate uint32
}
func (msg *TcRateSpec) Len() int {
return SizeofTcRateSpec
}
func DeserializeTcRateSpec(b []byte) *TcRateSpec {
return (*TcRateSpec)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0]))
}
func (x *TcRateSpec) Serialize() []byte {
return (*(*[SizeofTcRateSpec]byte)(unsafe.Pointer(x)))[:]
}
// struct tc_tbf_qopt {
// struct tc_ratespec rate;
// struct tc_ratespec peakrate;
// __u32 limit;
// __u32 buffer;
// __u32 mtu;
// };
type TcTbfQopt struct {
Rate TcRateSpec
Peakrate TcRateSpec
Limit uint32
Buffer uint32
Mtu uint32
}
func (msg *TcTbfQopt) Len() int {
return SizeofTcTbfQopt
}
func DeserializeTcTbfQopt(b []byte) *TcTbfQopt {
return (*TcTbfQopt)(unsafe.Pointer(&b[0:SizeofTcTbfQopt][0]))
}
func (x *TcTbfQopt) Serialize() []byte {
return (*(*[SizeofTcTbfQopt]byte)(unsafe.Pointer(x)))[:]
}
const (
TCA_HTB_UNSPEC = iota
TCA_HTB_PARMS
TCA_HTB_INIT
TCA_HTB_CTAB
TCA_HTB_RTAB
TCA_HTB_DIRECT_QLEN
TCA_HTB_RATE64
TCA_HTB_CEIL64
TCA_HTB_MAX = TCA_HTB_CEIL64
)
//struct tc_htb_opt {
// struct tc_ratespec rate;
// struct tc_ratespec ceil;
// __u32 buffer;
// __u32 cbuffer;
// __u32 quantum;
// __u32 level; /* out only */
// __u32 prio;
//};
type TcHtbCopt struct {
Rate TcRateSpec
Ceil TcRateSpec
Buffer uint32
Cbuffer uint32
Quantum uint32
Level uint32
Prio uint32
}
func (msg *TcHtbCopt) Len() int {
return SizeofTcHtbCopt
}
func DeserializeTcHtbCopt(b []byte) *TcHtbCopt {
return (*TcHtbCopt)(unsafe.Pointer(&b[0:SizeofTcHtbCopt][0]))
}
func (x *TcHtbCopt) Serialize() []byte {
return (*(*[SizeofTcHtbCopt]byte)(unsafe.Pointer(x)))[:]
}
type TcHtbGlob struct {
Version uint32
Rate2Quantum uint32
Defcls uint32
Debug uint32
DirectPkts uint32
}
func (msg *TcHtbGlob) Len() int {
return SizeofTcHtbGlob
}
func DeserializeTcHtbGlob(b []byte) *TcHtbGlob {
return (*TcHtbGlob)(unsafe.Pointer(&b[0:SizeofTcHtbGlob][0]))
}
func (x *TcHtbGlob) Serialize() []byte {
return (*(*[SizeofTcHtbGlob]byte)(unsafe.Pointer(x)))[:]
}
const (
TCA_U32_UNSPEC = iota
TCA_U32_CLASSID
TCA_U32_HASH
TCA_U32_LINK
TCA_U32_DIVISOR
TCA_U32_SEL
TCA_U32_POLICE
TCA_U32_ACT
TCA_U32_INDEV
TCA_U32_PCNT
TCA_U32_MARK
TCA_U32_MAX = TCA_U32_MARK
)
// struct tc_u32_key {
// __be32 mask;
// __be32 val;
// int off;
// int offmask;
// };
type TcU32Key struct {
Mask uint32 // big endian
Val uint32 // big endian
Off int32
OffMask int32
}
func (msg *TcU32Key) Len() int {
return SizeofTcU32Key
}
func DeserializeTcU32Key(b []byte) *TcU32Key {
return (*TcU32Key)(unsafe.Pointer(&b[0:SizeofTcU32Key][0]))
}
func (x *TcU32Key) Serialize() []byte {
return (*(*[SizeofTcU32Key]byte)(unsafe.Pointer(x)))[:]
}
// struct tc_u32_sel {
// unsigned char flags;
// unsigned char offshift;
// unsigned char nkeys;
//
// __be16 offmask;
// __u16 off;
// short offoff;
//
// short hoff;
// __be32 hmask;
// struct tc_u32_key keys[0];
// };
const (
TC_U32_TERMINAL = 1 << iota
TC_U32_OFFSET = 1 << iota
TC_U32_VAROFFSET = 1 << iota
TC_U32_EAT = 1 << iota
)
type TcU32Sel struct {
Flags uint8
Offshift uint8
Nkeys uint8
Pad uint8
Offmask uint16 // big endian
Off uint16
Offoff int16
Hoff int16
Hmask uint32 // big endian
Keys []TcU32Key
}
func (msg *TcU32Sel) Len() int {
return SizeofTcU32Sel + int(msg.Nkeys)*SizeofTcU32Key
}
func DeserializeTcU32Sel(b []byte) *TcU32Sel {
x := &TcU32Sel{}
copy((*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:], b)
next := SizeofTcU32Sel
var i uint8
for i = 0; i < x.Nkeys; i++ {
x.Keys = append(x.Keys, *DeserializeTcU32Key(b[next:]))
next += SizeofTcU32Key
}
return x
}
func (x *TcU32Sel) Serialize() []byte {
// This can't just unsafe.cast because it must iterate through keys.
buf := make([]byte, x.Len())
copy(buf, (*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:])
next := SizeofTcU32Sel
for _, key := range x.Keys {
keyBuf := key.Serialize()
copy(buf[next:], keyBuf)
next += SizeofTcU32Key
}
return buf
}
const (
TCA_ACT_MIRRED = 8
)
const (
TCA_MIRRED_UNSPEC = iota
TCA_MIRRED_TM
TCA_MIRRED_PARMS
TCA_MIRRED_MAX = TCA_MIRRED_PARMS
)
const (
TCA_EGRESS_REDIR = 1 /* packet redirect to EGRESS*/
TCA_EGRESS_MIRROR = 2 /* mirror packet to EGRESS */
TCA_INGRESS_REDIR = 3 /* packet redirect to INGRESS*/
TCA_INGRESS_MIRROR = 4 /* mirror packet to INGRESS */
)
const (
TC_ACT_UNSPEC = int32(-1)
TC_ACT_OK = 0
TC_ACT_RECLASSIFY = 1
TC_ACT_SHOT = 2
TC_ACT_PIPE = 3
TC_ACT_STOLEN = 4
TC_ACT_QUEUED = 5
TC_ACT_REPEAT = 6
TC_ACT_JUMP = 0x10000000
)
// #define tc_gen \
// __u32 index; \
// __u32 capab; \
// int action; \
// int refcnt; \
// int bindcnt
// struct tc_mirred {
// tc_gen;
// int eaction; /* one of IN/EGRESS_MIRROR/REDIR */
// __u32 ifindex; /* ifindex of egress port */
// };
type TcMirred struct {
Index uint32
Capab uint32
Action int32
Refcnt int32
Bindcnt int32
Eaction int32
Ifindex uint32
}
func (msg *TcMirred) Len() int {
return SizeofTcMirred
}
func DeserializeTcMirred(b []byte) *TcMirred {
return (*TcMirred)(unsafe.Pointer(&b[0:SizeofTcMirred][0]))
}
func (x *TcMirred) Serialize() []byte {
return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:]
}
const (
TC_POLICE_UNSPEC = TC_ACT_UNSPEC
TC_POLICE_OK = TC_ACT_OK
TC_POLICE_RECLASSIFY = TC_ACT_RECLASSIFY
TC_POLICE_SHOT = TC_ACT_SHOT
TC_POLICE_PIPE = TC_ACT_PIPE
)
// struct tc_police {
// __u32 index;
// int action;
// __u32 limit;
// __u32 burst;
// __u32 mtu;
// struct tc_ratespec rate;
// struct tc_ratespec peakrate;
// int refcnt;
// int bindcnt;
// __u32 capab;
// };
type TcPolice struct {
Index uint32
Action int32
Limit uint32
Burst uint32
Mtu uint32
Rate TcRateSpec
PeakRate TcRateSpec
Refcnt int32
Bindcnt int32
Capab uint32
}
func (msg *TcPolice) Len() int {
return SizeofTcPolice
}
func DeserializeTcPolice(b []byte) *TcPolice {
return (*TcPolice)(unsafe.Pointer(&b[0:SizeofTcPolice][0]))
}
func (x *TcPolice) Serialize() []byte {
return (*(*[SizeofTcPolice]byte)(unsafe.Pointer(x)))[:]
}
const (
TCA_FW_UNSPEC = iota
TCA_FW_CLASSID
TCA_FW_POLICE
TCA_FW_INDEV
TCA_FW_ACT
TCA_FW_MASK
TCA_FW_MAX = TCA_FW_MASK
)

View file

@ -0,0 +1,167 @@
package netlink
import (
"fmt"
)
const (
HANDLE_NONE = 0
HANDLE_INGRESS = 0xFFFFFFF1
HANDLE_ROOT = 0xFFFFFFFF
PRIORITY_MAP_LEN = 16
)
type Qdisc interface {
Attrs() *QdiscAttrs
Type() string
}
// Qdisc represents a netlink qdisc. A qdisc is associated with a link,
// has a handle, a parent and a refcnt. The root qdisc of a device should
// have parent == HANDLE_ROOT.
type QdiscAttrs struct {
LinkIndex int
Handle uint32
Parent uint32
Refcnt uint32 // read only
}
func (q QdiscAttrs) String() string {
return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt)
}
func MakeHandle(major, minor uint16) uint32 {
return (uint32(major) << 16) | uint32(minor)
}
func MajorMinor(handle uint32) (uint16, uint16) {
return uint16((handle & 0xFFFF0000) >> 16), uint16(handle & 0x0000FFFFF)
}
func HandleStr(handle uint32) string {
switch handle {
case HANDLE_NONE:
return "none"
case HANDLE_INGRESS:
return "ingress"
case HANDLE_ROOT:
return "root"
default:
major, minor := MajorMinor(handle)
return fmt.Sprintf("%x:%x", major, minor)
}
}
// PfifoFast is the default qdisc created by the kernel if one has not
// been defined for the interface
type PfifoFast struct {
QdiscAttrs
Bands uint8
PriorityMap [PRIORITY_MAP_LEN]uint8
}
func (qdisc *PfifoFast) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *PfifoFast) Type() string {
return "pfifo_fast"
}
// Prio is a basic qdisc that works just like PfifoFast
type Prio struct {
QdiscAttrs
Bands uint8
PriorityMap [PRIORITY_MAP_LEN]uint8
}
func NewPrio(attrs QdiscAttrs) *Prio {
return &Prio{
QdiscAttrs: attrs,
Bands: 3,
PriorityMap: [PRIORITY_MAP_LEN]uint8{1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
}
}
func (qdisc *Prio) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *Prio) Type() string {
return "prio"
}
// Htb is a classful qdisc that rate limits based on tokens
type Htb struct {
QdiscAttrs
Version uint32
Rate2Quantum uint32
Defcls uint32
Debug uint32
DirectPkts uint32
}
func NewHtb(attrs QdiscAttrs) *Htb {
return &Htb{
QdiscAttrs: attrs,
Version: 3,
Defcls: 0,
Rate2Quantum: 10,
Debug: 0,
DirectPkts: 0,
}
}
func (qdisc *Htb) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *Htb) Type() string {
return "htb"
}
// Tbf is a classless qdisc that rate limits based on tokens
type Tbf struct {
QdiscAttrs
// TODO: handle 64bit rate properly
Rate uint64
Limit uint32
Buffer uint32
// TODO: handle other settings
}
func (qdisc *Tbf) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *Tbf) Type() string {
return "tbf"
}
// Ingress is a qdisc for adding ingress filters
type Ingress struct {
QdiscAttrs
}
func (qdisc *Ingress) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *Ingress) Type() string {
return "ingress"
}
// GenericQdisc qdiscs represent types that are not currently understood
// by this netlink library.
type GenericQdisc struct {
QdiscAttrs
QdiscType string
}
func (qdisc *GenericQdisc) Attrs() *QdiscAttrs {
return &qdisc.QdiscAttrs
}
func (qdisc *GenericQdisc) Type() string {
return qdisc.QdiscType
}

View file

@ -0,0 +1,316 @@
package netlink
import (
"fmt"
"io/ioutil"
"strconv"
"strings"
"syscall"
"github.com/vishvananda/netlink/nl"
)
// QdiscDel will delete a qdisc from the system.
// Equivalent to: `tc qdisc del $qdisc`
func QdiscDel(qdisc Qdisc) error {
req := nl.NewNetlinkRequest(syscall.RTM_DELQDISC, syscall.NLM_F_ACK)
base := qdisc.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
}
req.AddData(msg)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// QdiscAdd will add a qdisc to the system.
// Equivalent to: `tc qdisc add $qdisc`
func QdiscAdd(qdisc Qdisc) error {
req := nl.NewNetlinkRequest(syscall.RTM_NEWQDISC, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
base := qdisc.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: int32(base.LinkIndex),
Handle: base.Handle,
Parent: base.Parent,
}
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if prio, ok := qdisc.(*Prio); ok {
tcmap := nl.TcPrioMap{
Bands: int32(prio.Bands),
Priomap: prio.PriorityMap,
}
options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
} else if tbf, ok := qdisc.(*Tbf); ok {
opt := nl.TcTbfQopt{}
// TODO: handle rate > uint32
opt.Rate.Rate = uint32(tbf.Rate)
opt.Limit = tbf.Limit
opt.Buffer = tbf.Buffer
nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
} else if htb, ok := qdisc.(*Htb); ok {
opt := nl.TcHtbGlob{}
opt.Version = htb.Version
opt.Rate2Quantum = htb.Rate2Quantum
opt.Defcls = htb.Defcls
// TODO: Handle Debug properly. For now default to 0
opt.Debug = htb.Debug
opt.DirectPkts = htb.DirectPkts
nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
// nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
} else if _, ok := qdisc.(*Ingress); ok {
// ingress filters must use the proper handle
if msg.Parent != HANDLE_INGRESS {
return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
}
}
req.AddData(options)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
return err
}
// QdiscList gets a list of qdiscs in the system.
// Equivalent to: `tc qdisc show`.
// The list can be filtered by link.
func QdiscList(link Link) ([]Qdisc, error) {
req := nl.NewNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
index := int32(0)
if link != nil {
base := link.Attrs()
ensureIndex(base)
index = int32(base.Index)
}
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Ifindex: index,
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC)
if err != nil {
return nil, err
}
var res []Qdisc
for _, m := range msgs {
msg := nl.DeserializeTcMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return nil, err
}
// skip qdiscs from other interfaces
if link != nil && msg.Ifindex != index {
continue
}
base := QdiscAttrs{
LinkIndex: int(msg.Ifindex),
Handle: msg.Handle,
Parent: msg.Parent,
Refcnt: msg.Info,
}
var qdisc Qdisc
qdiscType := ""
for _, attr := range attrs {
switch attr.Attr.Type {
case nl.TCA_KIND:
qdiscType = string(attr.Value[:len(attr.Value)-1])
switch qdiscType {
case "pfifo_fast":
qdisc = &PfifoFast{}
case "prio":
qdisc = &Prio{}
case "tbf":
qdisc = &Tbf{}
case "ingress":
qdisc = &Ingress{}
case "htb":
qdisc = &Htb{}
default:
qdisc = &GenericQdisc{QdiscType: qdiscType}
}
case nl.TCA_OPTIONS:
switch qdiscType {
case "pfifo_fast":
// pfifo returns TcPrioMap directly without wrapping it in rtattr
if err := parsePfifoFastData(qdisc, attr.Value); err != nil {
return nil, err
}
case "prio":
// prio returns TcPrioMap directly without wrapping it in rtattr
if err := parsePrioData(qdisc, attr.Value); err != nil {
return nil, err
}
case "tbf":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
if err := parseTbfData(qdisc, data); err != nil {
return nil, err
}
case "htb":
data, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return nil, err
}
if err := parseHtbData(qdisc, data); err != nil {
return nil, err
}
// no options for ingress
}
}
}
*qdisc.Attrs() = base
res = append(res, qdisc)
}
return res, nil
}
func parsePfifoFastData(qdisc Qdisc, value []byte) error {
pfifo := qdisc.(*PfifoFast)
tcmap := nl.DeserializeTcPrioMap(value)
pfifo.PriorityMap = tcmap.Priomap
pfifo.Bands = uint8(tcmap.Bands)
return nil
}
func parsePrioData(qdisc Qdisc, value []byte) error {
prio := qdisc.(*Prio)
tcmap := nl.DeserializeTcPrioMap(value)
prio.PriorityMap = tcmap.Priomap
prio.Bands = uint8(tcmap.Bands)
return nil
}
func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
native = nl.NativeEndian()
htb := qdisc.(*Htb)
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_HTB_INIT:
opt := nl.DeserializeTcHtbGlob(datum.Value)
htb.Version = opt.Version
htb.Rate2Quantum = opt.Rate2Quantum
htb.Defcls = opt.Defcls
htb.Debug = opt.Debug
htb.DirectPkts = opt.DirectPkts
case nl.TCA_HTB_DIRECT_QLEN:
// TODO
//htb.DirectQlen = native.uint32(datum.Value)
}
}
return nil
}
func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
native = nl.NativeEndian()
tbf := qdisc.(*Tbf)
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_TBF_PARMS:
opt := nl.DeserializeTcTbfQopt(datum.Value)
tbf.Rate = uint64(opt.Rate.Rate)
tbf.Limit = opt.Limit
tbf.Buffer = opt.Buffer
case nl.TCA_TBF_RATE64:
tbf.Rate = native.Uint64(datum.Value[0:4])
}
}
return nil
}
const (
TIME_UNITS_PER_SEC = 1000000
)
var (
tickInUsec float64 = 0.0
clockFactor float64 = 0.0
hz float64 = 0.0
)
func initClock() {
data, err := ioutil.ReadFile("/proc/net/psched")
if err != nil {
return
}
parts := strings.Split(strings.TrimSpace(string(data)), " ")
if len(parts) < 3 {
return
}
var vals [3]uint64
for i := range vals {
val, err := strconv.ParseUint(parts[i], 16, 32)
if err != nil {
return
}
vals[i] = val
}
// compatibility
if vals[2] == 1000000000 {
vals[0] = vals[1]
}
clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC
tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor
hz = float64(vals[0])
}
func TickInUsec() float64 {
if tickInUsec == 0.0 {
initClock()
}
return tickInUsec
}
func ClockFactor() float64 {
if clockFactor == 0.0 {
initClock()
}
return clockFactor
}
func Hz() float64 {
if hz == 0.0 {
initClock()
}
return hz
}
func time2Tick(time uint32) uint32 {
return uint32(float64(time) * TickInUsec())
}
func tick2Time(tick uint32) uint32 {
return uint32(float64(tick) / TickInUsec())
}
func time2Ktime(time uint32) uint32 {
return uint32(float64(time) * ClockFactor())
}
func ktime2Time(ktime uint32) uint32 {
return uint32(float64(ktime) / ClockFactor())
}
func burst(rate uint64, buffer uint32) uint32 {
return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC)
}
func latency(rate uint64, limit, buffer uint32) float64 {
return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer))
}
func Xmittime(rate uint64, size uint32) float64 {
return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate))
}

View file

@ -17,6 +17,13 @@ const (
SCOPE_NOWHERE Scope = syscall.RT_SCOPE_NOWHERE SCOPE_NOWHERE Scope = syscall.RT_SCOPE_NOWHERE
) )
type NextHopFlag int
const (
FLAG_ONLINK NextHopFlag = syscall.RTNH_F_ONLINK
FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
)
// Route represents a netlink route. A route is associated with a link, // Route represents a netlink route. A route is associated with a link,
// has a destination network, an optional source ip, and optional // has a destination network, an optional source ip, and optional
// gateway. Advanced route parameters and non-main routing tables are // gateway. Advanced route parameters and non-main routing tables are
@ -27,9 +34,44 @@ type Route struct {
Dst *net.IPNet Dst *net.IPNet
Src net.IP Src net.IP
Gw net.IP Gw net.IP
Flags int
} }
func (r Route) String() string { func (r Route) String() string {
return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s}", r.LinkIndex, r.Dst, return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s}", r.LinkIndex, r.Dst,
r.Src, r.Gw) r.Src, r.Gw, r.ListFlags())
}
func (r *Route) SetFlag(flag NextHopFlag) {
r.Flags |= int(flag)
}
func (r *Route) ClearFlag(flag NextHopFlag) {
r.Flags &^= int(flag)
}
type flagString struct {
f NextHopFlag
s string
}
var testFlags = []flagString{
flagString{f: FLAG_ONLINK, s: "onlink"},
flagString{f: FLAG_PERVASIVE, s: "pervasive"},
}
func (r *Route) ListFlags() []string {
var flags []string
for _, tf := range testFlags {
if r.Flags&int(tf.f) != 0 {
flags = append(flags, tf.s)
}
}
return flags
}
// RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE
type RouteUpdate struct {
Type uint16
Route
} }

View file

@ -17,7 +17,7 @@ func RouteAdd(route *Route) error {
return routeHandle(route, req, nl.NewRtMsg()) return routeHandle(route, req, nl.NewRtMsg())
} }
// RouteAdd will delete a route from the system. // RouteDel will delete a route from the system.
// Equivalent to: `ip route del $route` // Equivalent to: `ip route del $route`
func RouteDel(route *Route) error { func RouteDel(route *Route) error {
req := nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK) req := nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK)
@ -30,6 +30,7 @@ func routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error {
} }
msg.Scope = uint8(route.Scope) msg.Scope = uint8(route.Scope)
msg.Flags = uint32(route.Flags)
family := -1 family := -1
var rtAttrs []*nl.RtAttr var rtAttrs []*nl.RtAttr
@ -117,7 +118,6 @@ func RouteList(link Link, family int) ([]Route, error) {
index = base.Index index = base.Index
} }
native := nl.NativeEndian()
var res []Route var res []Route
for _, m := range msgs { for _, m := range msgs {
msg := nl.DeserializeRtMsg(m) msg := nl.DeserializeRtMsg(m)
@ -132,31 +132,14 @@ func RouteList(link Link, family int) ([]Route, error) {
continue continue
} }
attrs, err := nl.ParseRouteAttr(m[msg.Len():]) route, err := deserializeRoute(m)
if err != nil { if err != nil {
return nil, err return nil, err
} }
route := Route{Scope: Scope(msg.Scope)} if link != nil && route.LinkIndex != index {
for _, attr := range attrs { // Ignore routes from other interfaces
switch attr.Attr.Type { continue
case syscall.RTA_GATEWAY:
route.Gw = net.IP(attr.Value)
case syscall.RTA_PREFSRC:
route.Src = net.IP(attr.Value)
case syscall.RTA_DST:
route.Dst = &net.IPNet{
IP: attr.Value,
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
}
case syscall.RTA_OIF:
routeIndex := int(native.Uint32(attr.Value[0:4]))
if link != nil && routeIndex != index {
// Ignore routes from other interfaces
continue
}
route.LinkIndex = routeIndex
}
} }
res = append(res, route) res = append(res, route)
} }
@ -164,6 +147,37 @@ func RouteList(link Link, family int) ([]Route, error) {
return res, nil return res, nil
} }
// deserializeRoute decodes a binary netlink message into a Route struct
func deserializeRoute(m []byte) (Route, error) {
route := Route{}
msg := nl.DeserializeRtMsg(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil {
return route, err
}
route.Scope = Scope(msg.Scope)
route.Flags = int(msg.Flags)
native := nl.NativeEndian()
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.RTA_GATEWAY:
route.Gw = net.IP(attr.Value)
case syscall.RTA_PREFSRC:
route.Src = net.IP(attr.Value)
case syscall.RTA_DST:
route.Dst = &net.IPNet{
IP: attr.Value,
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
}
case syscall.RTA_OIF:
routeIndex := int(native.Uint32(attr.Value[0:4]))
route.LinkIndex = routeIndex
}
}
return route, nil
}
// RouteGet gets a route to a specific destination from the host system. // RouteGet gets a route to a specific destination from the host system.
// Equivalent to: 'ip route get'. // Equivalent to: 'ip route get'.
func RouteGet(destination net.IP) ([]Route, error) { func RouteGet(destination net.IP) ([]Route, error) {
@ -191,34 +205,47 @@ func RouteGet(destination net.IP) ([]Route, error) {
return nil, err return nil, err
} }
native := nl.NativeEndian()
var res []Route var res []Route
for _, m := range msgs { for _, m := range msgs {
msg := nl.DeserializeRtMsg(m) route, err := deserializeRoute(m)
attrs, err := nl.ParseRouteAttr(m[msg.Len():])
if err != nil { if err != nil {
return nil, err return nil, err
} }
route := Route{}
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.RTA_GATEWAY:
route.Gw = net.IP(attr.Value)
case syscall.RTA_PREFSRC:
route.Src = net.IP(attr.Value)
case syscall.RTA_DST:
route.Dst = &net.IPNet{
IP: attr.Value,
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
}
case syscall.RTA_OIF:
routeIndex := int(native.Uint32(attr.Value[0:4]))
route.LinkIndex = routeIndex
}
}
res = append(res, route) res = append(res, route)
} }
return res, nil return res, nil
} }
// RouteSubscribe takes a chan down which notifications will be sent
// when routes are added or deleted. Close the 'done' chan to stop subscription.
func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error {
s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE)
if err != nil {
return err
}
if done != nil {
go func() {
<-done
s.Close()
}()
}
go func() {
defer close(ch)
for {
msgs, err := s.Receive()
if err != nil {
return
}
for _, m := range msgs {
route, err := deserializeRoute(m.Data)
if err != nil {
return
}
ch <- RouteUpdate{Type: m.Header.Type, Route: route}
}
}
}()
return nil
}