diff --git a/hack/vendor.sh b/hack/vendor.sh index 1164812af8..ed6b6e0dc1 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -22,14 +22,14 @@ clone git github.com/vdemeester/shakers 3c10293ce22b900c27acad7b28656196fcc2f73b clone git golang.org/x/net 47990a1ba55743e6ef1affd3a14e5bac8553615d https://github.com/golang/net.git #get libnetwork packages -clone git github.com/docker/libnetwork 04cc1fa0a89f8c407b7be8cab883d4b17531ea7d +clone git github.com/docker/libnetwork bbd6e6d8ca1e7c9b42f6f53277b0bde72847ff90 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4 clone git github.com/hashicorp/serf 7151adcef72687bf95f451a2e0ba15cb19412bf2 clone git github.com/docker/libkv c2aac5dbbaa5c872211edea7c0f32b3bd67e7410 clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25 -clone git github.com/vishvananda/netlink 4b5dce31de6d42af5bb9811c6d265472199e0fec +clone git github.com/vishvananda/netlink 8e810149a2e531fed9b837c0c7d8a8922d2bedf7 clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060 clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374 clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d diff --git a/integration-cli/docker_cli_network_unix_test.go b/integration-cli/docker_cli_network_unix_test.go index d8db5216d6..c3b353c5a0 100644 --- a/integration-cli/docker_cli_network_unix_test.go +++ b/integration-cli/docker_cli_network_unix_test.go @@ -569,6 +569,9 @@ func (s *DockerDaemonSuite) TestDockerNetworkNoDiscoveryDefaultBridgeNetwork(c * out, err = s.d.Cmd("network", "connect", network, cid1) c.Assert(err, check.IsNil, check.Commentf(out)) + hosts, err = s.d.Cmd("exec", cid1, "cat", hostsFile) + c.Assert(err, checker.IsNil) + hostsPost, err = s.d.Cmd("exec", cid1, "cat", hostsFile) c.Assert(err, checker.IsNil) c.Assert(string(hosts), checker.Equals, string(hostsPost), @@ -631,6 +634,9 @@ func (s *DockerNetworkSuite) TestDockerNetworkAnonymousEndpoint(c *check.C) { dockerCmd(c, "network", "connect", cstmBridgeNw1, cid2) + hosts2, err = readContainerFileWithExec(cid2, hostsFile) + c.Assert(err, checker.IsNil) + hosts1post, err = readContainerFileWithExec(cid1, hostsFile) c.Assert(err, checker.IsNil) c.Assert(string(hosts1), checker.Equals, string(hosts1post), diff --git a/vendor/src/github.com/docker/libnetwork/MAINTAINERS b/vendor/src/github.com/docker/libnetwork/MAINTAINERS index 69f1e9b880..33f2dd2e18 100644 --- a/vendor/src/github.com/docker/libnetwork/MAINTAINERS +++ b/vendor/src/github.com/docker/libnetwork/MAINTAINERS @@ -1,5 +1,52 @@ -Alessandro Boch (@aboch) -Alexandr Morozov (@LK4D4) -Arnaud Porterie (@icecrime) -Jana Radhakrishnan (@mrjana) -Madhu Venugopal (@mavenugo) +# Libnetwork maintainers file +# +# This file describes who runs the docker/libnetwork project and how. +# This is a living document - if you see something out of date or missing, speak up! +# +# It is structured to be consumable by both humans and programs. +# To extract its contents programmatically, use any TOML-compliant parser. +# +# This file is compiled into the MAINTAINERS file in docker/opensource. +# +[Org] + [Org."Core maintainers"] + people = [ + "aboch", + "LK4D4", + "icecrime", + "mrjana", + "mavenugo", + ] + +[people] + +# A reference list of all people associated with the project. +# All other sections should refer to people by their canonical key +# in the people section. + + # ADD YOURSELF HERE IN ALPHABETICAL ORDER + + [people.aboch] + Name = "Alessandro Boch" + Email = "aboch@docker.com" + GitHub = "aboch" + + [people.LK4D4] + Name = "Alexandr Morozov" + Email = "lk4d4@docker.com" + GitHub = "LK4D4" + + [people.icecrime] + Name = "Arnaud Porterie" + Email = "arnaud@docker.com" + GitHub = "icecrime" + + [people.mrjana] + Name = "Jana Radhakrishnan" + Email = "mrjana@docker.com" + GitHub = "mrjana" + + [people.mavenugo] + Name = "Madhu Venugopal" + Email = "madhu@docker.com" + GitHub = "mavenugo" diff --git a/vendor/src/github.com/docker/libnetwork/README.md b/vendor/src/github.com/docker/libnetwork/README.md index aa3fcbce4c..cebf96af25 100644 --- a/vendor/src/github.com/docker/libnetwork/README.md +++ b/vendor/src/github.com/docker/libnetwork/README.md @@ -6,8 +6,6 @@ Libnetwork provides a native Go implementation for connecting containers The goal of libnetwork is to deliver a robust Container Network Model that provides a consistent programming interface and the required network abstractions for applications. -**NOTE**: libnetwork project is under heavy development and is not ready for general use. - #### Design Please refer to the [design](docs/design.md) for more information. @@ -67,10 +65,6 @@ There are many networking solutions available to suit a broad range of use-cases } } ``` -#### Current Status -Please watch this space for updates on the progress. - -Currently libnetwork is nothing more than an attempt to modularize the Docker platform's networking subsystem by moving it into libnetwork as a library. ## Future Please refer to [roadmap](ROADMAP.md) for more information. diff --git a/vendor/src/github.com/docker/libnetwork/bitseq/sequence.go b/vendor/src/github.com/docker/libnetwork/bitseq/sequence.go index 553e150c26..a537ed0107 100644 --- a/vendor/src/github.com/docker/libnetwork/bitseq/sequence.go +++ b/vendor/src/github.com/docker/libnetwork/bitseq/sequence.go @@ -24,7 +24,10 @@ const ( ) var ( - errNoBitAvailable = fmt.Errorf("no bit available") + // ErrNoBitAvailable is returned when no more bits are available to set + ErrNoBitAvailable = fmt.Errorf("no bit available") + // ErrBitAllocated is returned when the specific bit requested is already set + ErrBitAllocated = fmt.Errorf("requested bit is already allocated") ) // Handle contains the sequece representing the bitmask and its identifier @@ -94,7 +97,7 @@ func (s *sequence) toString() string { // GetAvailableBit returns the position of the first unset bit in the bitmask represented by this sequence func (s *sequence) getAvailableBit(from uint64) (uint64, uint64, error) { if s.block == blockMAX || s.count == 0 { - return invalidPos, invalidPos, errNoBitAvailable + return invalidPos, invalidPos, ErrNoBitAvailable } bits := from bitSel := blockFirstBit >> from @@ -197,7 +200,7 @@ func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) { return invalidPos, fmt.Errorf("invalid bit range [%d, %d]", start, end) } if h.Unselected() == 0 { - return invalidPos, errNoBitAvailable + return invalidPos, ErrNoBitAvailable } return h.set(0, start, end, true, false) } @@ -205,7 +208,7 @@ func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) { // SetAny atomically sets the first unset bit in the sequence and returns the corresponding ordinal func (h *Handle) SetAny() (uint64, error) { if h.Unselected() == 0 { - return invalidPos, errNoBitAvailable + return invalidPos, ErrNoBitAvailable } return h.set(0, 0, h.bits-1, true, false) } @@ -250,8 +253,12 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64 ) for { - if h.store != nil { - if err := h.store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound { + var store datastore.DataStore + h.Lock() + store = h.store + h.Unlock() + if store != nil { + if err := store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound { return ret, err } } @@ -265,7 +272,7 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64 bytePos, bitPos, err = getFirstAvailable(h.head, start) ret = posToOrdinal(bytePos, bitPos) if end < ret { - err = errNoBitAvailable + err = ErrNoBitAvailable } } else { bytePos, bitPos, err = checkIfAvailable(h.head, ordinal) @@ -445,7 +452,7 @@ func getFirstAvailable(head *sequence, start uint64) (uint64, uint64, error) { byteOffset += current.count * blockBytes current = current.next } - return invalidPos, invalidPos, errNoBitAvailable + return invalidPos, invalidPos, ErrNoBitAvailable } // checkIfAvailable checks if the bit correspondent to the specified ordinal is unset @@ -463,7 +470,7 @@ func checkIfAvailable(head *sequence, ordinal uint64) (uint64, uint64, error) { } } - return invalidPos, invalidPos, fmt.Errorf("requested bit is not available") + return invalidPos, invalidPos, ErrBitAllocated } // Given the byte position and the sequences list head, return the pointer to the diff --git a/vendor/src/github.com/docker/libnetwork/circle.yml b/vendor/src/github.com/docker/libnetwork/circle.yml index edeee1ceec..a454d21201 100644 --- a/vendor/src/github.com/docker/libnetwork/circle.yml +++ b/vendor/src/github.com/docker/libnetwork/circle.yml @@ -4,7 +4,7 @@ machine: dependencies: override: - - sudo apt-get update; sudo apt-get install -y iptables zookeeperd + - sudo apt-get update; sudo apt-get install -y iptables zookeeperd - go get golang.org/x/tools/cmd/vet - go get golang.org/x/tools/cmd/goimports - go get golang.org/x/tools/cmd/cover diff --git a/vendor/src/github.com/docker/libnetwork/controller.go b/vendor/src/github.com/docker/libnetwork/controller.go index e1d8277fac..be9b726a28 100644 --- a/vendor/src/github.com/docker/libnetwork/controller.go +++ b/vendor/src/github.com/docker/libnetwork/controller.go @@ -315,17 +315,17 @@ func (c *controller) RegisterIpamDriver(name string, driver ipamapi.Ipam) error _, ok := c.ipamDrivers[name] c.Unlock() if ok { - return driverapi.ErrActiveRegistration(name) + return types.ForbiddenErrorf("ipam driver %q already registered", name) } locAS, glbAS, err := driver.GetDefaultAddressSpaces() if err != nil { - return fmt.Errorf("ipam driver %s failed to return default address spaces: %v", name, err) + return types.InternalErrorf("ipam driver %q failed to return default address spaces: %v", name, err) } c.Lock() c.ipamDrivers[name] = &ipamData{driver: driver, defaultLocalAddressSpace: locAS, defaultGlobalAddressSpace: glbAS} c.Unlock() - log.Debugf("Registering ipam provider: %s", name) + log.Debugf("Registering ipam driver: %q", name) return nil } @@ -667,7 +667,7 @@ func (c *controller) loadIpamDriver(name string) (*ipamData, error) { id, ok := c.ipamDrivers[name] c.Unlock() if !ok { - return nil, ErrInvalidNetworkDriver(name) + return nil, types.BadRequestErrorf("invalid ipam driver: %q", name) } return id, nil } diff --git a/vendor/src/github.com/docker/libnetwork/default_gateway.go b/vendor/src/github.com/docker/libnetwork/default_gateway.go index 5d58b06175..139242c6a5 100644 --- a/vendor/src/github.com/docker/libnetwork/default_gateway.go +++ b/vendor/src/github.com/docker/libnetwork/default_gateway.go @@ -103,10 +103,18 @@ func (sb *sandbox) needDefaultGW() bool { if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" { continue } + if ep.joinInfo.disableGatewayService { + return false + } // TODO v6 needs to be handled. if len(ep.Gateway()) > 0 { return false } + for _, r := range ep.StaticRoutes() { + if r.Destination.String() == "0.0.0.0/0" { + return false + } + } needGW = true } return needGW diff --git a/vendor/src/github.com/docker/libnetwork/driverapi/driverapi.go b/vendor/src/github.com/docker/libnetwork/driverapi/driverapi.go index bd311d0035..44a937fb73 100644 --- a/vendor/src/github.com/docker/libnetwork/driverapi/driverapi.go +++ b/vendor/src/github.com/docker/libnetwork/driverapi/driverapi.go @@ -91,6 +91,9 @@ type JoinInfo interface { // AddStaticRoute adds a routes to the sandbox. // It may be used in addtion to or instead of a default gateway (as above). AddStaticRoute(destination *net.IPNet, routeType int, nextHop net.IP) error + + // DisableGatewayService tells libnetwork not to provide Default GW for the container + DisableGatewayService() } // DriverCallback provides a Callback interface for Drivers into LibNetwork diff --git a/vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_serf.go b/vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_serf.go index a10bbf8e33..e3f373c0d0 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_serf.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/overlay/ov_serf.go @@ -151,6 +151,10 @@ func (d *driver) processQuery(q *serf.Query) { } func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) { + if d.serfInstance == nil { + return nil, nil, nil, fmt.Errorf("could not resolve peer: serf instance not initialized") + } + qPayload := fmt.Sprintf("%s %s", string(nid), peerIP.String()) resp, err := d.serfInstance.Query("peerlookup", []byte(qPayload), nil) if err != nil { diff --git a/vendor/src/github.com/docker/libnetwork/drivers/remote/api/api.go b/vendor/src/github.com/docker/libnetwork/drivers/remote/api/api.go index 1d7742e21a..6c9fb09521 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/remote/api/api.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/remote/api/api.go @@ -134,10 +134,11 @@ type StaticRoute struct { // JoinResponse is the response to a JoinRequest. type JoinResponse struct { Response - InterfaceName *InterfaceName - Gateway string - GatewayIPv6 string - StaticRoutes []StaticRoute + InterfaceName *InterfaceName + Gateway string + GatewayIPv6 string + StaticRoutes []StaticRoute + DisableGatewayService bool } // LeaveRequest describes the API for detaching an endpoint from a sandbox. diff --git a/vendor/src/github.com/docker/libnetwork/drivers/remote/driver.go b/vendor/src/github.com/docker/libnetwork/drivers/remote/driver.go index a10698e317..0a7ab1865c 100644 --- a/vendor/src/github.com/docker/libnetwork/drivers/remote/driver.go +++ b/vendor/src/github.com/docker/libnetwork/drivers/remote/driver.go @@ -231,6 +231,9 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, } } } + if res.DisableGatewayService { + jinfo.DisableGatewayService() + } return nil } diff --git a/vendor/src/github.com/docker/libnetwork/endpoint.go b/vendor/src/github.com/docker/libnetwork/endpoint.go index 865f2aab70..524287fc8f 100644 --- a/vendor/src/github.com/docker/libnetwork/endpoint.go +++ b/vendor/src/github.com/docker/libnetwork/endpoint.go @@ -60,6 +60,8 @@ type endpoint struct { anonymous bool generic map[string]interface{} joinLeaveDone chan struct{} + prefAddress net.IP + ipamOptions map[string]string dbIndex uint64 dbExists bool sync.Mutex @@ -386,6 +388,9 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error { } }() + // Watch for service records + network.getController().watchSvcRecord(ep) + address := "" if ip := ep.getFirstInterfaceAddress(); ip != nil { address = ip.String() @@ -394,9 +399,6 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error { return err } - // Watch for service records - network.getController().watchSvcRecord(ep) - if err = sb.updateDNS(network.enableIPv6); err != nil { return err } @@ -559,7 +561,7 @@ func (ep *endpoint) sbLeave(sbox Sandbox, options ...EndpointOption) error { sb.deleteHostsEntries(n.getSvcRecords(ep)) - if sb.needDefaultGW() { + if !sb.inDelete && sb.needDefaultGW() { ep := sb.getEPwithoutGateway() if ep == nil { return fmt.Errorf("endpoint without GW expected, but not found") @@ -685,6 +687,14 @@ func EndpointOptionGeneric(generic map[string]interface{}) EndpointOption { } } +// CreateOptionIpam function returns an option setter for the ipam configuration for this endpoint +func CreateOptionIpam(prefAddress net.IP, ipamOptions map[string]string) EndpointOption { + return func(ep *endpoint) { + ep.prefAddress = prefAddress + ep.ipamOptions = ipamOptions + } +} + // CreateOptionExposedPorts function returns an option setter for the container exposed // ports option to be passed to network.CreateEndpoint() method. func CreateOptionExposedPorts(exposedPorts []types.TransportPort) EndpointOption { @@ -799,7 +809,7 @@ func (ep *endpoint) assignAddressVersion(ipVer int, ipam ipamapi.Ipam) error { if *address != nil { prefIP = (*address).IP } - addr, _, err := ipam.RequestAddress(d.PoolID, prefIP, nil) + addr, _, err := ipam.RequestAddress(d.PoolID, prefIP, ep.ipamOptions) if err == nil { ep.Lock() *address = addr diff --git a/vendor/src/github.com/docker/libnetwork/endpoint_info.go b/vendor/src/github.com/docker/libnetwork/endpoint_info.go index 1028308557..624bc533c5 100644 --- a/vendor/src/github.com/docker/libnetwork/endpoint_info.go +++ b/vendor/src/github.com/docker/libnetwork/endpoint_info.go @@ -25,6 +25,10 @@ type EndpointInfo interface { // This will only return a valid value if a container has joined the endpoint. GatewayIPv6() net.IP + // StaticRoutes returns the list of static routes configured by the network + // driver when the container joins a network + StaticRoutes() []*types.StaticRoute + // Sandbox returns the attached sandbox if there, nil otherwise. Sandbox() Sandbox } @@ -136,9 +140,10 @@ func (epi *endpointInterface) CopyTo(dstEpi *endpointInterface) error { } type endpointJoinInfo struct { - gw net.IP - gw6 net.IP - StaticRoutes []*types.StaticRoute + gw net.IP + gw6 net.IP + StaticRoutes []*types.StaticRoute + disableGatewayService bool } func (ep *endpoint) Info() EndpointInfo { @@ -295,6 +300,17 @@ func (ep *endpoint) Sandbox() Sandbox { return cnt } +func (ep *endpoint) StaticRoutes() []*types.StaticRoute { + ep.Lock() + defer ep.Unlock() + + if ep.joinInfo == nil { + return nil + } + + return ep.joinInfo.StaticRoutes +} + func (ep *endpoint) Gateway() net.IP { ep.Lock() defer ep.Unlock() @@ -340,3 +356,10 @@ func (ep *endpoint) retrieveFromStore() (*endpoint, error) { } return n.getEndpointFromStore(ep.ID()) } + +func (ep *endpoint) DisableGatewayService() { + ep.Lock() + defer ep.Unlock() + + ep.joinInfo.disableGatewayService = true +} diff --git a/vendor/src/github.com/docker/libnetwork/etchosts/etchosts.go b/vendor/src/github.com/docker/libnetwork/etchosts/etchosts.go index 92597b71b4..256a89dfa8 100644 --- a/vendor/src/github.com/docker/libnetwork/etchosts/etchosts.go +++ b/vendor/src/github.com/docker/libnetwork/etchosts/etchosts.go @@ -1,6 +1,7 @@ package etchosts import ( + "bufio" "bytes" "fmt" "io" @@ -138,19 +139,36 @@ func Delete(path string, recs []Record) error { if len(recs) == 0 { return nil } - - old, err := ioutil.ReadFile(path) + old, err := os.Open(path) if err != nil { return err } - regexpStr := fmt.Sprintf("\\S*\\t%s\\n", regexp.QuoteMeta(recs[0].Hosts)) - for _, r := range recs[1:] { - regexpStr = regexpStr + "|" + fmt.Sprintf("\\S*\\t%s\\n", regexp.QuoteMeta(r.Hosts)) - } + var buf bytes.Buffer - var re = regexp.MustCompile(regexpStr) - return ioutil.WriteFile(path, re.ReplaceAll(old, []byte("")), 0644) + s := bufio.NewScanner(old) + eol := []byte{'\n'} +loop: + for s.Scan() { + b := s.Bytes() + if b[0] == '#' { + buf.Write(b) + buf.Write(eol) + continue + } + for _, r := range recs { + if bytes.HasSuffix(b, []byte("\t"+r.Hosts)) { + continue loop + } + } + buf.Write(b) + buf.Write(eol) + } + old.Close() + if err := s.Err(); err != nil { + return err + } + return ioutil.WriteFile(path, buf.Bytes(), 0644) } // Update all IP addresses where hostname matches. diff --git a/vendor/src/github.com/docker/libnetwork/ipam/allocator.go b/vendor/src/github.com/docker/libnetwork/ipam/allocator.go index f90c7c1805..1ca9127cc2 100644 --- a/vendor/src/github.com/docker/libnetwork/ipam/allocator.go +++ b/vendor/src/github.com/docker/libnetwork/ipam/allocator.go @@ -76,8 +76,7 @@ func NewAllocator(lcDs, glDs datastore.DataStore) (*Allocator, error) { func (a *Allocator) refresh(as string) error { aSpace, err := a.getAddressSpaceFromStore(as) if err != nil { - return fmt.Errorf("error getting pools config from store during init: %v", - err) + return types.InternalErrorf("error getting pools config from store: %v", err) } if aSpace == nil { @@ -239,7 +238,7 @@ func (a *Allocator) insertBitMask(key SubnetKey, pool *net.IPNet) error { store := a.getStore(key.AddressSpace) if store == nil { - return fmt.Errorf("could not find store for address space %s while inserting bit mask", key.AddressSpace) + return types.InternalErrorf("could not find store for address space %s while inserting bit mask", key.AddressSpace) } ipVer := getAddressVersion(pool.IP) @@ -279,7 +278,7 @@ func (a *Allocator) retrieveBitmask(k SubnetKey, n *net.IPNet) (*bitseq.Handle, if !ok { log.Debugf("Retrieving bitmask (%s, %s)", k.String(), n.String()) if err := a.insertBitMask(k, n); err != nil { - return nil, fmt.Errorf("could not find bitmask in datastore for %s", k.String()) + return nil, types.InternalErrorf("could not find bitmask in datastore for %s", k.String()) } a.Lock() bm = a.addresses[k] @@ -306,7 +305,7 @@ func (a *Allocator) getPredefinedPool(as string, ipV6 bool) (*net.IPNet, error) } if as != localAddressSpace && as != globalAddressSpace { - return nil, fmt.Errorf("no default pool availbale for non-default addresss spaces") + return nil, types.NotImplementedErrorf("no default pool availbale for non-default addresss spaces") } aSpace, err := a.getAddrSpace(as) @@ -378,7 +377,7 @@ func (a *Allocator) RequestAddress(poolID string, prefAddress net.IP, opts map[s bm, err := a.retrieveBitmask(k, c.Pool) if err != nil { - return nil, nil, fmt.Errorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v", + return nil, nil, types.InternalErrorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v", k.String(), prefAddress, poolID, err) } ip, err := a.getAddress(p.Pool, bm, prefAddress, p.Range) @@ -410,12 +409,12 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error { p, ok := aSpace.subnets[k] if !ok { aSpace.Unlock() - return ipamapi.ErrBadPool + return types.NotFoundErrorf("cannot find address pool for poolID:%s", poolID) } if address == nil { aSpace.Unlock() - return ipamapi.ErrInvalidRequest + return types.BadRequestErrorf("invalid address: nil") } if !p.Pool.Contains(address) { @@ -434,12 +433,12 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error { h, err := types.GetHostPartIP(address, mask) if err != nil { - return fmt.Errorf("failed to release address %s: %v", address.String(), err) + return types.InternalErrorf("failed to release address %s: %v", address.String(), err) } bm, err := a.retrieveBitmask(k, c.Pool) if err != nil { - return fmt.Errorf("could not find bitmask in datastore for %s on address %v release from pool %s: %v", + return types.InternalErrorf("could not find bitmask in datastore for %s on address %v release from pool %s: %v", k.String(), address, poolID, err) } @@ -463,19 +462,25 @@ func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddres } else if prefAddress != nil { hostPart, e := types.GetHostPartIP(prefAddress, base.Mask) if e != nil { - return nil, fmt.Errorf("failed to allocate preferred address %s: %v", prefAddress.String(), e) + return nil, types.InternalErrorf("failed to allocate preferred address %s: %v", prefAddress.String(), e) } ordinal = ipToUint64(types.GetMinimalIP(hostPart)) err = bitmask.Set(ordinal) } else { ordinal, err = bitmask.SetAnyInRange(ipr.Start, ipr.End) } - if err != nil { - return nil, ipamapi.ErrNoAvailableIPs - } - // Convert IP ordinal for this subnet into IP address - return generateAddress(ordinal, base), nil + switch err { + case nil: + // Convert IP ordinal for this subnet into IP address + return generateAddress(ordinal, base), nil + case bitseq.ErrBitAllocated: + return nil, ipamapi.ErrIPAlreadyAllocated + case bitseq.ErrNoBitAvailable: + return nil, ipamapi.ErrNoAvailableIPs + default: + return nil, err + } } // DumpDatabase dumps the internal info diff --git a/vendor/src/github.com/docker/libnetwork/ipam/store.go b/vendor/src/github.com/docker/libnetwork/ipam/store.go index 29cd753b60..ba44ef9dd9 100644 --- a/vendor/src/github.com/docker/libnetwork/ipam/store.go +++ b/vendor/src/github.com/docker/libnetwork/ipam/store.go @@ -2,7 +2,6 @@ package ipam import ( "encoding/json" - "fmt" log "github.com/Sirupsen/logrus" "github.com/docker/libnetwork/datastore" @@ -84,7 +83,7 @@ func (a *Allocator) getStore(as string) datastore.DataStore { func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) { store := a.getStore(as) if store == nil { - return nil, fmt.Errorf("store for address space %s not found", as) + return nil, types.InternalErrorf("store for address space %s not found", as) } pc := &addrSpace{id: dsConfigKey + "/" + as, ds: store, alloc: a} @@ -93,7 +92,7 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) { return nil, nil } - return nil, fmt.Errorf("could not get pools config from store: %v", err) + return nil, types.InternalErrorf("could not get pools config from store: %v", err) } return pc, nil @@ -102,7 +101,7 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) { func (a *Allocator) writeToStore(aSpace *addrSpace) error { store := aSpace.store() if store == nil { - return fmt.Errorf("invalid store while trying to write %s address space", aSpace.DataScope()) + return types.InternalErrorf("invalid store while trying to write %s address space", aSpace.DataScope()) } err := store.PutObjectAtomic(aSpace) @@ -116,7 +115,7 @@ func (a *Allocator) writeToStore(aSpace *addrSpace) error { func (a *Allocator) deleteFromStore(aSpace *addrSpace) error { store := aSpace.store() if store == nil { - return fmt.Errorf("invalid store while trying to delete %s address space", aSpace.DataScope()) + return types.InternalErrorf("invalid store while trying to delete %s address space", aSpace.DataScope()) } return store.DeleteObjectAtomic(aSpace) diff --git a/vendor/src/github.com/docker/libnetwork/ipam/structures.go b/vendor/src/github.com/docker/libnetwork/ipam/structures.go index 9ac171887c..cd0593ceff 100644 --- a/vendor/src/github.com/docker/libnetwork/ipam/structures.go +++ b/vendor/src/github.com/docker/libnetwork/ipam/structures.go @@ -88,12 +88,12 @@ func (s *SubnetKey) String() string { // FromString populate the SubnetKey object reading it from string func (s *SubnetKey) FromString(str string) error { if str == "" || !strings.Contains(str, "/") { - return fmt.Errorf("invalid string form for subnetkey: %s", str) + return types.BadRequestErrorf("invalid string form for subnetkey: %s", str) } p := strings.Split(str, "/") if len(p) != 3 && len(p) != 5 { - return fmt.Errorf("invalid string form for subnetkey: %s", str) + return types.BadRequestErrorf("invalid string form for subnetkey: %s", str) } s.AddressSpace = p[0] s.Subnet = fmt.Sprintf("%s/%s", p[1], p[2]) @@ -317,7 +317,7 @@ func (aSpace *addrSpace) updatePoolDBOnRemoval(k SubnetKey) (func() error, error return func() error { bm, err := aSpace.alloc.retrieveBitmask(k, c.Pool) if err != nil { - return fmt.Errorf("could not find bitmask in datastore for pool %s removal: %v", k.String(), err) + return types.InternalErrorf("could not find bitmask in datastore for pool %s removal: %v", k.String(), err) } return bm.Destroy() }, nil diff --git a/vendor/src/github.com/docker/libnetwork/ipamapi/contract.go b/vendor/src/github.com/docker/libnetwork/ipamapi/contract.go index 2b11b303a4..5323c4f7df 100644 --- a/vendor/src/github.com/docker/libnetwork/ipamapi/contract.go +++ b/vendor/src/github.com/docker/libnetwork/ipamapi/contract.go @@ -2,8 +2,9 @@ package ipamapi import ( - "errors" "net" + + "github.com/docker/libnetwork/types" ) /******************** @@ -15,6 +16,8 @@ const ( DefaultIPAM = "default" // PluginEndpointType represents the Endpoint Type used by Plugin system PluginEndpointType = "IpamDriver" + // RequestAddressType represents the Address Type used when requesting an address + RequestAddressType = "RequestAddressType" ) // Callback provides a Callback interface for registering an IPAM instance into LibNetwork @@ -29,22 +32,19 @@ type Callback interface { // Weel-known errors returned by IPAM var ( - ErrInvalidIpamService = errors.New("Invalid IPAM Service") - ErrInvalidIpamConfigService = errors.New("Invalid IPAM Config Service") - ErrIpamNotAvailable = errors.New("IPAM Service not available") - ErrIpamInternalError = errors.New("IPAM Internal Error") - ErrInvalidAddressSpace = errors.New("Invalid Address Space") - ErrInvalidPool = errors.New("Invalid Address Pool") - ErrInvalidSubPool = errors.New("Invalid Address SubPool") - ErrInvalidRequest = errors.New("Invalid Request") - ErrPoolNotFound = errors.New("Address Pool not found") - ErrOverlapPool = errors.New("Address pool overlaps with existing pool on this address space") - ErrNoAvailablePool = errors.New("No available pool") - ErrNoAvailableIPs = errors.New("No available addresses on this pool") - ErrIPAlreadyAllocated = errors.New("Address already in use") - ErrIPOutOfRange = errors.New("Requested address is out of range") - ErrPoolOverlap = errors.New("Pool overlaps with other one on this address space") - ErrBadPool = errors.New("Address space does not contain specified address pool") + ErrIpamInternalError = types.InternalErrorf("IPAM Internal Error") + ErrInvalidAddressSpace = types.BadRequestErrorf("Invalid Address Space") + ErrInvalidPool = types.BadRequestErrorf("Invalid Address Pool") + ErrInvalidSubPool = types.BadRequestErrorf("Invalid Address SubPool") + ErrInvalidRequest = types.BadRequestErrorf("Invalid Request") + ErrPoolNotFound = types.BadRequestErrorf("Address Pool not found") + ErrOverlapPool = types.ForbiddenErrorf("Address pool overlaps with existing pool on this address space") + ErrNoAvailablePool = types.NoServiceErrorf("No available pool") + ErrNoAvailableIPs = types.NoServiceErrorf("No available addresses on this pool") + ErrIPAlreadyAllocated = types.ForbiddenErrorf("Address already in use") + ErrIPOutOfRange = types.BadRequestErrorf("Requested address is out of range") + ErrPoolOverlap = types.ForbiddenErrorf("Pool overlaps with other one on this address space") + ErrBadPool = types.BadRequestErrorf("Address space does not contain specified address pool") ) /******************************* diff --git a/vendor/src/github.com/docker/libnetwork/network.go b/vendor/src/github.com/docker/libnetwork/network.go index 378cd80f20..be4ab70505 100644 --- a/vendor/src/github.com/docker/libnetwork/network.go +++ b/vendor/src/github.com/docker/libnetwork/network.go @@ -603,12 +603,13 @@ func (n *network) Delete() error { if err = n.getController().deleteFromStore(n.getEpCnt()); err != nil { return fmt.Errorf("error deleting network endpoint count from store: %v", err) } + + n.ipamRelease() + if err = n.getController().deleteFromStore(n); err != nil { return fmt.Errorf("error deleting network from store: %v", err) } - n.ipamRelease() - return nil } @@ -970,7 +971,10 @@ func (n *network) ipamAllocateVersion(ipVer int, ipam ipamapi.Ipam) error { // irrespective of whether ipam driver returned a gateway already. // If none of the above is true, libnetwork will allocate one. if cfg.Gateway != "" || d.Gateway == nil { - if d.Gateway, _, err = ipam.RequestAddress(d.PoolID, net.ParseIP(cfg.Gateway), nil); err != nil { + var gatewayOpts = map[string]string{ + ipamapi.RequestAddressType: netlabel.Gateway, + } + if d.Gateway, _, err = ipam.RequestAddress(d.PoolID, net.ParseIP(cfg.Gateway), gatewayOpts); err != nil { return types.InternalErrorf("failed to allocate gateway (%v): %v", cfg.Gateway, err) } } diff --git a/vendor/src/github.com/docker/libnetwork/sandbox.go b/vendor/src/github.com/docker/libnetwork/sandbox.go index b29c67fdbe..d24c43a460 100644 --- a/vendor/src/github.com/docker/libnetwork/sandbox.go +++ b/vendor/src/github.com/docker/libnetwork/sandbox.go @@ -62,7 +62,6 @@ type sandbox struct { osSbox osl.Sandbox controller *controller refCnt int - hostsOnce sync.Once endpoints epHeap epPriority map[string]int joinLeaveDone chan struct{} @@ -601,41 +600,21 @@ func (sb *sandbox) buildHostsFile() error { } func (sb *sandbox) updateHostsFile(ifaceIP string, svcRecords []etchosts.Record) error { - var err error + var mhost string if sb.config.originHostsPath != "" { return nil } - max := func(a, b int) int { - if a < b { - return b - } - - return a + if sb.config.domainName != "" { + mhost = fmt.Sprintf("%s.%s %s", sb.config.hostName, sb.config.domainName, + sb.config.hostName) + } else { + mhost = sb.config.hostName } - extraContent := make([]etchosts.Record, 0, - max(len(sb.config.extraHosts), len(svcRecords))) - - sb.hostsOnce.Do(func() { - // Rebuild the hosts file accounting for the passed - // interface IP and service records - - for _, extraHost := range sb.config.extraHosts { - extraContent = append(extraContent, - etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP}) - } - - err = etchosts.Build(sb.config.hostsPath, ifaceIP, - sb.config.hostName, sb.config.domainName, extraContent) - }) - - if err != nil { - return err - } - - extraContent = extraContent[:0] + extraContent := make([]etchosts.Record, 0, len(svcRecords)+1) + extraContent = append(extraContent, etchosts.Record{Hosts: mhost, IP: ifaceIP}) for _, svc := range svcRecords { extraContent = append(extraContent, svc) } diff --git a/vendor/src/github.com/vishvananda/netlink/class.go b/vendor/src/github.com/vishvananda/netlink/class.go new file mode 100644 index 0000000000..35bdb33100 --- /dev/null +++ b/vendor/src/github.com/vishvananda/netlink/class.go @@ -0,0 +1,110 @@ +package netlink + +import ( + "fmt" +) + +type Class interface { + Attrs() *ClassAttrs + Type() string +} + +// Class represents a netlink class. A filter is associated with a link, +// has a handle and a parent. The root filter of a device should have a +// parent == HANDLE_ROOT. +type ClassAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Leaf uint32 +} + +func (q ClassAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf) +} + +type HtbClassAttrs struct { + // TODO handle all attributes + Rate uint64 + Ceil uint64 + Buffer uint32 + Cbuffer uint32 + Quantum uint32 + Level uint32 + Prio uint32 +} + +func (q HtbClassAttrs) String() string { + return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer) +} + +// Htb class +type HtbClass struct { + ClassAttrs + Rate uint64 + Ceil uint64 + Buffer uint32 + Cbuffer uint32 + Quantum uint32 + Level uint32 + Prio uint32 +} + +func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass { + mtu := 1600 + rate := cattrs.Rate / 8 + ceil := cattrs.Ceil / 8 + buffer := cattrs.Buffer + cbuffer := cattrs.Cbuffer + if ceil == 0 { + ceil = rate + } + + if buffer == 0 { + buffer = uint32(float64(rate)/Hz() + float64(mtu)) + } + buffer = uint32(Xmittime(rate, buffer)) + + if cbuffer == 0 { + cbuffer = uint32(float64(ceil)/Hz() + float64(mtu)) + } + cbuffer = uint32(Xmittime(ceil, cbuffer)) + + return &HtbClass{ + ClassAttrs: attrs, + Rate: rate, + Ceil: ceil, + Buffer: buffer, + Cbuffer: cbuffer, + Quantum: 10, + Level: 0, + Prio: 0, + } +} + +func (q HtbClass) String() string { + return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer) +} + +func (class *HtbClass) Attrs() *ClassAttrs { + return &class.ClassAttrs +} + +func (class *HtbClass) Type() string { + return "htb" +} + +// GenericClass classes represent types that are not currently understood +// by this netlink library. +type GenericClass struct { + ClassAttrs + ClassType string +} + +func (class *GenericClass) Attrs() *ClassAttrs { + return &class.ClassAttrs +} + +func (class *GenericClass) Type() string { + return class.ClassType +} diff --git a/vendor/src/github.com/vishvananda/netlink/class_linux.go b/vendor/src/github.com/vishvananda/netlink/class_linux.go new file mode 100644 index 0000000000..3dcc542bfe --- /dev/null +++ b/vendor/src/github.com/vishvananda/netlink/class_linux.go @@ -0,0 +1,144 @@ +package netlink + +import ( + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// ClassDel will delete a class from the system. +// Equivalent to: `tc class del $class` +func ClassDel(class Class) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELTCLASS, syscall.NLM_F_ACK) + base := class.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// ClassAdd will add a class to the system. +// Equivalent to: `tc class add $class` +func ClassAdd(class Class) error { + req := nl.NewNetlinkRequest(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + base := class.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + if htb, ok := class.(*HtbClass); ok { + opt := nl.TcHtbCopt{} + opt.Rate.Rate = uint32(htb.Rate) + opt.Ceil.Rate = uint32(htb.Ceil) + opt.Buffer = htb.Buffer + opt.Cbuffer = htb.Cbuffer + opt.Quantum = htb.Quantum + opt.Level = htb.Level + opt.Prio = htb.Prio + // TODO: Handle Debug properly. For now default to 0 + nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize()) + } + req.AddData(options) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// ClassList gets a list of classes in the system. +// Equivalent to: `tc class show`. +// Generally retunrs nothing if link and parent are not specified. +func ClassList(link Link, parent uint32) ([]Class, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP) + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Parent: parent, + } + if link != nil { + base := link.Attrs() + ensureIndex(base) + msg.Ifindex = int32(base.Index) + } + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTCLASS) + if err != nil { + return nil, err + } + + var res []Class + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + base := ClassAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + } + + var class Class + classType := "" + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + classType = string(attr.Value[:len(attr.Value)-1]) + switch classType { + case "htb": + class = &HtbClass{} + default: + class = &GenericClass{ClassType: classType} + } + case nl.TCA_OPTIONS: + switch classType { + case "htb": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + _, err = parseHtbClassData(class, data) + if err != nil { + return nil, err + } + } + } + } + *class.Attrs() = base + res = append(res, class) + } + + return res, nil +} + +func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) { + htb := class.(*HtbClass) + detailed := false + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_HTB_PARMS: + opt := nl.DeserializeTcHtbCopt(datum.Value) + htb.Rate = uint64(opt.Rate.Rate) + htb.Ceil = uint64(opt.Ceil.Rate) + htb.Buffer = opt.Buffer + htb.Cbuffer = opt.Cbuffer + htb.Quantum = opt.Quantum + htb.Level = opt.Level + htb.Prio = opt.Prio + } + } + return detailed, nil +} diff --git a/vendor/src/github.com/vishvananda/netlink/filter.go b/vendor/src/github.com/vishvananda/netlink/filter.go new file mode 100644 index 0000000000..80ef34ded4 --- /dev/null +++ b/vendor/src/github.com/vishvananda/netlink/filter.go @@ -0,0 +1,140 @@ +package netlink + +import ( + "errors" + "fmt" + "github.com/vishvananda/netlink/nl" +) + +type Filter interface { + Attrs() *FilterAttrs + Type() string +} + +// Filter represents a netlink filter. A filter is associated with a link, +// has a handle and a parent. The root filter of a device should have a +// parent == HANDLE_ROOT. +type FilterAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Priority uint16 // lower is higher priority + Protocol uint16 // syscall.ETH_P_* +} + +func (q FilterAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Priority: %d, Protocol: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Priority, q.Protocol) +} + +// U32 filters on many packet related properties +type U32 struct { + FilterAttrs + // Currently only supports redirecting to another interface + RedirIndex int +} + +func (filter *U32) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *U32) Type() string { + return "u32" +} + +type FilterFwAttrs struct { + ClassId uint32 + InDev string + Mask uint32 + Index uint32 + Buffer uint32 + Mtu uint32 + Mpu uint16 + Rate uint32 + AvRate uint32 + PeakRate uint32 + Action int + Overhead uint16 + LinkLayer int +} + +// FwFilter filters on firewall marks +type Fw struct { + FilterAttrs + ClassId uint32 + Police nl.TcPolice + InDev string + // TODO Action + Mask uint32 + AvRate uint32 + Rtab [256]uint32 + Ptab [256]uint32 +} + +func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) { + var rtab [256]uint32 + var ptab [256]uint32 + rcell_log := -1 + pcell_log := -1 + avrate := fattrs.AvRate / 8 + police := nl.TcPolice{} + police.Rate.Rate = fattrs.Rate / 8 + police.PeakRate.Rate = fattrs.PeakRate / 8 + buffer := fattrs.Buffer + linklayer := nl.LINKLAYER_ETHERNET + + if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC { + linklayer = fattrs.LinkLayer + } + + police.Action = int32(fattrs.Action) + if police.Rate.Rate != 0 { + police.Rate.Mpu = fattrs.Mpu + police.Rate.Overhead = fattrs.Overhead + if CalcRtable(&police.Rate, rtab, rcell_log, fattrs.Mtu, linklayer) < 0 { + return nil, errors.New("TBF: failed to calculate rate table.") + } + police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer))) + } + police.Mtu = fattrs.Mtu + if police.PeakRate.Rate != 0 { + police.PeakRate.Mpu = fattrs.Mpu + police.PeakRate.Overhead = fattrs.Overhead + if CalcRtable(&police.PeakRate, ptab, pcell_log, fattrs.Mtu, linklayer) < 0 { + return nil, errors.New("POLICE: failed to calculate peak rate table.") + } + } + + return &Fw{ + FilterAttrs: attrs, + ClassId: fattrs.ClassId, + InDev: fattrs.InDev, + Mask: fattrs.Mask, + Police: police, + AvRate: avrate, + Rtab: rtab, + Ptab: ptab, + }, nil +} + +func (filter *Fw) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *Fw) Type() string { + return "fw" +} + +// GenericFilter filters represent types that are not currently understood +// by this netlink library. +type GenericFilter struct { + FilterAttrs + FilterType string +} + +func (filter *GenericFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *GenericFilter) Type() string { + return filter.FilterType +} diff --git a/vendor/src/github.com/vishvananda/netlink/filter_linux.go b/vendor/src/github.com/vishvananda/netlink/filter_linux.go new file mode 100644 index 0000000000..1dc688b124 --- /dev/null +++ b/vendor/src/github.com/vishvananda/netlink/filter_linux.go @@ -0,0 +1,322 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// FilterDel will delete a filter from the system. +// Equivalent to: `tc filter del $filter` +func FilterDel(filter Filter) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK) + base := filter.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), + } + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// FilterAdd will add a filter to the system. +// Equivalent to: `tc filter add $filter` +func FilterAdd(filter Filter) error { + native = nl.NativeEndian() + req := nl.NewNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + base := filter.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + if u32, ok := filter.(*U32); ok { + // match all + sel := nl.TcU32Sel{ + Nkeys: 1, + Flags: nl.TC_U32_TERMINAL, + } + sel.Keys = append(sel.Keys, nl.TcU32Key{}) + nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize()) + actions := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil) + table := nl.NewRtAttrChild(actions, nl.TCA_ACT_TAB, nil) + nl.NewRtAttrChild(table, nl.TCA_KIND, nl.ZeroTerminated("mirred")) + // redirect to other interface + mir := nl.TcMirred{ + Action: nl.TC_ACT_STOLEN, + Eaction: nl.TCA_EGRESS_REDIR, + Ifindex: uint32(u32.RedirIndex), + } + aopts := nl.NewRtAttrChild(table, nl.TCA_OPTIONS, nil) + nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mir.Serialize()) + } else if fw, ok := filter.(*Fw); ok { + if fw.Mask != 0 { + b := make([]byte, 4) + native.PutUint32(b, fw.Mask) + nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b) + } + if fw.InDev != "" { + nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev)) + } + if (fw.Police != nl.TcPolice{}) { + + police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil) + nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize()) + if (fw.Police.Rate != nl.TcRateSpec{}) { + payload := SerializeRtab(fw.Rtab) + nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload) + } + if (fw.Police.PeakRate != nl.TcRateSpec{}) { + payload := SerializeRtab(fw.Ptab) + nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload) + } + } + if fw.ClassId != 0 { + b := make([]byte, 4) + native.PutUint32(b, fw.ClassId) + nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b) + } + } + + req.AddData(options) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// FilterList gets a list of filters in the system. +// Equivalent to: `tc filter show`. +// Generally retunrs nothing if link and parent are not specified. +func FilterList(link Link, parent uint32) ([]Filter, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP) + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Parent: parent, + } + if link != nil { + base := link.Attrs() + ensureIndex(base) + msg.Ifindex = int32(base.Index) + } + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER) + if err != nil { + return nil, err + } + + var res []Filter + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + base := FilterAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + } + base.Priority, base.Protocol = MajorMinor(msg.Info) + base.Protocol = nl.Swap16(base.Protocol) + + var filter Filter + filterType := "" + detailed := false + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + filterType = string(attr.Value[:len(attr.Value)-1]) + switch filterType { + case "u32": + filter = &U32{} + case "fw": + filter = &Fw{} + default: + filter = &GenericFilter{FilterType: filterType} + } + case nl.TCA_OPTIONS: + switch filterType { + case "u32": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + detailed, err = parseU32Data(filter, data) + if err != nil { + return nil, err + } + case "fw": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + detailed, err = parseFwData(filter, data) + if err != nil { + return nil, err + } + } + } + } + // only return the detailed version of the filter + if detailed { + *filter.Attrs() = base + res = append(res, filter) + } + } + + return res, nil +} + +func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + native = nl.NativeEndian() + u32 := filter.(*U32) + detailed := false + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_U32_SEL: + detailed = true + sel := nl.DeserializeTcU32Sel(datum.Value) + // only parse if we have a very basic redirect + if sel.Flags&nl.TC_U32_TERMINAL == 0 || sel.Nkeys != 1 { + return detailed, nil + } + case nl.TCA_U32_ACT: + table, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return detailed, err + } + if len(table) != 1 || table[0].Attr.Type != nl.TCA_ACT_TAB { + return detailed, fmt.Errorf("Action table not formed properly") + } + aattrs, err := nl.ParseRouteAttr(table[0].Value) + for _, aattr := range aattrs { + switch aattr.Attr.Type { + case nl.TCA_KIND: + actionType := string(aattr.Value[:len(aattr.Value)-1]) + // only parse if the action is mirred + if actionType != "mirred" { + return detailed, nil + } + case nl.TCA_OPTIONS: + adata, err := nl.ParseRouteAttr(aattr.Value) + if err != nil { + return detailed, err + } + for _, adatum := range adata { + switch adatum.Attr.Type { + case nl.TCA_MIRRED_PARMS: + mir := nl.DeserializeTcMirred(adatum.Value) + u32.RedirIndex = int(mir.Ifindex) + } + } + } + } + } + } + return detailed, nil +} + +func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + native = nl.NativeEndian() + fw := filter.(*Fw) + detailed := true + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_FW_MASK: + fw.Mask = native.Uint32(datum.Value[0:4]) + case nl.TCA_FW_CLASSID: + fw.ClassId = native.Uint32(datum.Value[0:4]) + case nl.TCA_FW_INDEV: + fw.InDev = string(datum.Value[:len(datum.Value)-1]) + case nl.TCA_FW_POLICE: + adata, _ := nl.ParseRouteAttr(datum.Value) + for _, aattr := range adata { + switch aattr.Attr.Type { + case nl.TCA_POLICE_TBF: + fw.Police = *nl.DeserializeTcPolice(aattr.Value) + case nl.TCA_POLICE_RATE: + fw.Rtab = DeserializeRtab(aattr.Value) + case nl.TCA_POLICE_PEAKRATE: + fw.Ptab = DeserializeRtab(aattr.Value) + } + } + } + } + return detailed, nil +} + +func AlignToAtm(size uint) uint { + var linksize, cells int + cells = int(size / nl.ATM_CELL_PAYLOAD) + if (size % nl.ATM_CELL_PAYLOAD) > 0 { + cells++ + } + linksize = cells * nl.ATM_CELL_SIZE + return uint(linksize) +} + +func AdjustSize(sz uint, mpu uint, linklayer int) uint { + if sz < mpu { + sz = mpu + } + switch linklayer { + case nl.LINKLAYER_ATM: + return AlignToAtm(sz) + default: + return sz + } +} + +func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cell_log int, mtu uint32, linklayer int) int { + bps := rate.Rate + mpu := rate.Mpu + var sz uint + if mtu == 0 { + mtu = 2047 + } + if cell_log < 0 { + cell_log = 0 + for (mtu >> uint(cell_log)) > 255 { + cell_log++ + } + } + for i := 0; i < 256; i++ { + sz = AdjustSize(uint((i+1)< syscall.IFF_TAP { + return fmt.Errorf("Tuntap.Mode %v unknown!", tuntap.Mode) + } + file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0) + if err != nil { + return err + } + defer file.Close() + var req ifReq + req.Flags |= syscall.IFF_ONE_QUEUE + req.Flags |= syscall.IFF_TUN_EXCL + copy(req.Name[:15], base.Name) + req.Flags |= uint16(tuntap.Mode) + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETIFF), uintptr(unsafe.Pointer(&req))) + if errno != 0 { + return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed, errno %v", errno) + } + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETPERSIST), 1) + if errno != 0 { + return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno) + } + ensureIndex(base) + + // can't set master during create, so set it afterwards + if base.MasterIndex != 0 { + // TODO: verify MasterIndex is actually a bridge? + return LinkSetMasterByIndex(link, base.MasterIndex) + } + return nil + } + req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) @@ -493,6 +533,8 @@ func linkDeserialize(m []byte) (Link, error) { switch linkType { case "dummy": link = &Dummy{} + case "ifb": + link = &Ifb{} case "bridge": link = &Bridge{} case "vlan": @@ -505,8 +547,10 @@ func linkDeserialize(m []byte) (Link, error) { link = &IPVlan{} case "macvlan": link = &Macvlan{} + case "macvtap": + link = &Macvtap{} default: - link = &Generic{LinkType: linkType} + link = &GenericLink{LinkType: linkType} } case nl.IFLA_INFO_DATA: data, err := nl.ParseRouteAttr(info.Value) @@ -522,6 +566,8 @@ func linkDeserialize(m []byte) (Link, error) { parseIPVlanData(link, data) case "macvlan": parseMacvlanData(link, data) + case "macvtap": + parseMacvtapData(link, data) } } } @@ -583,6 +629,46 @@ func LinkList() ([]Link, error) { return res, nil } +// LinkUpdate is used to pass information back from LinkSubscribe() +type LinkUpdate struct { + nl.IfInfomsg + Link +} + +// LinkSubscribe takes a chan down which notifications will be sent +// when links change. Close the 'done' chan to stop subscription. +func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error { + s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK) + if err != nil { + return err + } + if done != nil { + go func() { + <-done + s.Close() + }() + } + go func() { + defer close(ch) + for { + msgs, err := s.Receive() + if err != nil { + return + } + for _, m := range msgs { + ifmsg := nl.DeserializeIfInfomsg(m.Data) + link, err := linkDeserialize(m.Data) + if err != nil { + return + } + ch <- LinkUpdate{IfInfomsg: *ifmsg, Link: link} + } + } + }() + + return nil +} + func LinkSetHairpin(link Link, mode bool) error { return setProtinfoAttr(link, mode, nl.IFLA_BRPORT_MODE) } @@ -696,6 +782,11 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) { } } +func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) { + macv := link.(*Macvtap) + parseMacvlanData(&macv.Macvlan, data) +} + func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { macv := link.(*Macvlan) for _, datum := range data { diff --git a/vendor/src/github.com/vishvananda/netlink/link_tuntap_linux.go b/vendor/src/github.com/vishvananda/netlink/link_tuntap_linux.go new file mode 100644 index 0000000000..310bd33d8d --- /dev/null +++ b/vendor/src/github.com/vishvananda/netlink/link_tuntap_linux.go @@ -0,0 +1,14 @@ +package netlink + +// ideally golang.org/x/sys/unix would define IfReq but it only has +// IFNAMSIZ, hence this minimalistic implementation +const ( + SizeOfIfReq = 40 + IFNAMSIZ = 16 +) + +type ifReq struct { + Name [IFNAMSIZ]byte + Flags uint16 + pad [SizeOfIfReq - IFNAMSIZ - 2]byte +} diff --git a/vendor/src/github.com/vishvananda/netlink/nl/nl_linux.go b/vendor/src/github.com/vishvananda/netlink/nl/nl_linux.go index 3cb137d01b..8dbd92b819 100644 --- a/vendor/src/github.com/vishvananda/netlink/nl/nl_linux.go +++ b/vendor/src/github.com/vishvananda/netlink/nl/nl_linux.go @@ -142,7 +142,7 @@ func (a *RtAttr) Len() int { } // Serialize the RtAttr into a byte array -// This can't ust unsafe.cast because it must iterate through children. +// This can't just unsafe.cast because it must iterate through children. func (a *RtAttr) Serialize() []byte { native := NativeEndian() diff --git a/vendor/src/github.com/vishvananda/netlink/nl/tc_linux.go b/vendor/src/github.com/vishvananda/netlink/nl/tc_linux.go new file mode 100644 index 0000000000..4a32055d1a --- /dev/null +++ b/vendor/src/github.com/vishvananda/netlink/nl/tc_linux.go @@ -0,0 +1,508 @@ +package nl + +import ( + "unsafe" +) + +// LinkLayer +const ( + LINKLAYER_UNSPEC = iota + LINKLAYER_ETHERNET + LINKLAYER_ATM +) + +// ATM +const ( + ATM_CELL_PAYLOAD = 48 + ATM_CELL_SIZE = 53 +) + +const TC_LINKLAYER_MASK = 0x0F + +// Police +const ( + TCA_POLICE_UNSPEC = iota + TCA_POLICE_TBF + TCA_POLICE_RATE + TCA_POLICE_PEAKRATE + TCA_POLICE_AVRATE + TCA_POLICE_RESULT + TCA_POLICE_MAX = TCA_POLICE_RESULT +) + +// Message types +const ( + TCA_UNSPEC = iota + TCA_KIND + TCA_OPTIONS + TCA_STATS + TCA_XSTATS + TCA_RATE + TCA_FCNT + TCA_STATS2 + TCA_STAB + TCA_MAX = TCA_STAB +) + +const ( + TCA_ACT_TAB = 1 + TCAA_MAX = 1 +) + +const ( + TCA_PRIO_UNSPEC = iota + TCA_PRIO_MQ + TCA_PRIO_MAX = TCA_PRIO_MQ +) + +const ( + SizeofTcMsg = 0x14 + SizeofTcActionMsg = 0x04 + SizeofTcPrioMap = 0x14 + SizeofTcRateSpec = 0x0c + SizeofTcTbfQopt = 2*SizeofTcRateSpec + 0x0c + SizeofTcHtbCopt = 2*SizeofTcRateSpec + 0x14 + SizeofTcHtbGlob = 0x14 + SizeofTcU32Key = 0x10 + SizeofTcU32Sel = 0x10 // without keys + SizeofTcMirred = 0x1c + SizeofTcPolice = 2*SizeofTcRateSpec + 0x20 +) + +// struct tcmsg { +// unsigned char tcm_family; +// unsigned char tcm__pad1; +// unsigned short tcm__pad2; +// int tcm_ifindex; +// __u32 tcm_handle; +// __u32 tcm_parent; +// __u32 tcm_info; +// }; + +type TcMsg struct { + Family uint8 + Pad [3]byte + Ifindex int32 + Handle uint32 + Parent uint32 + Info uint32 +} + +func (msg *TcMsg) Len() int { + return SizeofTcMsg +} + +func DeserializeTcMsg(b []byte) *TcMsg { + return (*TcMsg)(unsafe.Pointer(&b[0:SizeofTcMsg][0])) +} + +func (x *TcMsg) Serialize() []byte { + return (*(*[SizeofTcMsg]byte)(unsafe.Pointer(x)))[:] +} + +// struct tcamsg { +// unsigned char tca_family; +// unsigned char tca__pad1; +// unsigned short tca__pad2; +// }; + +type TcActionMsg struct { + Family uint8 + Pad [3]byte +} + +func (msg *TcActionMsg) Len() int { + return SizeofTcActionMsg +} + +func DeserializeTcActionMsg(b []byte) *TcActionMsg { + return (*TcActionMsg)(unsafe.Pointer(&b[0:SizeofTcActionMsg][0])) +} + +func (x *TcActionMsg) Serialize() []byte { + return (*(*[SizeofTcActionMsg]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TC_PRIO_MAX = 15 +) + +// struct tc_prio_qopt { +// int bands; /* Number of bands */ +// __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +// }; + +type TcPrioMap struct { + Bands int32 + Priomap [TC_PRIO_MAX + 1]uint8 +} + +func (msg *TcPrioMap) Len() int { + return SizeofTcPrioMap +} + +func DeserializeTcPrioMap(b []byte) *TcPrioMap { + return (*TcPrioMap)(unsafe.Pointer(&b[0:SizeofTcPrioMap][0])) +} + +func (x *TcPrioMap) Serialize() []byte { + return (*(*[SizeofTcPrioMap]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_TBF_UNSPEC = iota + TCA_TBF_PARMS + TCA_TBF_RTAB + TCA_TBF_PTAB + TCA_TBF_RATE64 + TCA_TBF_PRATE64 + TCA_TBF_BURST + TCA_TBF_PBURST + TCA_TBF_MAX = TCA_TBF_PBURST +) + +// struct tc_ratespec { +// unsigned char cell_log; +// __u8 linklayer; /* lower 4 bits */ +// unsigned short overhead; +// short cell_align; +// unsigned short mpu; +// __u32 rate; +// }; + +type TcRateSpec struct { + CellLog uint8 + Linklayer uint8 + Overhead uint16 + CellAlign int16 + Mpu uint16 + Rate uint32 +} + +func (msg *TcRateSpec) Len() int { + return SizeofTcRateSpec +} + +func DeserializeTcRateSpec(b []byte) *TcRateSpec { + return (*TcRateSpec)(unsafe.Pointer(&b[0:SizeofTcRateSpec][0])) +} + +func (x *TcRateSpec) Serialize() []byte { + return (*(*[SizeofTcRateSpec]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_tbf_qopt { +// struct tc_ratespec rate; +// struct tc_ratespec peakrate; +// __u32 limit; +// __u32 buffer; +// __u32 mtu; +// }; + +type TcTbfQopt struct { + Rate TcRateSpec + Peakrate TcRateSpec + Limit uint32 + Buffer uint32 + Mtu uint32 +} + +func (msg *TcTbfQopt) Len() int { + return SizeofTcTbfQopt +} + +func DeserializeTcTbfQopt(b []byte) *TcTbfQopt { + return (*TcTbfQopt)(unsafe.Pointer(&b[0:SizeofTcTbfQopt][0])) +} + +func (x *TcTbfQopt) Serialize() []byte { + return (*(*[SizeofTcTbfQopt]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_HTB_UNSPEC = iota + TCA_HTB_PARMS + TCA_HTB_INIT + TCA_HTB_CTAB + TCA_HTB_RTAB + TCA_HTB_DIRECT_QLEN + TCA_HTB_RATE64 + TCA_HTB_CEIL64 + TCA_HTB_MAX = TCA_HTB_CEIL64 +) + +//struct tc_htb_opt { +// struct tc_ratespec rate; +// struct tc_ratespec ceil; +// __u32 buffer; +// __u32 cbuffer; +// __u32 quantum; +// __u32 level; /* out only */ +// __u32 prio; +//}; + +type TcHtbCopt struct { + Rate TcRateSpec + Ceil TcRateSpec + Buffer uint32 + Cbuffer uint32 + Quantum uint32 + Level uint32 + Prio uint32 +} + +func (msg *TcHtbCopt) Len() int { + return SizeofTcHtbCopt +} + +func DeserializeTcHtbCopt(b []byte) *TcHtbCopt { + return (*TcHtbCopt)(unsafe.Pointer(&b[0:SizeofTcHtbCopt][0])) +} + +func (x *TcHtbCopt) Serialize() []byte { + return (*(*[SizeofTcHtbCopt]byte)(unsafe.Pointer(x)))[:] +} + +type TcHtbGlob struct { + Version uint32 + Rate2Quantum uint32 + Defcls uint32 + Debug uint32 + DirectPkts uint32 +} + +func (msg *TcHtbGlob) Len() int { + return SizeofTcHtbGlob +} + +func DeserializeTcHtbGlob(b []byte) *TcHtbGlob { + return (*TcHtbGlob)(unsafe.Pointer(&b[0:SizeofTcHtbGlob][0])) +} + +func (x *TcHtbGlob) Serialize() []byte { + return (*(*[SizeofTcHtbGlob]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_U32_UNSPEC = iota + TCA_U32_CLASSID + TCA_U32_HASH + TCA_U32_LINK + TCA_U32_DIVISOR + TCA_U32_SEL + TCA_U32_POLICE + TCA_U32_ACT + TCA_U32_INDEV + TCA_U32_PCNT + TCA_U32_MARK + TCA_U32_MAX = TCA_U32_MARK +) + +// struct tc_u32_key { +// __be32 mask; +// __be32 val; +// int off; +// int offmask; +// }; + +type TcU32Key struct { + Mask uint32 // big endian + Val uint32 // big endian + Off int32 + OffMask int32 +} + +func (msg *TcU32Key) Len() int { + return SizeofTcU32Key +} + +func DeserializeTcU32Key(b []byte) *TcU32Key { + return (*TcU32Key)(unsafe.Pointer(&b[0:SizeofTcU32Key][0])) +} + +func (x *TcU32Key) Serialize() []byte { + return (*(*[SizeofTcU32Key]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_u32_sel { +// unsigned char flags; +// unsigned char offshift; +// unsigned char nkeys; +// +// __be16 offmask; +// __u16 off; +// short offoff; +// +// short hoff; +// __be32 hmask; +// struct tc_u32_key keys[0]; +// }; + +const ( + TC_U32_TERMINAL = 1 << iota + TC_U32_OFFSET = 1 << iota + TC_U32_VAROFFSET = 1 << iota + TC_U32_EAT = 1 << iota +) + +type TcU32Sel struct { + Flags uint8 + Offshift uint8 + Nkeys uint8 + Pad uint8 + Offmask uint16 // big endian + Off uint16 + Offoff int16 + Hoff int16 + Hmask uint32 // big endian + Keys []TcU32Key +} + +func (msg *TcU32Sel) Len() int { + return SizeofTcU32Sel + int(msg.Nkeys)*SizeofTcU32Key +} + +func DeserializeTcU32Sel(b []byte) *TcU32Sel { + x := &TcU32Sel{} + copy((*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:], b) + next := SizeofTcU32Sel + var i uint8 + for i = 0; i < x.Nkeys; i++ { + x.Keys = append(x.Keys, *DeserializeTcU32Key(b[next:])) + next += SizeofTcU32Key + } + return x +} + +func (x *TcU32Sel) Serialize() []byte { + // This can't just unsafe.cast because it must iterate through keys. + buf := make([]byte, x.Len()) + copy(buf, (*(*[SizeofTcU32Sel]byte)(unsafe.Pointer(x)))[:]) + next := SizeofTcU32Sel + for _, key := range x.Keys { + keyBuf := key.Serialize() + copy(buf[next:], keyBuf) + next += SizeofTcU32Key + } + return buf +} + +const ( + TCA_ACT_MIRRED = 8 +) + +const ( + TCA_MIRRED_UNSPEC = iota + TCA_MIRRED_TM + TCA_MIRRED_PARMS + TCA_MIRRED_MAX = TCA_MIRRED_PARMS +) + +const ( + TCA_EGRESS_REDIR = 1 /* packet redirect to EGRESS*/ + TCA_EGRESS_MIRROR = 2 /* mirror packet to EGRESS */ + TCA_INGRESS_REDIR = 3 /* packet redirect to INGRESS*/ + TCA_INGRESS_MIRROR = 4 /* mirror packet to INGRESS */ +) + +const ( + TC_ACT_UNSPEC = int32(-1) + TC_ACT_OK = 0 + TC_ACT_RECLASSIFY = 1 + TC_ACT_SHOT = 2 + TC_ACT_PIPE = 3 + TC_ACT_STOLEN = 4 + TC_ACT_QUEUED = 5 + TC_ACT_REPEAT = 6 + TC_ACT_JUMP = 0x10000000 +) + +// #define tc_gen \ +// __u32 index; \ +// __u32 capab; \ +// int action; \ +// int refcnt; \ +// int bindcnt +// struct tc_mirred { +// tc_gen; +// int eaction; /* one of IN/EGRESS_MIRROR/REDIR */ +// __u32 ifindex; /* ifindex of egress port */ +// }; + +type TcMirred struct { + Index uint32 + Capab uint32 + Action int32 + Refcnt int32 + Bindcnt int32 + Eaction int32 + Ifindex uint32 +} + +func (msg *TcMirred) Len() int { + return SizeofTcMirred +} + +func DeserializeTcMirred(b []byte) *TcMirred { + return (*TcMirred)(unsafe.Pointer(&b[0:SizeofTcMirred][0])) +} + +func (x *TcMirred) Serialize() []byte { + return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TC_POLICE_UNSPEC = TC_ACT_UNSPEC + TC_POLICE_OK = TC_ACT_OK + TC_POLICE_RECLASSIFY = TC_ACT_RECLASSIFY + TC_POLICE_SHOT = TC_ACT_SHOT + TC_POLICE_PIPE = TC_ACT_PIPE +) + +// struct tc_police { +// __u32 index; +// int action; +// __u32 limit; +// __u32 burst; +// __u32 mtu; +// struct tc_ratespec rate; +// struct tc_ratespec peakrate; +// int refcnt; +// int bindcnt; +// __u32 capab; +// }; + +type TcPolice struct { + Index uint32 + Action int32 + Limit uint32 + Burst uint32 + Mtu uint32 + Rate TcRateSpec + PeakRate TcRateSpec + Refcnt int32 + Bindcnt int32 + Capab uint32 +} + +func (msg *TcPolice) Len() int { + return SizeofTcPolice +} + +func DeserializeTcPolice(b []byte) *TcPolice { + return (*TcPolice)(unsafe.Pointer(&b[0:SizeofTcPolice][0])) +} + +func (x *TcPolice) Serialize() []byte { + return (*(*[SizeofTcPolice]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_FW_UNSPEC = iota + TCA_FW_CLASSID + TCA_FW_POLICE + TCA_FW_INDEV + TCA_FW_ACT + TCA_FW_MASK + TCA_FW_MAX = TCA_FW_MASK +) diff --git a/vendor/src/github.com/vishvananda/netlink/qdisc.go b/vendor/src/github.com/vishvananda/netlink/qdisc.go new file mode 100644 index 0000000000..41a4aa8e3e --- /dev/null +++ b/vendor/src/github.com/vishvananda/netlink/qdisc.go @@ -0,0 +1,167 @@ +package netlink + +import ( + "fmt" +) + +const ( + HANDLE_NONE = 0 + HANDLE_INGRESS = 0xFFFFFFF1 + HANDLE_ROOT = 0xFFFFFFFF + PRIORITY_MAP_LEN = 16 +) + +type Qdisc interface { + Attrs() *QdiscAttrs + Type() string +} + +// Qdisc represents a netlink qdisc. A qdisc is associated with a link, +// has a handle, a parent and a refcnt. The root qdisc of a device should +// have parent == HANDLE_ROOT. +type QdiscAttrs struct { + LinkIndex int + Handle uint32 + Parent uint32 + Refcnt uint32 // read only +} + +func (q QdiscAttrs) String() string { + return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Refcnt: %s}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Refcnt) +} + +func MakeHandle(major, minor uint16) uint32 { + return (uint32(major) << 16) | uint32(minor) +} + +func MajorMinor(handle uint32) (uint16, uint16) { + return uint16((handle & 0xFFFF0000) >> 16), uint16(handle & 0x0000FFFFF) +} + +func HandleStr(handle uint32) string { + switch handle { + case HANDLE_NONE: + return "none" + case HANDLE_INGRESS: + return "ingress" + case HANDLE_ROOT: + return "root" + default: + major, minor := MajorMinor(handle) + return fmt.Sprintf("%x:%x", major, minor) + } +} + +// PfifoFast is the default qdisc created by the kernel if one has not +// been defined for the interface +type PfifoFast struct { + QdiscAttrs + Bands uint8 + PriorityMap [PRIORITY_MAP_LEN]uint8 +} + +func (qdisc *PfifoFast) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *PfifoFast) Type() string { + return "pfifo_fast" +} + +// Prio is a basic qdisc that works just like PfifoFast +type Prio struct { + QdiscAttrs + Bands uint8 + PriorityMap [PRIORITY_MAP_LEN]uint8 +} + +func NewPrio(attrs QdiscAttrs) *Prio { + return &Prio{ + QdiscAttrs: attrs, + Bands: 3, + PriorityMap: [PRIORITY_MAP_LEN]uint8{1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + } +} + +func (qdisc *Prio) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Prio) Type() string { + return "prio" +} + +// Htb is a classful qdisc that rate limits based on tokens +type Htb struct { + QdiscAttrs + Version uint32 + Rate2Quantum uint32 + Defcls uint32 + Debug uint32 + DirectPkts uint32 +} + +func NewHtb(attrs QdiscAttrs) *Htb { + return &Htb{ + QdiscAttrs: attrs, + Version: 3, + Defcls: 0, + Rate2Quantum: 10, + Debug: 0, + DirectPkts: 0, + } +} + +func (qdisc *Htb) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Htb) Type() string { + return "htb" +} + +// Tbf is a classless qdisc that rate limits based on tokens +type Tbf struct { + QdiscAttrs + // TODO: handle 64bit rate properly + Rate uint64 + Limit uint32 + Buffer uint32 + // TODO: handle other settings +} + +func (qdisc *Tbf) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Tbf) Type() string { + return "tbf" +} + +// Ingress is a qdisc for adding ingress filters +type Ingress struct { + QdiscAttrs +} + +func (qdisc *Ingress) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Ingress) Type() string { + return "ingress" +} + +// GenericQdisc qdiscs represent types that are not currently understood +// by this netlink library. +type GenericQdisc struct { + QdiscAttrs + QdiscType string +} + +func (qdisc *GenericQdisc) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *GenericQdisc) Type() string { + return qdisc.QdiscType +} diff --git a/vendor/src/github.com/vishvananda/netlink/qdisc_linux.go b/vendor/src/github.com/vishvananda/netlink/qdisc_linux.go new file mode 100644 index 0000000000..a16eb99b34 --- /dev/null +++ b/vendor/src/github.com/vishvananda/netlink/qdisc_linux.go @@ -0,0 +1,316 @@ +package netlink + +import ( + "fmt" + "io/ioutil" + "strconv" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// QdiscDel will delete a qdisc from the system. +// Equivalent to: `tc qdisc del $qdisc` +func QdiscDel(qdisc Qdisc) error { + req := nl.NewNetlinkRequest(syscall.RTM_DELQDISC, syscall.NLM_F_ACK) + base := qdisc.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// QdiscAdd will add a qdisc to the system. +// Equivalent to: `tc qdisc add $qdisc` +func QdiscAdd(qdisc Qdisc) error { + req := nl.NewNetlinkRequest(syscall.RTM_NEWQDISC, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + base := qdisc.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + if prio, ok := qdisc.(*Prio); ok { + tcmap := nl.TcPrioMap{ + Bands: int32(prio.Bands), + Priomap: prio.PriorityMap, + } + options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize()) + } else if tbf, ok := qdisc.(*Tbf); ok { + opt := nl.TcTbfQopt{} + // TODO: handle rate > uint32 + opt.Rate.Rate = uint32(tbf.Rate) + opt.Limit = tbf.Limit + opt.Buffer = tbf.Buffer + nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize()) + } else if htb, ok := qdisc.(*Htb); ok { + opt := nl.TcHtbGlob{} + opt.Version = htb.Version + opt.Rate2Quantum = htb.Rate2Quantum + opt.Defcls = htb.Defcls + // TODO: Handle Debug properly. For now default to 0 + opt.Debug = htb.Debug + opt.DirectPkts = htb.DirectPkts + nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize()) + // nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize()) + } else if _, ok := qdisc.(*Ingress); ok { + // ingress filters must use the proper handle + if msg.Parent != HANDLE_INGRESS { + return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS") + } + } + req.AddData(options) + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +// QdiscList gets a list of qdiscs in the system. +// Equivalent to: `tc qdisc show`. +// The list can be filtered by link. +func QdiscList(link Link) ([]Qdisc, error) { + req := nl.NewNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP) + index := int32(0) + if link != nil { + base := link.Attrs() + ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + } + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC) + if err != nil { + return nil, err + } + + var res []Qdisc + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + // skip qdiscs from other interfaces + if link != nil && msg.Ifindex != index { + continue + } + + base := QdiscAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + Refcnt: msg.Info, + } + var qdisc Qdisc + qdiscType := "" + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + qdiscType = string(attr.Value[:len(attr.Value)-1]) + switch qdiscType { + case "pfifo_fast": + qdisc = &PfifoFast{} + case "prio": + qdisc = &Prio{} + case "tbf": + qdisc = &Tbf{} + case "ingress": + qdisc = &Ingress{} + case "htb": + qdisc = &Htb{} + default: + qdisc = &GenericQdisc{QdiscType: qdiscType} + } + case nl.TCA_OPTIONS: + switch qdiscType { + case "pfifo_fast": + // pfifo returns TcPrioMap directly without wrapping it in rtattr + if err := parsePfifoFastData(qdisc, attr.Value); err != nil { + return nil, err + } + case "prio": + // prio returns TcPrioMap directly without wrapping it in rtattr + if err := parsePrioData(qdisc, attr.Value); err != nil { + return nil, err + } + case "tbf": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseTbfData(qdisc, data); err != nil { + return nil, err + } + case "htb": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseHtbData(qdisc, data); err != nil { + return nil, err + } + + // no options for ingress + } + } + } + *qdisc.Attrs() = base + res = append(res, qdisc) + } + + return res, nil +} + +func parsePfifoFastData(qdisc Qdisc, value []byte) error { + pfifo := qdisc.(*PfifoFast) + tcmap := nl.DeserializeTcPrioMap(value) + pfifo.PriorityMap = tcmap.Priomap + pfifo.Bands = uint8(tcmap.Bands) + return nil +} + +func parsePrioData(qdisc Qdisc, value []byte) error { + prio := qdisc.(*Prio) + tcmap := nl.DeserializeTcPrioMap(value) + prio.PriorityMap = tcmap.Priomap + prio.Bands = uint8(tcmap.Bands) + return nil +} + +func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + native = nl.NativeEndian() + htb := qdisc.(*Htb) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_HTB_INIT: + opt := nl.DeserializeTcHtbGlob(datum.Value) + htb.Version = opt.Version + htb.Rate2Quantum = opt.Rate2Quantum + htb.Defcls = opt.Defcls + htb.Debug = opt.Debug + htb.DirectPkts = opt.DirectPkts + case nl.TCA_HTB_DIRECT_QLEN: + // TODO + //htb.DirectQlen = native.uint32(datum.Value) + } + } + return nil +} +func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + native = nl.NativeEndian() + tbf := qdisc.(*Tbf) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_TBF_PARMS: + opt := nl.DeserializeTcTbfQopt(datum.Value) + tbf.Rate = uint64(opt.Rate.Rate) + tbf.Limit = opt.Limit + tbf.Buffer = opt.Buffer + case nl.TCA_TBF_RATE64: + tbf.Rate = native.Uint64(datum.Value[0:4]) + } + } + return nil +} + +const ( + TIME_UNITS_PER_SEC = 1000000 +) + +var ( + tickInUsec float64 = 0.0 + clockFactor float64 = 0.0 + hz float64 = 0.0 +) + +func initClock() { + data, err := ioutil.ReadFile("/proc/net/psched") + if err != nil { + return + } + parts := strings.Split(strings.TrimSpace(string(data)), " ") + if len(parts) < 3 { + return + } + var vals [3]uint64 + for i := range vals { + val, err := strconv.ParseUint(parts[i], 16, 32) + if err != nil { + return + } + vals[i] = val + } + // compatibility + if vals[2] == 1000000000 { + vals[0] = vals[1] + } + clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC + tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor + hz = float64(vals[0]) +} + +func TickInUsec() float64 { + if tickInUsec == 0.0 { + initClock() + } + return tickInUsec +} + +func ClockFactor() float64 { + if clockFactor == 0.0 { + initClock() + } + return clockFactor +} + +func Hz() float64 { + if hz == 0.0 { + initClock() + } + return hz +} + +func time2Tick(time uint32) uint32 { + return uint32(float64(time) * TickInUsec()) +} + +func tick2Time(tick uint32) uint32 { + return uint32(float64(tick) / TickInUsec()) +} + +func time2Ktime(time uint32) uint32 { + return uint32(float64(time) * ClockFactor()) +} + +func ktime2Time(ktime uint32) uint32 { + return uint32(float64(ktime) / ClockFactor()) +} + +func burst(rate uint64, buffer uint32) uint32 { + return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC) +} + +func latency(rate uint64, limit, buffer uint32) float64 { + return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer)) +} + +func Xmittime(rate uint64, size uint32) float64 { + return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate)) +} diff --git a/vendor/src/github.com/vishvananda/netlink/route.go b/vendor/src/github.com/vishvananda/netlink/route.go index 6218546f80..789d39f262 100644 --- a/vendor/src/github.com/vishvananda/netlink/route.go +++ b/vendor/src/github.com/vishvananda/netlink/route.go @@ -17,6 +17,13 @@ const ( SCOPE_NOWHERE Scope = syscall.RT_SCOPE_NOWHERE ) +type NextHopFlag int + +const ( + FLAG_ONLINK NextHopFlag = syscall.RTNH_F_ONLINK + FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE +) + // Route represents a netlink route. A route is associated with a link, // has a destination network, an optional source ip, and optional // gateway. Advanced route parameters and non-main routing tables are @@ -27,9 +34,44 @@ type Route struct { Dst *net.IPNet Src net.IP Gw net.IP + Flags int } func (r Route) String() string { - return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s}", r.LinkIndex, r.Dst, - r.Src, r.Gw) + return fmt.Sprintf("{Ifindex: %d Dst: %s Src: %s Gw: %s Flags: %s}", r.LinkIndex, r.Dst, + r.Src, r.Gw, r.ListFlags()) +} + +func (r *Route) SetFlag(flag NextHopFlag) { + r.Flags |= int(flag) +} + +func (r *Route) ClearFlag(flag NextHopFlag) { + r.Flags &^= int(flag) +} + +type flagString struct { + f NextHopFlag + s string +} + +var testFlags = []flagString{ + flagString{f: FLAG_ONLINK, s: "onlink"}, + flagString{f: FLAG_PERVASIVE, s: "pervasive"}, +} + +func (r *Route) ListFlags() []string { + var flags []string + for _, tf := range testFlags { + if r.Flags&int(tf.f) != 0 { + flags = append(flags, tf.s) + } + } + return flags +} + +// RouteUpdate is sent when a route changes - type is RTM_NEWROUTE or RTM_DELROUTE +type RouteUpdate struct { + Type uint16 + Route } diff --git a/vendor/src/github.com/vishvananda/netlink/route_linux.go b/vendor/src/github.com/vishvananda/netlink/route_linux.go index 38908951fe..c8910e2332 100644 --- a/vendor/src/github.com/vishvananda/netlink/route_linux.go +++ b/vendor/src/github.com/vishvananda/netlink/route_linux.go @@ -17,7 +17,7 @@ func RouteAdd(route *Route) error { return routeHandle(route, req, nl.NewRtMsg()) } -// RouteAdd will delete a route from the system. +// RouteDel will delete a route from the system. // Equivalent to: `ip route del $route` func RouteDel(route *Route) error { req := nl.NewNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK) @@ -30,6 +30,7 @@ func routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { } msg.Scope = uint8(route.Scope) + msg.Flags = uint32(route.Flags) family := -1 var rtAttrs []*nl.RtAttr @@ -117,7 +118,6 @@ func RouteList(link Link, family int) ([]Route, error) { index = base.Index } - native := nl.NativeEndian() var res []Route for _, m := range msgs { msg := nl.DeserializeRtMsg(m) @@ -132,31 +132,14 @@ func RouteList(link Link, family int) ([]Route, error) { continue } - attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + route, err := deserializeRoute(m) if err != nil { return nil, err } - route := Route{Scope: Scope(msg.Scope)} - for _, attr := range attrs { - switch attr.Attr.Type { - case syscall.RTA_GATEWAY: - route.Gw = net.IP(attr.Value) - case syscall.RTA_PREFSRC: - route.Src = net.IP(attr.Value) - case syscall.RTA_DST: - route.Dst = &net.IPNet{ - IP: attr.Value, - Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)), - } - case syscall.RTA_OIF: - routeIndex := int(native.Uint32(attr.Value[0:4])) - if link != nil && routeIndex != index { - // Ignore routes from other interfaces - continue - } - route.LinkIndex = routeIndex - } + if link != nil && route.LinkIndex != index { + // Ignore routes from other interfaces + continue } res = append(res, route) } @@ -164,6 +147,37 @@ func RouteList(link Link, family int) ([]Route, error) { return res, nil } +// deserializeRoute decodes a binary netlink message into a Route struct +func deserializeRoute(m []byte) (Route, error) { + route := Route{} + msg := nl.DeserializeRtMsg(m) + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return route, err + } + route.Scope = Scope(msg.Scope) + route.Flags = int(msg.Flags) + + native := nl.NativeEndian() + for _, attr := range attrs { + switch attr.Attr.Type { + case syscall.RTA_GATEWAY: + route.Gw = net.IP(attr.Value) + case syscall.RTA_PREFSRC: + route.Src = net.IP(attr.Value) + case syscall.RTA_DST: + route.Dst = &net.IPNet{ + IP: attr.Value, + Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)), + } + case syscall.RTA_OIF: + routeIndex := int(native.Uint32(attr.Value[0:4])) + route.LinkIndex = routeIndex + } + } + return route, nil +} + // RouteGet gets a route to a specific destination from the host system. // Equivalent to: 'ip route get'. func RouteGet(destination net.IP) ([]Route, error) { @@ -191,34 +205,47 @@ func RouteGet(destination net.IP) ([]Route, error) { return nil, err } - native := nl.NativeEndian() var res []Route for _, m := range msgs { - msg := nl.DeserializeRtMsg(m) - attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + route, err := deserializeRoute(m) if err != nil { return nil, err } - - route := Route{} - for _, attr := range attrs { - switch attr.Attr.Type { - case syscall.RTA_GATEWAY: - route.Gw = net.IP(attr.Value) - case syscall.RTA_PREFSRC: - route.Src = net.IP(attr.Value) - case syscall.RTA_DST: - route.Dst = &net.IPNet{ - IP: attr.Value, - Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)), - } - case syscall.RTA_OIF: - routeIndex := int(native.Uint32(attr.Value[0:4])) - route.LinkIndex = routeIndex - } - } res = append(res, route) } return res, nil } + +// RouteSubscribe takes a chan down which notifications will be sent +// when routes are added or deleted. Close the 'done' chan to stop subscription. +func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error { + s, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE) + if err != nil { + return err + } + if done != nil { + go func() { + <-done + s.Close() + }() + } + go func() { + defer close(ch) + for { + msgs, err := s.Receive() + if err != nil { + return + } + for _, m := range msgs { + route, err := deserializeRoute(m.Data) + if err != nil { + return + } + ch <- RouteUpdate{Type: m.Header.Type, Route: route} + } + } + }() + + return nil +}