123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664 |
- package overlay
- import (
- "encoding/json"
- "fmt"
- "net"
- "os"
- "path/filepath"
- "strings"
- "sync"
- "syscall"
- "github.com/Sirupsen/logrus"
- "github.com/docker/libnetwork/datastore"
- "github.com/docker/libnetwork/driverapi"
- "github.com/docker/libnetwork/netutils"
- "github.com/docker/libnetwork/osl"
- "github.com/docker/libnetwork/resolvconf"
- "github.com/docker/libnetwork/types"
- "github.com/vishvananda/netlink"
- "github.com/vishvananda/netlink/nl"
- )
// Host-mode detection state shared by every overlay network in the package.
var (
	// hostMode is switched to true by setHostMode, either because the
	// _OVERLAY_HOST_MODE environment variable is set or because the probe
	// vxlan interface could not be moved into a network namespace.
	hostMode bool

	// hostModeOnce makes sure setHostMode is executed at most once; it is
	// triggered from initSandbox.
	hostModeOnce sync.Once
)
- type networkTable map[string]*network
// subnet is the per-subnet state of an overlay network: its addressing, the
// vxlan id assigned to it and the names of the devices created for it.
type subnet struct {
	// once guards initSubnetSandbox; it is replaced with a fresh value in
	// leaveSandbox so the subnet can be initialized again later.
	once *sync.Once
	// vxlanName and brName are recorded by initSubnetSandbox once the
	// devices have been created; destroySandbox uses them for cleanup.
	vxlanName, brName string
	// vni is the vxlan id; zero means "not assigned yet" (see obtainVxlanID).
	vni uint32
	// initErr is the outcome of the last initSubnetSandbox run.
	initErr error
	// subnetIP is the subnet pool and gwIP the gateway address, as supplied
	// to CreateNetwork or decoded in SetValue.
	subnetIP, gwIP *net.IPNet
}
// subnetJSON is the serialized form of a subnet as produced by
// network.Value and consumed by network.SetValue.
type subnetJSON struct {
	// SubnetIP and GwIP carry the String() form of the subnet's pool and
	// gateway.
	SubnetIP, GwIP string
	// Vni is the subnet's vxlan id.
	Vni uint32
}
- type network struct {
- id string
- dbIndex uint64
- dbExists bool
- sbox osl.Sandbox
- endpoints endpointTable
- driver *driver
- joinCnt int
- once *sync.Once
- initEpoch int
- initErr error
- subnets []*subnet
- sync.Mutex
- }
- func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
- if id == "" {
- return fmt.Errorf("invalid network id")
- }
- if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
- return types.BadRequestErrorf("ipv4 pool is empty")
- }
- // Since we perform lazy configuration make sure we try
- // configuring the driver when we enter CreateNetwork
- if err := d.configure(); err != nil {
- return err
- }
- n := &network{
- id: id,
- driver: d,
- endpoints: endpointTable{},
- once: &sync.Once{},
- subnets: []*subnet{},
- }
- for _, ipd := range ipV4Data {
- s := &subnet{
- subnetIP: ipd.Pool,
- gwIP: ipd.Gateway,
- once: &sync.Once{},
- }
- n.subnets = append(n.subnets, s)
- }
- if err := n.writeToStore(); err != nil {
- return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
- }
- d.addNetwork(n)
- return nil
- }
- func (d *driver) DeleteNetwork(nid string) error {
- if nid == "" {
- return fmt.Errorf("invalid network id")
- }
- n := d.network(nid)
- if n == nil {
- return fmt.Errorf("could not find network with id %s", nid)
- }
- d.deleteNetwork(nid)
- return n.releaseVxlanID()
- }
- func (n *network) incEndpointCount() {
- n.Lock()
- defer n.Unlock()
- n.joinCnt++
- }
- func (n *network) joinSandbox() error {
- // If there is a race between two go routines here only one will win
- // the other will wait.
- n.once.Do(func() {
- // save the error status of initSandbox in n.initErr so that
- // all the racing go routines are able to know the status.
- n.initErr = n.initSandbox()
- })
- return n.initErr
- }
- func (n *network) joinSubnetSandbox(s *subnet) error {
- s.once.Do(func() {
- s.initErr = n.initSubnetSandbox(s)
- })
- return s.initErr
- }
- func (n *network) leaveSandbox() {
- n.Lock()
- n.joinCnt--
- if n.joinCnt != 0 {
- n.Unlock()
- return
- }
- // We are about to destroy sandbox since the container is leaving the network
- // Reinitialize the once variable so that we will be able to trigger one time
- // sandbox initialization(again) when another container joins subsequently.
- n.once = &sync.Once{}
- for _, s := range n.subnets {
- s.once = &sync.Once{}
- }
- n.Unlock()
- n.destroySandbox()
- }
- func (n *network) destroySandbox() {
- sbox := n.sandbox()
- if sbox != nil {
- for _, iface := range sbox.Info().Interfaces() {
- if err := iface.Remove(); err != nil {
- logrus.Debugf("Remove interface %s failed: %v", iface.SrcName(), err)
- }
- }
- for _, s := range n.subnets {
- if hostMode {
- if err := removeFilters(n.id[:12], s.brName); err != nil {
- logrus.Warnf("Could not remove overlay filters: %v", err)
- }
- }
- if s.vxlanName != "" {
- err := deleteInterface(s.vxlanName)
- if err != nil {
- logrus.Warnf("could not cleanup sandbox properly: %v", err)
- }
- }
- }
- if hostMode {
- if err := removeNetworkChain(n.id[:12]); err != nil {
- logrus.Warnf("could not remove network chain: %v", err)
- }
- }
- sbox.Destroy()
- n.setSandbox(nil)
- }
- }
- func setHostMode() {
- if os.Getenv("_OVERLAY_HOST_MODE") != "" {
- hostMode = true
- return
- }
- err := createVxlan("testvxlan", 1)
- if err != nil {
- logrus.Errorf("Failed to create testvxlan interface: %v", err)
- return
- }
- defer deleteInterface("testvxlan")
- path := "/proc/self/ns/net"
- f, err := os.OpenFile(path, os.O_RDONLY, 0)
- if err != nil {
- logrus.Errorf("Failed to open path %s for network namespace for setting host mode: %v", path, err)
- return
- }
- defer f.Close()
- nsFD := f.Fd()
- iface, err := netlink.LinkByName("testvxlan")
- if err != nil {
- logrus.Errorf("Failed to get link testvxlan: %v", err)
- return
- }
- // If we are not able to move the vxlan interface to a namespace
- // then fallback to host mode
- if err := netlink.LinkSetNsFd(iface, int(nsFD)); err != nil {
- hostMode = true
- }
- }
- func (n *network) generateVxlanName(s *subnet) string {
- return "vx-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + n.id[:5]
- }
- func (n *network) generateBridgeName(s *subnet) string {
- return "ov-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + n.id[:5]
- }
- func isOverlap(nw *net.IPNet) bool {
- var nameservers []string
- if rc, err := resolvconf.Get(); err == nil {
- nameservers = resolvconf.GetNameserversAsCIDR(rc.Content)
- }
- if err := netutils.CheckNameserverOverlaps(nameservers, nw); err != nil {
- return true
- }
- if err := netutils.CheckRouteOverlaps(nw); err != nil {
- return true
- }
- return false
- }
- func (n *network) initSubnetSandbox(s *subnet) error {
- brName := n.generateBridgeName(s)
- vxlanName := n.generateVxlanName(s)
- if hostMode {
- // Try to delete stale bridge interface if it exists
- deleteInterface(brName)
- // Try to delete the vxlan interface by vni if already present
- deleteVxlanByVNI(n.vxlanID(s))
- if isOverlap(s.subnetIP) {
- return fmt.Errorf("overlay subnet %s has conflicts in the host while running in host mode", s.subnetIP.String())
- }
- }
- // create a bridge and vxlan device for this subnet and move it to the sandbox
- sbox := n.sandbox()
- if err := sbox.AddInterface(brName, "br",
- sbox.InterfaceOptions().Address(s.gwIP),
- sbox.InterfaceOptions().Bridge(true)); err != nil {
- return fmt.Errorf("bridge creation in sandbox failed for subnet %q: %v", s.subnetIP.String(), err)
- }
- err := createVxlan(vxlanName, n.vxlanID(s))
- if err != nil {
- return err
- }
- if err := sbox.AddInterface(vxlanName, "vxlan",
- sbox.InterfaceOptions().Master(brName)); err != nil {
- return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
- }
- if hostMode {
- if err := addFilters(n.id[:12], brName); err != nil {
- return err
- }
- }
- n.Lock()
- s.vxlanName = vxlanName
- s.brName = brName
- n.Unlock()
- return nil
- }
- func (n *network) cleanupStaleSandboxes() {
- filepath.Walk(filepath.Dir(osl.GenerateKey("walk")),
- func(path string, info os.FileInfo, err error) error {
- _, fname := filepath.Split(path)
- pList := strings.Split(fname, "-")
- if len(pList) <= 1 {
- return nil
- }
- pattern := pList[1]
- if strings.Contains(n.id, pattern) {
- syscall.Unmount(path, syscall.MNT_DETACH)
- os.Remove(path)
- }
- return nil
- })
- }
- func (n *network) initSandbox() error {
- n.Lock()
- n.initEpoch++
- n.Unlock()
- hostModeOnce.Do(setHostMode)
- if hostMode {
- if err := addNetworkChain(n.id[:12]); err != nil {
- return err
- }
- }
- // If there are any stale sandboxes related to this network
- // from previous daemon life clean it up here
- n.cleanupStaleSandboxes()
- sbox, err := osl.NewSandbox(
- osl.GenerateKey(fmt.Sprintf("%d-", n.initEpoch)+n.id), !hostMode)
- if err != nil {
- return fmt.Errorf("could not create network sandbox: %v", err)
- }
- n.setSandbox(sbox)
- n.driver.peerDbUpdateSandbox(n.id)
- var nlSock *nl.NetlinkSocket
- sbox.InvokeFunc(func() {
- nlSock, err = nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
- if err != nil {
- err = fmt.Errorf("failed to subscribe to neighbor group netlink messages")
- }
- })
- go n.watchMiss(nlSock)
- return nil
- }
- func (n *network) watchMiss(nlSock *nl.NetlinkSocket) {
- for {
- msgs, err := nlSock.Receive()
- if err != nil {
- logrus.Errorf("Failed to receive from netlink: %v ", err)
- continue
- }
- for _, msg := range msgs {
- if msg.Header.Type != syscall.RTM_GETNEIGH && msg.Header.Type != syscall.RTM_NEWNEIGH {
- continue
- }
- neigh, err := netlink.NeighDeserialize(msg.Data)
- if err != nil {
- logrus.Errorf("Failed to deserialize netlink ndmsg: %v", err)
- continue
- }
- if neigh.IP.To16() != nil {
- continue
- }
- if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
- continue
- }
- mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, neigh.IP)
- if err != nil {
- logrus.Errorf("could not resolve peer %q: %v", neigh.IP, err)
- continue
- }
- if err := n.driver.peerAdd(n.id, "dummy", neigh.IP, IPmask, mac, vtep, true); err != nil {
- logrus.Errorf("could not add neighbor entry for missed peer %q: %v", neigh.IP, err)
- }
- }
- }
- }
- func (d *driver) addNetwork(n *network) {
- d.Lock()
- d.networks[n.id] = n
- d.Unlock()
- }
- func (d *driver) deleteNetwork(nid string) {
- d.Lock()
- delete(d.networks, nid)
- d.Unlock()
- }
- func (d *driver) network(nid string) *network {
- d.Lock()
- networks := d.networks
- d.Unlock()
- n, ok := networks[nid]
- if !ok {
- n = d.getNetworkFromStore(nid)
- if n != nil {
- n.driver = d
- n.endpoints = endpointTable{}
- n.once = &sync.Once{}
- networks[nid] = n
- }
- }
- return n
- }
- func (d *driver) getNetworkFromStore(nid string) *network {
- if d.store == nil {
- return nil
- }
- n := &network{id: nid}
- if err := d.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
- return nil
- }
- return n
- }
- func (n *network) sandbox() osl.Sandbox {
- n.Lock()
- defer n.Unlock()
- return n.sbox
- }
- func (n *network) setSandbox(sbox osl.Sandbox) {
- n.Lock()
- n.sbox = sbox
- n.Unlock()
- }
- func (n *network) vxlanID(s *subnet) uint32 {
- n.Lock()
- defer n.Unlock()
- return s.vni
- }
- func (n *network) setVxlanID(s *subnet, vni uint32) {
- n.Lock()
- s.vni = vni
- n.Unlock()
- }
- func (n *network) Key() []string {
- return []string{"overlay", "network", n.id}
- }
- func (n *network) KeyPrefix() []string {
- return []string{"overlay", "network"}
- }
- func (n *network) Value() []byte {
- netJSON := []*subnetJSON{}
- for _, s := range n.subnets {
- sj := &subnetJSON{
- SubnetIP: s.subnetIP.String(),
- GwIP: s.gwIP.String(),
- Vni: s.vni,
- }
- netJSON = append(netJSON, sj)
- }
- b, err := json.Marshal(netJSON)
- if err != nil {
- return []byte{}
- }
- return b
- }
- func (n *network) Index() uint64 {
- return n.dbIndex
- }
- func (n *network) SetIndex(index uint64) {
- n.dbIndex = index
- n.dbExists = true
- }
- func (n *network) Exists() bool {
- return n.dbExists
- }
- func (n *network) Skip() bool {
- return false
- }
- func (n *network) SetValue(value []byte) error {
- var newNet bool
- netJSON := []*subnetJSON{}
- err := json.Unmarshal(value, &netJSON)
- if err != nil {
- return err
- }
- if len(n.subnets) == 0 {
- newNet = true
- }
- for _, sj := range netJSON {
- subnetIPstr := sj.SubnetIP
- gwIPstr := sj.GwIP
- vni := sj.Vni
- subnetIP, _ := types.ParseCIDR(subnetIPstr)
- gwIP, _ := types.ParseCIDR(gwIPstr)
- if newNet {
- s := &subnet{
- subnetIP: subnetIP,
- gwIP: gwIP,
- vni: vni,
- once: &sync.Once{},
- }
- n.subnets = append(n.subnets, s)
- } else {
- sNet := n.getMatchingSubnet(subnetIP)
- if sNet != nil {
- sNet.vni = vni
- }
- }
- }
- return nil
- }
- func (n *network) DataScope() string {
- return datastore.GlobalScope
- }
- func (n *network) writeToStore() error {
- return n.driver.store.PutObjectAtomic(n)
- }
- func (n *network) releaseVxlanID() error {
- if n.driver.store == nil {
- return fmt.Errorf("no datastore configured. cannot release vxlan id")
- }
- if len(n.subnets) == 0 {
- return nil
- }
- if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
- if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
- // In both the above cases we can safely assume that the key has been removed by some other
- // instance and so simply get out of here
- return nil
- }
- return fmt.Errorf("failed to delete network to vxlan id map: %v", err)
- }
- for _, s := range n.subnets {
- n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
- n.setVxlanID(s, 0)
- }
- return nil
- }
- func (n *network) obtainVxlanID(s *subnet) error {
- //return if the subnet already has a vxlan id assigned
- if s.vni != 0 {
- return nil
- }
- if n.driver.store == nil {
- return fmt.Errorf("no datastore configured. cannot obtain vxlan id")
- }
- for {
- if err := n.driver.store.GetObject(datastore.Key(n.Key()...), n); err != nil {
- return fmt.Errorf("getting network %q from datastore failed %v", n.id, err)
- }
- if s.vni == 0 {
- vxlanID, err := n.driver.vxlanIdm.GetID()
- if err != nil {
- return fmt.Errorf("failed to allocate vxlan id: %v", err)
- }
- n.setVxlanID(s, uint32(vxlanID))
- if err := n.writeToStore(); err != nil {
- n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
- n.setVxlanID(s, 0)
- if err == datastore.ErrKeyModified {
- continue
- }
- return fmt.Errorf("network %q failed to update data store: %v", n.id, err)
- }
- return nil
- }
- return nil
- }
- }
- // getSubnetforIP returns the subnet to which the given IP belongs
- func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
- for _, s := range n.subnets {
- // first check if the mask lengths are the same
- i, _ := s.subnetIP.Mask.Size()
- j, _ := ip.Mask.Size()
- if i != j {
- continue
- }
- if s.subnetIP.Contains(ip.IP) {
- return s
- }
- }
- return nil
- }
- // getMatchingSubnet return the network's subnet that matches the input
- func (n *network) getMatchingSubnet(ip *net.IPNet) *subnet {
- if ip == nil {
- return nil
- }
- for _, s := range n.subnets {
- // first check if the mask lengths are the same
- i, _ := s.subnetIP.Mask.Size()
- j, _ := ip.Mask.Size()
- if i != j {
- continue
- }
- if s.subnetIP.IP.Equal(ip.IP) {
- return s
- }
- }
- return nil
- }
|