123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565 |
- package overlay
- import (
- "bytes"
- "encoding/binary"
- "encoding/hex"
- "fmt"
- "hash/fnv"
- "net"
- "sync"
- "syscall"
- "strconv"
- "github.com/Sirupsen/logrus"
- "github.com/docker/libnetwork/iptables"
- "github.com/docker/libnetwork/ns"
- "github.com/docker/libnetwork/types"
- "github.com/vishvananda/netlink"
- )
- const (
- mark = uint32(0xD0C4E3)
- timeout = 30
- pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8)
- )
- const (
- forward = iota + 1
- reverse
- bidir
- )
- type key struct {
- value []byte
- tag uint32
- }
- func (k *key) String() string {
- if k != nil {
- return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag)
- }
- return ""
- }
- type spi struct {
- forward int
- reverse int
- }
- func (s *spi) String() string {
- return fmt.Sprintf("SPI(FWD: 0x%x, REV: 0x%x)", uint32(s.forward), uint32(s.reverse))
- }
- type encrMap struct {
- nodes map[string][]*spi
- sync.Mutex
- }
- func (e *encrMap) String() string {
- e.Lock()
- defer e.Unlock()
- b := new(bytes.Buffer)
- for k, v := range e.nodes {
- b.WriteString("\n")
- b.WriteString(k)
- b.WriteString(":")
- b.WriteString("[")
- for _, s := range v {
- b.WriteString(s.String())
- b.WriteString(",")
- }
- b.WriteString("]")
- }
- return b.String()
- }
- func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal, add bool) error {
- logrus.Debugf("checkEncryption(%s, %v, %d, %t)", nid[0:7], rIP, vxlanID, isLocal)
- n := d.network(nid)
- if n == nil || !n.secure {
- return nil
- }
- if len(d.keys) == 0 {
- return types.ForbiddenErrorf("encryption key is not present")
- }
- lIP := net.ParseIP(d.bindAddress)
- aIP := net.ParseIP(d.advertiseAddress)
- nodes := map[string]net.IP{}
- switch {
- case isLocal:
- if err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
- if !aIP.Equal(pEntry.vtep) {
- nodes[pEntry.vtep.String()] = pEntry.vtep
- }
- return false
- }); err != nil {
- logrus.Warnf("Failed to retrieve list of participating nodes in overlay network %s: %v", nid[0:5], err)
- }
- default:
- if len(d.network(nid).endpoints) > 0 {
- nodes[rIP.String()] = rIP
- }
- }
- logrus.Debugf("List of nodes: %s", nodes)
- if add {
- for _, rIP := range nodes {
- if err := setupEncryption(lIP, aIP, rIP, vxlanID, d.secMap, d.keys); err != nil {
- logrus.Warnf("Failed to program network encryption between %s and %s: %v", lIP, rIP, err)
- }
- }
- } else {
- if len(nodes) == 0 {
- if err := removeEncryption(lIP, rIP, d.secMap); err != nil {
- logrus.Warnf("Failed to remove network encryption between %s and %s: %v", lIP, rIP, err)
- }
- }
- }
- return nil
- }
- func setupEncryption(localIP, advIP, remoteIP net.IP, vni uint32, em *encrMap, keys []*key) error {
- logrus.Debugf("Programming encryption for vxlan %d between %s and %s", vni, localIP, remoteIP)
- rIPs := remoteIP.String()
- indices := make([]*spi, 0, len(keys))
- err := programMangle(vni, true)
- if err != nil {
- logrus.Warn(err)
- }
- for i, k := range keys {
- spis := &spi{buildSPI(advIP, remoteIP, k.tag), buildSPI(remoteIP, advIP, k.tag)}
- dir := reverse
- if i == 0 {
- dir = bidir
- }
- fSA, rSA, err := programSA(localIP, remoteIP, spis, k, dir, true)
- if err != nil {
- logrus.Warn(err)
- }
- indices = append(indices, spis)
- if i != 0 {
- continue
- }
- err = programSP(fSA, rSA, true)
- if err != nil {
- logrus.Warn(err)
- }
- }
- em.Lock()
- em.nodes[rIPs] = indices
- em.Unlock()
- return nil
- }
- func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
- em.Lock()
- indices, ok := em.nodes[remoteIP.String()]
- em.Unlock()
- if !ok {
- return nil
- }
- for i, idxs := range indices {
- dir := reverse
- if i == 0 {
- dir = bidir
- }
- fSA, rSA, err := programSA(localIP, remoteIP, idxs, nil, dir, false)
- if err != nil {
- logrus.Warn(err)
- }
- if i != 0 {
- continue
- }
- err = programSP(fSA, rSA, false)
- if err != nil {
- logrus.Warn(err)
- }
- }
- return nil
- }
- func programMangle(vni uint32, add bool) (err error) {
- var (
- p = strconv.FormatUint(uint64(vxlanPort), 10)
- c = fmt.Sprintf("0>>22&0x3C@12&0xFFFFFF00=%d", int(vni)<<8)
- m = strconv.FormatUint(uint64(mark), 10)
- chain = "OUTPUT"
- rule = []string{"-p", "udp", "--dport", p, "-m", "u32", "--u32", c, "-j", "MARK", "--set-mark", m}
- a = "-A"
- action = "install"
- )
- if add == iptables.Exists(iptables.Mangle, chain, rule...) {
- return
- }
- if !add {
- a = "-D"
- action = "remove"
- }
- if err = iptables.RawCombinedOutput(append([]string{"-t", string(iptables.Mangle), a, chain}, rule...)...); err != nil {
- logrus.Warnf("could not %s mangle rule: %v", action, err)
- }
- return
- }
- func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) {
- var (
- action = "Removing"
- xfrmProgram = ns.NlHandle().XfrmStateDel
- )
- if add {
- action = "Adding"
- xfrmProgram = ns.NlHandle().XfrmStateAdd
- }
- if dir&reverse > 0 {
- rSA = &netlink.XfrmState{
- Src: remoteIP,
- Dst: localIP,
- Proto: netlink.XFRM_PROTO_ESP,
- Spi: spi.reverse,
- Mode: netlink.XFRM_MODE_TRANSPORT,
- }
- if add {
- rSA.Aead = buildAeadAlgo(k, spi.reverse)
- }
- exists, err := saExists(rSA)
- if err != nil {
- exists = !add
- }
- if add != exists {
- logrus.Debugf("%s: rSA{%s}", action, rSA)
- if err := xfrmProgram(rSA); err != nil {
- logrus.Warnf("Failed %s rSA{%s}: %v", action, rSA, err)
- }
- }
- }
- if dir&forward > 0 {
- fSA = &netlink.XfrmState{
- Src: localIP,
- Dst: remoteIP,
- Proto: netlink.XFRM_PROTO_ESP,
- Spi: spi.forward,
- Mode: netlink.XFRM_MODE_TRANSPORT,
- }
- if add {
- fSA.Aead = buildAeadAlgo(k, spi.forward)
- }
- exists, err := saExists(fSA)
- if err != nil {
- exists = !add
- }
- if add != exists {
- logrus.Debugf("%s fSA{%s}", action, fSA)
- if err := xfrmProgram(fSA); err != nil {
- logrus.Warnf("Failed %s fSA{%s}: %v.", action, fSA, err)
- }
- }
- }
- return
- }
- func programSP(fSA *netlink.XfrmState, rSA *netlink.XfrmState, add bool) error {
- action := "Removing"
- xfrmProgram := ns.NlHandle().XfrmPolicyDel
- if add {
- action = "Adding"
- xfrmProgram = ns.NlHandle().XfrmPolicyAdd
- }
- fullMask := net.CIDRMask(8*len(fSA.Src), 8*len(fSA.Src))
- fPol := &netlink.XfrmPolicy{
- Src: &net.IPNet{IP: fSA.Src, Mask: fullMask},
- Dst: &net.IPNet{IP: fSA.Dst, Mask: fullMask},
- Dir: netlink.XFRM_DIR_OUT,
- Proto: 17,
- DstPort: 4789,
- Mark: &netlink.XfrmMark{
- Value: mark,
- },
- Tmpls: []netlink.XfrmPolicyTmpl{
- {
- Src: fSA.Src,
- Dst: fSA.Dst,
- Proto: netlink.XFRM_PROTO_ESP,
- Mode: netlink.XFRM_MODE_TRANSPORT,
- Spi: fSA.Spi,
- },
- },
- }
- exists, err := spExists(fPol)
- if err != nil {
- exists = !add
- }
- if add != exists {
- logrus.Debugf("%s fSP{%s}", action, fPol)
- if err := xfrmProgram(fPol); err != nil {
- logrus.Warnf("%s fSP{%s}: %v", action, fPol, err)
- }
- }
- return nil
- }
- func saExists(sa *netlink.XfrmState) (bool, error) {
- _, err := ns.NlHandle().XfrmStateGet(sa)
- switch err {
- case nil:
- return true, nil
- case syscall.ESRCH:
- return false, nil
- default:
- err = fmt.Errorf("Error while checking for SA existence: %v", err)
- logrus.Warn(err)
- return false, err
- }
- }
- func spExists(sp *netlink.XfrmPolicy) (bool, error) {
- _, err := ns.NlHandle().XfrmPolicyGet(sp)
- switch err {
- case nil:
- return true, nil
- case syscall.ENOENT:
- return false, nil
- default:
- err = fmt.Errorf("Error while checking for SP existence: %v", err)
- logrus.Warn(err)
- return false, err
- }
- }
- func buildSPI(src, dst net.IP, st uint32) int {
- b := make([]byte, 4)
- binary.BigEndian.PutUint32(b, st)
- h := fnv.New32a()
- h.Write(src)
- h.Write(b)
- h.Write(dst)
- return int(binary.BigEndian.Uint32(h.Sum(nil)))
- }
- func buildAeadAlgo(k *key, s int) *netlink.XfrmStateAlgo {
- salt := make([]byte, 4)
- binary.BigEndian.PutUint32(salt, uint32(s))
- return &netlink.XfrmStateAlgo{
- Name: "rfc4106(gcm(aes))",
- Key: append(k.value, salt...),
- ICVLen: 64,
- }
- }
- func (d *driver) secMapWalk(f func(string, []*spi) ([]*spi, bool)) error {
- d.secMap.Lock()
- for node, indices := range d.secMap.nodes {
- idxs, stop := f(node, indices)
- if idxs != nil {
- d.secMap.nodes[node] = idxs
- }
- if stop {
- break
- }
- }
- d.secMap.Unlock()
- return nil
- }
- func (d *driver) setKeys(keys []*key) error {
- // Accept the encryption keys and clear any stale encryption map
- d.Lock()
- d.keys = keys
- d.secMap = &encrMap{nodes: map[string][]*spi{}}
- d.Unlock()
- logrus.Debugf("Initial encryption keys: %v", d.keys)
- return nil
- }
- // updateKeys allows to add a new key and/or change the primary key and/or prune an existing key
- // The primary key is the key used in transmission and will go in first position in the list.
- func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
- logrus.Debugf("Updating Keys. New: %v, Primary: %v, Pruned: %v", newKey, primary, pruneKey)
- logrus.Debugf("Current: %v", d.keys)
- var (
- newIdx = -1
- priIdx = -1
- delIdx = -1
- lIP = net.ParseIP(d.bindAddress)
- aIP = net.ParseIP(d.advertiseAddress)
- )
- d.Lock()
- // add new
- if newKey != nil {
- d.keys = append(d.keys, newKey)
- newIdx += len(d.keys)
- }
- for i, k := range d.keys {
- if primary != nil && k.tag == primary.tag {
- priIdx = i
- }
- if pruneKey != nil && k.tag == pruneKey.tag {
- delIdx = i
- }
- }
- d.Unlock()
- if (newKey != nil && newIdx == -1) ||
- (primary != nil && priIdx == -1) ||
- (pruneKey != nil && delIdx == -1) {
- return types.BadRequestErrorf("cannot find proper key indices while processing key update:"+
- "(newIdx,priIdx,delIdx):(%d, %d, %d)", newIdx, priIdx, delIdx)
- }
- d.secMapWalk(func(rIPs string, spis []*spi) ([]*spi, bool) {
- rIP := net.ParseIP(rIPs)
- return updateNodeKey(lIP, aIP, rIP, spis, d.keys, newIdx, priIdx, delIdx), false
- })
- d.Lock()
- // swap primary
- if priIdx != -1 {
- swp := d.keys[0]
- d.keys[0] = d.keys[priIdx]
- d.keys[priIdx] = swp
- }
- // prune
- if delIdx != -1 {
- if delIdx == 0 {
- delIdx = priIdx
- }
- d.keys = append(d.keys[:delIdx], d.keys[delIdx+1:]...)
- }
- d.Unlock()
- logrus.Debugf("Updated: %v", d.keys)
- return nil
- }
- /********************************************************
- * Steady state: rSA0, rSA1, rSA2, fSA1, fSP1
- * Rotation --> -rSA0, +rSA3, +fSA2, +fSP2/-fSP1, -fSA1
- * Steady state: rSA1, rSA2, rSA3, fSA2, fSP2
- *********************************************************/
- // Spis and keys are sorted in such away the one in position 0 is the primary
- func updateNodeKey(lIP, aIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx, delIdx int) []*spi {
- logrus.Debugf("Updating keys for node: %s (%d,%d,%d)", rIP, newIdx, priIdx, delIdx)
- spis := idxs
- logrus.Debugf("Current: %v", spis)
- // add new
- if newIdx != -1 {
- spis = append(spis, &spi{
- forward: buildSPI(aIP, rIP, curKeys[newIdx].tag),
- reverse: buildSPI(rIP, aIP, curKeys[newIdx].tag),
- })
- }
- if delIdx != -1 {
- // -rSA0
- programSA(lIP, rIP, spis[delIdx], nil, reverse, false)
- }
- if newIdx > -1 {
- // +RSA2
- programSA(lIP, rIP, spis[newIdx], curKeys[newIdx], reverse, true)
- }
- if priIdx > 0 {
- // +fSA2
- fSA2, _, _ := programSA(lIP, rIP, spis[priIdx], curKeys[priIdx], forward, true)
- // +fSP2, -fSP1
- fullMask := net.CIDRMask(8*len(fSA2.Src), 8*len(fSA2.Src))
- fSP1 := &netlink.XfrmPolicy{
- Src: &net.IPNet{IP: fSA2.Src, Mask: fullMask},
- Dst: &net.IPNet{IP: fSA2.Dst, Mask: fullMask},
- Dir: netlink.XFRM_DIR_OUT,
- Proto: 17,
- DstPort: 4789,
- Mark: &netlink.XfrmMark{
- Value: mark,
- },
- Tmpls: []netlink.XfrmPolicyTmpl{
- {
- Src: fSA2.Src,
- Dst: fSA2.Dst,
- Proto: netlink.XFRM_PROTO_ESP,
- Mode: netlink.XFRM_MODE_TRANSPORT,
- Spi: fSA2.Spi,
- },
- },
- }
- logrus.Debugf("Updating fSP{%s}", fSP1)
- if err := ns.NlHandle().XfrmPolicyUpdate(fSP1); err != nil {
- logrus.Warnf("Failed to update fSP{%s}: %v", fSP1, err)
- }
- // -fSA1
- programSA(lIP, rIP, spis[0], nil, forward, false)
- }
- // swap
- if priIdx > 0 {
- swp := spis[0]
- spis[0] = spis[priIdx]
- spis[priIdx] = swp
- }
- // prune
- if delIdx != -1 {
- if delIdx == 0 {
- delIdx = priIdx
- }
- spis = append(spis[:delIdx], spis[delIdx+1:]...)
- }
- logrus.Debugf("Updated: %v", spis)
- return spis
- }
- func (n *network) maxMTU() int {
- mtu := 1500
- if n.mtu != 0 {
- mtu = n.mtu
- }
- mtu -= vxlanEncap
- if n.secure {
- // In case of encryption account for the
- // esp packet espansion and padding
- mtu -= pktExpansion
- mtu -= (mtu % 4)
- }
- return mtu
- }
|