encryption.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646
  1. package overlay
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "encoding/hex"
  6. "fmt"
  7. "hash/fnv"
  8. "net"
  9. "sync"
  10. "syscall"
  11. "strconv"
  12. "github.com/docker/libnetwork/drivers/overlay/overlayutils"
  13. "github.com/docker/libnetwork/iptables"
  14. "github.com/docker/libnetwork/ns"
  15. "github.com/docker/libnetwork/types"
  16. "github.com/sirupsen/logrus"
  17. "github.com/vishvananda/netlink"
  18. )
  19. const (
  20. r = 0xD0C4E3
  21. pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8)
  22. )
  23. const (
  24. forward = iota + 1
  25. reverse
  26. bidir
  27. )
  28. var spMark = netlink.XfrmMark{Value: uint32(r), Mask: 0xffffffff}
  29. type key struct {
  30. value []byte
  31. tag uint32
  32. }
  33. func (k *key) String() string {
  34. if k != nil {
  35. return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag)
  36. }
  37. return ""
  38. }
  39. type spi struct {
  40. forward int
  41. reverse int
  42. }
  43. func (s *spi) String() string {
  44. return fmt.Sprintf("SPI(FWD: 0x%x, REV: 0x%x)", uint32(s.forward), uint32(s.reverse))
  45. }
  46. type encrMap struct {
  47. nodes map[string][]*spi
  48. sync.Mutex
  49. }
  50. func (e *encrMap) String() string {
  51. e.Lock()
  52. defer e.Unlock()
  53. b := new(bytes.Buffer)
  54. for k, v := range e.nodes {
  55. b.WriteString("\n")
  56. b.WriteString(k)
  57. b.WriteString(":")
  58. b.WriteString("[")
  59. for _, s := range v {
  60. b.WriteString(s.String())
  61. b.WriteString(",")
  62. }
  63. b.WriteString("]")
  64. }
  65. return b.String()
  66. }
  67. func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal, add bool) error {
  68. logrus.Debugf("checkEncryption(%.7s, %v, %d, %t)", nid, rIP, vxlanID, isLocal)
  69. n := d.network(nid)
  70. if n == nil || !n.secure {
  71. return nil
  72. }
  73. if len(d.keys) == 0 {
  74. return types.ForbiddenErrorf("encryption key is not present")
  75. }
  76. lIP := net.ParseIP(d.bindAddress)
  77. aIP := net.ParseIP(d.advertiseAddress)
  78. nodes := map[string]net.IP{}
  79. switch {
  80. case isLocal:
  81. if err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
  82. if !aIP.Equal(pEntry.vtep) {
  83. nodes[pEntry.vtep.String()] = pEntry.vtep
  84. }
  85. return false
  86. }); err != nil {
  87. logrus.Warnf("Failed to retrieve list of participating nodes in overlay network %.5s: %v", nid, err)
  88. }
  89. default:
  90. if len(d.network(nid).endpoints) > 0 {
  91. nodes[rIP.String()] = rIP
  92. }
  93. }
  94. logrus.Debugf("List of nodes: %s", nodes)
  95. if add {
  96. for _, rIP := range nodes {
  97. if err := setupEncryption(lIP, aIP, rIP, vxlanID, d.secMap, d.keys); err != nil {
  98. logrus.Warnf("Failed to program network encryption between %s and %s: %v", lIP, rIP, err)
  99. }
  100. }
  101. } else {
  102. if len(nodes) == 0 {
  103. if err := removeEncryption(lIP, rIP, d.secMap); err != nil {
  104. logrus.Warnf("Failed to remove network encryption between %s and %s: %v", lIP, rIP, err)
  105. }
  106. }
  107. }
  108. return nil
  109. }
  110. func setupEncryption(localIP, advIP, remoteIP net.IP, vni uint32, em *encrMap, keys []*key) error {
  111. logrus.Debugf("Programming encryption for vxlan %d between %s and %s", vni, localIP, remoteIP)
  112. rIPs := remoteIP.String()
  113. indices := make([]*spi, 0, len(keys))
  114. err := programMangle(vni, true)
  115. if err != nil {
  116. logrus.Warn(err)
  117. }
  118. err = programInput(vni, true)
  119. if err != nil {
  120. logrus.Warn(err)
  121. }
  122. for i, k := range keys {
  123. spis := &spi{buildSPI(advIP, remoteIP, k.tag), buildSPI(remoteIP, advIP, k.tag)}
  124. dir := reverse
  125. if i == 0 {
  126. dir = bidir
  127. }
  128. fSA, rSA, err := programSA(localIP, remoteIP, spis, k, dir, true)
  129. if err != nil {
  130. logrus.Warn(err)
  131. }
  132. indices = append(indices, spis)
  133. if i != 0 {
  134. continue
  135. }
  136. err = programSP(fSA, rSA, true)
  137. if err != nil {
  138. logrus.Warn(err)
  139. }
  140. }
  141. em.Lock()
  142. em.nodes[rIPs] = indices
  143. em.Unlock()
  144. return nil
  145. }
  146. func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
  147. em.Lock()
  148. indices, ok := em.nodes[remoteIP.String()]
  149. em.Unlock()
  150. if !ok {
  151. return nil
  152. }
  153. for i, idxs := range indices {
  154. dir := reverse
  155. if i == 0 {
  156. dir = bidir
  157. }
  158. fSA, rSA, err := programSA(localIP, remoteIP, idxs, nil, dir, false)
  159. if err != nil {
  160. logrus.Warn(err)
  161. }
  162. if i != 0 {
  163. continue
  164. }
  165. err = programSP(fSA, rSA, false)
  166. if err != nil {
  167. logrus.Warn(err)
  168. }
  169. }
  170. return nil
  171. }
  172. func programMangle(vni uint32, add bool) (err error) {
  173. var (
  174. p = strconv.FormatUint(uint64(overlayutils.VXLANUDPPort()), 10)
  175. c = fmt.Sprintf("0>>22&0x3C@12&0xFFFFFF00=%d", int(vni)<<8)
  176. m = strconv.FormatUint(uint64(r), 10)
  177. chain = "OUTPUT"
  178. rule = []string{"-p", "udp", "--dport", p, "-m", "u32", "--u32", c, "-j", "MARK", "--set-mark", m}
  179. a = "-A"
  180. action = "install"
  181. )
  182. // TODO IPv6 support
  183. iptable := iptables.GetIptable(iptables.IPv4)
  184. if add == iptable.Exists(iptables.Mangle, chain, rule...) {
  185. return
  186. }
  187. if !add {
  188. a = "-D"
  189. action = "remove"
  190. }
  191. if err = iptable.RawCombinedOutput(append([]string{"-t", string(iptables.Mangle), a, chain}, rule...)...); err != nil {
  192. logrus.Warnf("could not %s mangle rule: %v", action, err)
  193. }
  194. return
  195. }
  196. func programInput(vni uint32, add bool) (err error) {
  197. var (
  198. port = strconv.FormatUint(uint64(overlayutils.VXLANUDPPort()), 10)
  199. vniMatch = fmt.Sprintf("0>>22&0x3C@12&0xFFFFFF00=%d", int(vni)<<8)
  200. plainVxlan = []string{"-p", "udp", "--dport", port, "-m", "u32", "--u32", vniMatch, "-j"}
  201. ipsecVxlan = append([]string{"-m", "policy", "--dir", "in", "--pol", "ipsec"}, plainVxlan...)
  202. block = append(plainVxlan, "DROP")
  203. accept = append(ipsecVxlan, "ACCEPT")
  204. chain = "INPUT"
  205. action = iptables.Append
  206. msg = "add"
  207. )
  208. // TODO IPv6 support
  209. iptable := iptables.GetIptable(iptables.IPv4)
  210. if !add {
  211. action = iptables.Delete
  212. msg = "remove"
  213. }
  214. if err := iptable.ProgramRule(iptables.Filter, chain, action, accept); err != nil {
  215. logrus.Errorf("could not %s input rule: %v. Please do it manually.", msg, err)
  216. }
  217. if err := iptable.ProgramRule(iptables.Filter, chain, action, block); err != nil {
  218. logrus.Errorf("could not %s input rule: %v. Please do it manually.", msg, err)
  219. }
  220. return
  221. }
  222. func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) {
  223. var (
  224. action = "Removing"
  225. xfrmProgram = ns.NlHandle().XfrmStateDel
  226. )
  227. if add {
  228. action = "Adding"
  229. xfrmProgram = ns.NlHandle().XfrmStateAdd
  230. }
  231. if dir&reverse > 0 {
  232. rSA = &netlink.XfrmState{
  233. Src: remoteIP,
  234. Dst: localIP,
  235. Proto: netlink.XFRM_PROTO_ESP,
  236. Spi: spi.reverse,
  237. Mode: netlink.XFRM_MODE_TRANSPORT,
  238. Reqid: r,
  239. }
  240. if add {
  241. rSA.Aead = buildAeadAlgo(k, spi.reverse)
  242. }
  243. exists, err := saExists(rSA)
  244. if err != nil {
  245. exists = !add
  246. }
  247. if add != exists {
  248. logrus.Debugf("%s: rSA{%s}", action, rSA)
  249. if err := xfrmProgram(rSA); err != nil {
  250. logrus.Warnf("Failed %s rSA{%s}: %v", action, rSA, err)
  251. }
  252. }
  253. }
  254. if dir&forward > 0 {
  255. fSA = &netlink.XfrmState{
  256. Src: localIP,
  257. Dst: remoteIP,
  258. Proto: netlink.XFRM_PROTO_ESP,
  259. Spi: spi.forward,
  260. Mode: netlink.XFRM_MODE_TRANSPORT,
  261. Reqid: r,
  262. }
  263. if add {
  264. fSA.Aead = buildAeadAlgo(k, spi.forward)
  265. }
  266. exists, err := saExists(fSA)
  267. if err != nil {
  268. exists = !add
  269. }
  270. if add != exists {
  271. logrus.Debugf("%s fSA{%s}", action, fSA)
  272. if err := xfrmProgram(fSA); err != nil {
  273. logrus.Warnf("Failed %s fSA{%s}: %v.", action, fSA, err)
  274. }
  275. }
  276. }
  277. return
  278. }
  279. func programSP(fSA *netlink.XfrmState, rSA *netlink.XfrmState, add bool) error {
  280. action := "Removing"
  281. xfrmProgram := ns.NlHandle().XfrmPolicyDel
  282. if add {
  283. action = "Adding"
  284. xfrmProgram = ns.NlHandle().XfrmPolicyAdd
  285. }
  286. // Create a congruent cidr
  287. s := types.GetMinimalIP(fSA.Src)
  288. d := types.GetMinimalIP(fSA.Dst)
  289. fullMask := net.CIDRMask(8*len(s), 8*len(s))
  290. fPol := &netlink.XfrmPolicy{
  291. Src: &net.IPNet{IP: s, Mask: fullMask},
  292. Dst: &net.IPNet{IP: d, Mask: fullMask},
  293. Dir: netlink.XFRM_DIR_OUT,
  294. Proto: 17,
  295. DstPort: 4789,
  296. Mark: &spMark,
  297. Tmpls: []netlink.XfrmPolicyTmpl{
  298. {
  299. Src: fSA.Src,
  300. Dst: fSA.Dst,
  301. Proto: netlink.XFRM_PROTO_ESP,
  302. Mode: netlink.XFRM_MODE_TRANSPORT,
  303. Spi: fSA.Spi,
  304. Reqid: r,
  305. },
  306. },
  307. }
  308. exists, err := spExists(fPol)
  309. if err != nil {
  310. exists = !add
  311. }
  312. if add != exists {
  313. logrus.Debugf("%s fSP{%s}", action, fPol)
  314. if err := xfrmProgram(fPol); err != nil {
  315. logrus.Warnf("%s fSP{%s}: %v", action, fPol, err)
  316. }
  317. }
  318. return nil
  319. }
  320. func saExists(sa *netlink.XfrmState) (bool, error) {
  321. _, err := ns.NlHandle().XfrmStateGet(sa)
  322. switch err {
  323. case nil:
  324. return true, nil
  325. case syscall.ESRCH:
  326. return false, nil
  327. default:
  328. err = fmt.Errorf("Error while checking for SA existence: %v", err)
  329. logrus.Warn(err)
  330. return false, err
  331. }
  332. }
  333. func spExists(sp *netlink.XfrmPolicy) (bool, error) {
  334. _, err := ns.NlHandle().XfrmPolicyGet(sp)
  335. switch err {
  336. case nil:
  337. return true, nil
  338. case syscall.ENOENT:
  339. return false, nil
  340. default:
  341. err = fmt.Errorf("Error while checking for SP existence: %v", err)
  342. logrus.Warn(err)
  343. return false, err
  344. }
  345. }
  346. func buildSPI(src, dst net.IP, st uint32) int {
  347. b := make([]byte, 4)
  348. binary.BigEndian.PutUint32(b, st)
  349. h := fnv.New32a()
  350. h.Write(src)
  351. h.Write(b)
  352. h.Write(dst)
  353. return int(binary.BigEndian.Uint32(h.Sum(nil)))
  354. }
  355. func buildAeadAlgo(k *key, s int) *netlink.XfrmStateAlgo {
  356. salt := make([]byte, 4)
  357. binary.BigEndian.PutUint32(salt, uint32(s))
  358. return &netlink.XfrmStateAlgo{
  359. Name: "rfc4106(gcm(aes))",
  360. Key: append(k.value, salt...),
  361. ICVLen: 64,
  362. }
  363. }
  364. func (d *driver) secMapWalk(f func(string, []*spi) ([]*spi, bool)) error {
  365. d.secMap.Lock()
  366. for node, indices := range d.secMap.nodes {
  367. idxs, stop := f(node, indices)
  368. if idxs != nil {
  369. d.secMap.nodes[node] = idxs
  370. }
  371. if stop {
  372. break
  373. }
  374. }
  375. d.secMap.Unlock()
  376. return nil
  377. }
  378. func (d *driver) setKeys(keys []*key) error {
  379. // Remove any stale policy, state
  380. clearEncryptionStates()
  381. // Accept the encryption keys and clear any stale encryption map
  382. d.Lock()
  383. d.keys = keys
  384. d.secMap = &encrMap{nodes: map[string][]*spi{}}
  385. d.Unlock()
  386. logrus.Debugf("Initial encryption keys: %v", keys)
  387. return nil
  388. }
  389. // updateKeys allows to add a new key and/or change the primary key and/or prune an existing key
  390. // The primary key is the key used in transmission and will go in first position in the list.
  391. func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
  392. logrus.Debugf("Updating Keys. New: %v, Primary: %v, Pruned: %v", newKey, primary, pruneKey)
  393. logrus.Debugf("Current: %v", d.keys)
  394. var (
  395. newIdx = -1
  396. priIdx = -1
  397. delIdx = -1
  398. lIP = net.ParseIP(d.bindAddress)
  399. aIP = net.ParseIP(d.advertiseAddress)
  400. )
  401. d.Lock()
  402. defer d.Unlock()
  403. // add new
  404. if newKey != nil {
  405. d.keys = append(d.keys, newKey)
  406. newIdx += len(d.keys)
  407. }
  408. for i, k := range d.keys {
  409. if primary != nil && k.tag == primary.tag {
  410. priIdx = i
  411. }
  412. if pruneKey != nil && k.tag == pruneKey.tag {
  413. delIdx = i
  414. }
  415. }
  416. if (newKey != nil && newIdx == -1) ||
  417. (primary != nil && priIdx == -1) ||
  418. (pruneKey != nil && delIdx == -1) {
  419. return types.BadRequestErrorf("cannot find proper key indices while processing key update:"+
  420. "(newIdx,priIdx,delIdx):(%d, %d, %d)", newIdx, priIdx, delIdx)
  421. }
  422. if priIdx != -1 && priIdx == delIdx {
  423. return types.BadRequestErrorf("attempting to both make a key (index %d) primary and delete it", priIdx)
  424. }
  425. d.secMapWalk(func(rIPs string, spis []*spi) ([]*spi, bool) {
  426. rIP := net.ParseIP(rIPs)
  427. return updateNodeKey(lIP, aIP, rIP, spis, d.keys, newIdx, priIdx, delIdx), false
  428. })
  429. // swap primary
  430. if priIdx != -1 {
  431. d.keys[0], d.keys[priIdx] = d.keys[priIdx], d.keys[0]
  432. }
  433. // prune
  434. if delIdx != -1 {
  435. if delIdx == 0 {
  436. delIdx = priIdx
  437. }
  438. d.keys = append(d.keys[:delIdx], d.keys[delIdx+1:]...)
  439. }
  440. logrus.Debugf("Updated: %v", d.keys)
  441. return nil
  442. }
  443. /********************************************************
  444. * Steady state: rSA0, rSA1, rSA2, fSA1, fSP1
  445. * Rotation --> -rSA0, +rSA3, +fSA2, +fSP2/-fSP1, -fSA1
  446. * Steady state: rSA1, rSA2, rSA3, fSA2, fSP2
  447. *********************************************************/
  448. // Spis and keys are sorted in such away the one in position 0 is the primary
  449. func updateNodeKey(lIP, aIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx, delIdx int) []*spi {
  450. logrus.Debugf("Updating keys for node: %s (%d,%d,%d)", rIP, newIdx, priIdx, delIdx)
  451. spis := idxs
  452. logrus.Debugf("Current: %v", spis)
  453. // add new
  454. if newIdx != -1 {
  455. spis = append(spis, &spi{
  456. forward: buildSPI(aIP, rIP, curKeys[newIdx].tag),
  457. reverse: buildSPI(rIP, aIP, curKeys[newIdx].tag),
  458. })
  459. }
  460. if delIdx != -1 {
  461. // -rSA0
  462. programSA(lIP, rIP, spis[delIdx], nil, reverse, false)
  463. }
  464. if newIdx > -1 {
  465. // +rSA2
  466. programSA(lIP, rIP, spis[newIdx], curKeys[newIdx], reverse, true)
  467. }
  468. if priIdx > 0 {
  469. // +fSA2
  470. fSA2, _, _ := programSA(lIP, rIP, spis[priIdx], curKeys[priIdx], forward, true)
  471. // +fSP2, -fSP1
  472. s := types.GetMinimalIP(fSA2.Src)
  473. d := types.GetMinimalIP(fSA2.Dst)
  474. fullMask := net.CIDRMask(8*len(s), 8*len(s))
  475. fSP1 := &netlink.XfrmPolicy{
  476. Src: &net.IPNet{IP: s, Mask: fullMask},
  477. Dst: &net.IPNet{IP: d, Mask: fullMask},
  478. Dir: netlink.XFRM_DIR_OUT,
  479. Proto: 17,
  480. DstPort: 4789,
  481. Mark: &spMark,
  482. Tmpls: []netlink.XfrmPolicyTmpl{
  483. {
  484. Src: fSA2.Src,
  485. Dst: fSA2.Dst,
  486. Proto: netlink.XFRM_PROTO_ESP,
  487. Mode: netlink.XFRM_MODE_TRANSPORT,
  488. Spi: fSA2.Spi,
  489. Reqid: r,
  490. },
  491. },
  492. }
  493. logrus.Debugf("Updating fSP{%s}", fSP1)
  494. if err := ns.NlHandle().XfrmPolicyUpdate(fSP1); err != nil {
  495. logrus.Warnf("Failed to update fSP{%s}: %v", fSP1, err)
  496. }
  497. // -fSA1
  498. programSA(lIP, rIP, spis[0], nil, forward, false)
  499. }
  500. // swap
  501. if priIdx > 0 {
  502. swp := spis[0]
  503. spis[0] = spis[priIdx]
  504. spis[priIdx] = swp
  505. }
  506. // prune
  507. if delIdx != -1 {
  508. if delIdx == 0 {
  509. delIdx = priIdx
  510. }
  511. spis = append(spis[:delIdx], spis[delIdx+1:]...)
  512. }
  513. logrus.Debugf("Updated: %v", spis)
  514. return spis
  515. }
  516. func (n *network) maxMTU() int {
  517. mtu := 1500
  518. if n.mtu != 0 {
  519. mtu = n.mtu
  520. }
  521. mtu -= vxlanEncap
  522. if n.secure {
  523. // In case of encryption account for the
  524. // esp packet expansion and padding
  525. mtu -= pktExpansion
  526. mtu -= (mtu % 4)
  527. }
  528. return mtu
  529. }
  530. func clearEncryptionStates() {
  531. nlh := ns.NlHandle()
  532. spList, err := nlh.XfrmPolicyList(netlink.FAMILY_ALL)
  533. if err != nil {
  534. logrus.Warnf("Failed to retrieve SP list for cleanup: %v", err)
  535. }
  536. saList, err := nlh.XfrmStateList(netlink.FAMILY_ALL)
  537. if err != nil {
  538. logrus.Warnf("Failed to retrieve SA list for cleanup: %v", err)
  539. }
  540. for _, sp := range spList {
  541. if sp.Mark != nil && sp.Mark.Value == spMark.Value {
  542. if err := nlh.XfrmPolicyDel(&sp); err != nil {
  543. logrus.Warnf("Failed to delete stale SP %s: %v", sp, err)
  544. continue
  545. }
  546. logrus.Debugf("Removed stale SP: %s", sp)
  547. }
  548. }
  549. for _, sa := range saList {
  550. if sa.Reqid == r {
  551. if err := nlh.XfrmStateDel(&sa); err != nil {
  552. logrus.Warnf("Failed to delete stale SA %s: %v", sa, err)
  553. continue
  554. }
  555. logrus.Debugf("Removed stale SA: %s", sa)
  556. }
  557. }
  558. }