encryption.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639
  1. package overlay
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "encoding/hex"
  6. "fmt"
  7. "hash/fnv"
  8. "net"
  9. "sync"
  10. "syscall"
  11. "strconv"
  12. "github.com/docker/libnetwork/iptables"
  13. "github.com/docker/libnetwork/ns"
  14. "github.com/docker/libnetwork/types"
  15. "github.com/sirupsen/logrus"
  16. "github.com/vishvananda/netlink"
  17. )
  // Constants shared by the encryption helpers in this file.
  const (
  	// r is the fixed identifier this driver stamps on everything it programs:
  	// it is used as the xfrm Reqid, as the xfrm policy mark value and as the
  	// iptables packet mark.
  	r = 0xD0C4E3
  	// pktExpansion is the number of bytes ESP adds to each packet:
  	// SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8).
  	pktExpansion = 26
  )

  // Direction flags for security associations. They are tested with a bitwise
  // AND, so bidir is the union of the two single directions.
  const (
  	forward = 1
  	reverse = 2
  	bidir   = forward | reverse
  )
  27. var spMark = netlink.XfrmMark{Value: uint32(r), Mask: 0xffffffff}
  // key is an encryption key: value holds the raw key bytes and tag is the
  // identifier used to match keys during updates and to derive SPIs.
  type key struct {
  	value []byte
  	tag   uint32
  }

  // String returns a short description of the key for log messages. Only the
  // first five hex digits of the key value are included. A nil key yields the
  // empty string.
  func (k *key) String() string {
  	if k == nil {
  		return ""
  	}
  	// Clamp the prefix: slicing [0:5] unconditionally panics for keys shorter
  	// than three bytes (fewer than five hex digits).
  	prefix := hex.EncodeToString(k.value)
  	if len(prefix) > 5 {
  		prefix = prefix[:5]
  	}
  	return fmt.Sprintf("(key: %s, tag: 0x%x)", prefix, k.tag)
  }
  // spi holds the pair of security parameter indexes used with one remote
  // node for one key: forward for the local-to-remote direction and reverse
  // for the remote-to-local direction.
  type spi struct {
  	forward int
  	reverse int
  }

  // String renders both indexes as 32-bit hexadecimal values.
  func (s *spi) String() string {
  	fwd, rev := uint32(s.forward), uint32(s.reverse)
  	return fmt.Sprintf("SPI(FWD: 0x%x, REV: 0x%x)", fwd, rev)
  }
  45. type encrMap struct {
  46. nodes map[string][]*spi
  47. sync.Mutex
  48. }
  49. func (e *encrMap) String() string {
  50. e.Lock()
  51. defer e.Unlock()
  52. b := new(bytes.Buffer)
  53. for k, v := range e.nodes {
  54. b.WriteString("\n")
  55. b.WriteString(k)
  56. b.WriteString(":")
  57. b.WriteString("[")
  58. for _, s := range v {
  59. b.WriteString(s.String())
  60. b.WriteString(",")
  61. }
  62. b.WriteString("]")
  63. }
  64. return b.String()
  65. }
  66. func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal, add bool) error {
  67. logrus.Debugf("checkEncryption(%.7s, %v, %d, %t)", nid, rIP, vxlanID, isLocal)
  68. n := d.network(nid)
  69. if n == nil || !n.secure {
  70. return nil
  71. }
  72. if len(d.keys) == 0 {
  73. return types.ForbiddenErrorf("encryption key is not present")
  74. }
  75. lIP := net.ParseIP(d.bindAddress)
  76. aIP := net.ParseIP(d.advertiseAddress)
  77. nodes := map[string]net.IP{}
  78. switch {
  79. case isLocal:
  80. if err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool {
  81. if !aIP.Equal(pEntry.vtep) {
  82. nodes[pEntry.vtep.String()] = pEntry.vtep
  83. }
  84. return false
  85. }); err != nil {
  86. logrus.Warnf("Failed to retrieve list of participating nodes in overlay network %.5s: %v", nid, err)
  87. }
  88. default:
  89. if len(d.network(nid).endpoints) > 0 {
  90. nodes[rIP.String()] = rIP
  91. }
  92. }
  93. logrus.Debugf("List of nodes: %s", nodes)
  94. if add {
  95. for _, rIP := range nodes {
  96. if err := setupEncryption(lIP, aIP, rIP, vxlanID, d.secMap, d.keys); err != nil {
  97. logrus.Warnf("Failed to program network encryption between %s and %s: %v", lIP, rIP, err)
  98. }
  99. }
  100. } else {
  101. if len(nodes) == 0 {
  102. if err := removeEncryption(lIP, rIP, d.secMap); err != nil {
  103. logrus.Warnf("Failed to remove network encryption between %s and %s: %v", lIP, rIP, err)
  104. }
  105. }
  106. }
  107. return nil
  108. }
  109. func setupEncryption(localIP, advIP, remoteIP net.IP, vni uint32, em *encrMap, keys []*key) error {
  110. logrus.Debugf("Programming encryption for vxlan %d between %s and %s", vni, localIP, remoteIP)
  111. rIPs := remoteIP.String()
  112. indices := make([]*spi, 0, len(keys))
  113. err := programMangle(vni, true)
  114. if err != nil {
  115. logrus.Warn(err)
  116. }
  117. err = programInput(vni, true)
  118. if err != nil {
  119. logrus.Warn(err)
  120. }
  121. for i, k := range keys {
  122. spis := &spi{buildSPI(advIP, remoteIP, k.tag), buildSPI(remoteIP, advIP, k.tag)}
  123. dir := reverse
  124. if i == 0 {
  125. dir = bidir
  126. }
  127. fSA, rSA, err := programSA(localIP, remoteIP, spis, k, dir, true)
  128. if err != nil {
  129. logrus.Warn(err)
  130. }
  131. indices = append(indices, spis)
  132. if i != 0 {
  133. continue
  134. }
  135. err = programSP(fSA, rSA, true)
  136. if err != nil {
  137. logrus.Warn(err)
  138. }
  139. }
  140. em.Lock()
  141. em.nodes[rIPs] = indices
  142. em.Unlock()
  143. return nil
  144. }
  145. func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error {
  146. em.Lock()
  147. indices, ok := em.nodes[remoteIP.String()]
  148. em.Unlock()
  149. if !ok {
  150. return nil
  151. }
  152. for i, idxs := range indices {
  153. dir := reverse
  154. if i == 0 {
  155. dir = bidir
  156. }
  157. fSA, rSA, err := programSA(localIP, remoteIP, idxs, nil, dir, false)
  158. if err != nil {
  159. logrus.Warn(err)
  160. }
  161. if i != 0 {
  162. continue
  163. }
  164. err = programSP(fSA, rSA, false)
  165. if err != nil {
  166. logrus.Warn(err)
  167. }
  168. }
  169. return nil
  170. }
  171. func programMangle(vni uint32, add bool) (err error) {
  172. var (
  173. p = strconv.FormatUint(uint64(vxlanPort), 10)
  174. c = fmt.Sprintf("0>>22&0x3C@12&0xFFFFFF00=%d", int(vni)<<8)
  175. m = strconv.FormatUint(uint64(r), 10)
  176. chain = "OUTPUT"
  177. rule = []string{"-p", "udp", "--dport", p, "-m", "u32", "--u32", c, "-j", "MARK", "--set-mark", m}
  178. a = "-A"
  179. action = "install"
  180. )
  181. if add == iptables.Exists(iptables.Mangle, chain, rule...) {
  182. return
  183. }
  184. if !add {
  185. a = "-D"
  186. action = "remove"
  187. }
  188. if err = iptables.RawCombinedOutput(append([]string{"-t", string(iptables.Mangle), a, chain}, rule...)...); err != nil {
  189. logrus.Warnf("could not %s mangle rule: %v", action, err)
  190. }
  191. return
  192. }
  193. func programInput(vni uint32, add bool) (err error) {
  194. var (
  195. port = strconv.FormatUint(uint64(vxlanPort), 10)
  196. vniMatch = fmt.Sprintf("0>>22&0x3C@12&0xFFFFFF00=%d", int(vni)<<8)
  197. plainVxlan = []string{"-p", "udp", "--dport", port, "-m", "u32", "--u32", vniMatch, "-j"}
  198. ipsecVxlan = append([]string{"-m", "policy", "--dir", "in", "--pol", "ipsec"}, plainVxlan...)
  199. block = append(plainVxlan, "DROP")
  200. accept = append(ipsecVxlan, "ACCEPT")
  201. chain = "INPUT"
  202. action = iptables.Append
  203. msg = "add"
  204. )
  205. if !add {
  206. action = iptables.Delete
  207. msg = "remove"
  208. }
  209. if err := iptables.ProgramRule(iptables.Filter, chain, action, accept); err != nil {
  210. logrus.Errorf("could not %s input rule: %v. Please do it manually.", msg, err)
  211. }
  212. if err := iptables.ProgramRule(iptables.Filter, chain, action, block); err != nil {
  213. logrus.Errorf("could not %s input rule: %v. Please do it manually.", msg, err)
  214. }
  215. return
  216. }
  217. func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) {
  218. var (
  219. action = "Removing"
  220. xfrmProgram = ns.NlHandle().XfrmStateDel
  221. )
  222. if add {
  223. action = "Adding"
  224. xfrmProgram = ns.NlHandle().XfrmStateAdd
  225. }
  226. if dir&reverse > 0 {
  227. rSA = &netlink.XfrmState{
  228. Src: remoteIP,
  229. Dst: localIP,
  230. Proto: netlink.XFRM_PROTO_ESP,
  231. Spi: spi.reverse,
  232. Mode: netlink.XFRM_MODE_TRANSPORT,
  233. Reqid: r,
  234. }
  235. if add {
  236. rSA.Aead = buildAeadAlgo(k, spi.reverse)
  237. }
  238. exists, err := saExists(rSA)
  239. if err != nil {
  240. exists = !add
  241. }
  242. if add != exists {
  243. logrus.Debugf("%s: rSA{%s}", action, rSA)
  244. if err := xfrmProgram(rSA); err != nil {
  245. logrus.Warnf("Failed %s rSA{%s}: %v", action, rSA, err)
  246. }
  247. }
  248. }
  249. if dir&forward > 0 {
  250. fSA = &netlink.XfrmState{
  251. Src: localIP,
  252. Dst: remoteIP,
  253. Proto: netlink.XFRM_PROTO_ESP,
  254. Spi: spi.forward,
  255. Mode: netlink.XFRM_MODE_TRANSPORT,
  256. Reqid: r,
  257. }
  258. if add {
  259. fSA.Aead = buildAeadAlgo(k, spi.forward)
  260. }
  261. exists, err := saExists(fSA)
  262. if err != nil {
  263. exists = !add
  264. }
  265. if add != exists {
  266. logrus.Debugf("%s fSA{%s}", action, fSA)
  267. if err := xfrmProgram(fSA); err != nil {
  268. logrus.Warnf("Failed %s fSA{%s}: %v.", action, fSA, err)
  269. }
  270. }
  271. }
  272. return
  273. }
  274. func programSP(fSA *netlink.XfrmState, rSA *netlink.XfrmState, add bool) error {
  275. action := "Removing"
  276. xfrmProgram := ns.NlHandle().XfrmPolicyDel
  277. if add {
  278. action = "Adding"
  279. xfrmProgram = ns.NlHandle().XfrmPolicyAdd
  280. }
  281. // Create a congruent cidr
  282. s := types.GetMinimalIP(fSA.Src)
  283. d := types.GetMinimalIP(fSA.Dst)
  284. fullMask := net.CIDRMask(8*len(s), 8*len(s))
  285. fPol := &netlink.XfrmPolicy{
  286. Src: &net.IPNet{IP: s, Mask: fullMask},
  287. Dst: &net.IPNet{IP: d, Mask: fullMask},
  288. Dir: netlink.XFRM_DIR_OUT,
  289. Proto: 17,
  290. DstPort: 4789,
  291. Mark: &spMark,
  292. Tmpls: []netlink.XfrmPolicyTmpl{
  293. {
  294. Src: fSA.Src,
  295. Dst: fSA.Dst,
  296. Proto: netlink.XFRM_PROTO_ESP,
  297. Mode: netlink.XFRM_MODE_TRANSPORT,
  298. Spi: fSA.Spi,
  299. Reqid: r,
  300. },
  301. },
  302. }
  303. exists, err := spExists(fPol)
  304. if err != nil {
  305. exists = !add
  306. }
  307. if add != exists {
  308. logrus.Debugf("%s fSP{%s}", action, fPol)
  309. if err := xfrmProgram(fPol); err != nil {
  310. logrus.Warnf("%s fSP{%s}: %v", action, fPol, err)
  311. }
  312. }
  313. return nil
  314. }
  315. func saExists(sa *netlink.XfrmState) (bool, error) {
  316. _, err := ns.NlHandle().XfrmStateGet(sa)
  317. switch err {
  318. case nil:
  319. return true, nil
  320. case syscall.ESRCH:
  321. return false, nil
  322. default:
  323. err = fmt.Errorf("Error while checking for SA existence: %v", err)
  324. logrus.Warn(err)
  325. return false, err
  326. }
  327. }
  328. func spExists(sp *netlink.XfrmPolicy) (bool, error) {
  329. _, err := ns.NlHandle().XfrmPolicyGet(sp)
  330. switch err {
  331. case nil:
  332. return true, nil
  333. case syscall.ENOENT:
  334. return false, nil
  335. default:
  336. err = fmt.Errorf("Error while checking for SP existence: %v", err)
  337. logrus.Warn(err)
  338. return false, err
  339. }
  340. }
  // buildSPI derives a security parameter index from a source address, a
  // destination address and a key tag. It is the 32-bit FNV-1a hash of the raw
  // bytes of src, the big-endian encoding of st and the raw bytes of dst, in
  // that order. The raw address bytes are hashed as given, so the result
  // depends on the byte length of the net.IP values passed in.
  func buildSPI(src, dst net.IP, st uint32) int {
  	var tag [4]byte
  	binary.BigEndian.PutUint32(tag[:], st)

  	hasher := fnv.New32a()
  	for _, chunk := range [][]byte{src, tag[:], dst} {
  		hasher.Write(chunk)
  	}
  	return int(hasher.Sum32())
  }
  350. func buildAeadAlgo(k *key, s int) *netlink.XfrmStateAlgo {
  351. salt := make([]byte, 4)
  352. binary.BigEndian.PutUint32(salt, uint32(s))
  353. return &netlink.XfrmStateAlgo{
  354. Name: "rfc4106(gcm(aes))",
  355. Key: append(k.value, salt...),
  356. ICVLen: 64,
  357. }
  358. }
  359. func (d *driver) secMapWalk(f func(string, []*spi) ([]*spi, bool)) error {
  360. d.secMap.Lock()
  361. for node, indices := range d.secMap.nodes {
  362. idxs, stop := f(node, indices)
  363. if idxs != nil {
  364. d.secMap.nodes[node] = idxs
  365. }
  366. if stop {
  367. break
  368. }
  369. }
  370. d.secMap.Unlock()
  371. return nil
  372. }
  373. func (d *driver) setKeys(keys []*key) error {
  374. // Remove any stale policy, state
  375. clearEncryptionStates()
  376. // Accept the encryption keys and clear any stale encryption map
  377. d.Lock()
  378. d.keys = keys
  379. d.secMap = &encrMap{nodes: map[string][]*spi{}}
  380. d.Unlock()
  381. logrus.Debugf("Initial encryption keys: %v", keys)
  382. return nil
  383. }
  384. // updateKeys allows to add a new key and/or change the primary key and/or prune an existing key
  385. // The primary key is the key used in transmission and will go in first position in the list.
  386. func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
  387. logrus.Debugf("Updating Keys. New: %v, Primary: %v, Pruned: %v", newKey, primary, pruneKey)
  388. logrus.Debugf("Current: %v", d.keys)
  389. var (
  390. newIdx = -1
  391. priIdx = -1
  392. delIdx = -1
  393. lIP = net.ParseIP(d.bindAddress)
  394. aIP = net.ParseIP(d.advertiseAddress)
  395. )
  396. d.Lock()
  397. defer d.Unlock()
  398. // add new
  399. if newKey != nil {
  400. d.keys = append(d.keys, newKey)
  401. newIdx += len(d.keys)
  402. }
  403. for i, k := range d.keys {
  404. if primary != nil && k.tag == primary.tag {
  405. priIdx = i
  406. }
  407. if pruneKey != nil && k.tag == pruneKey.tag {
  408. delIdx = i
  409. }
  410. }
  411. if (newKey != nil && newIdx == -1) ||
  412. (primary != nil && priIdx == -1) ||
  413. (pruneKey != nil && delIdx == -1) {
  414. return types.BadRequestErrorf("cannot find proper key indices while processing key update:"+
  415. "(newIdx,priIdx,delIdx):(%d, %d, %d)", newIdx, priIdx, delIdx)
  416. }
  417. if priIdx != -1 && priIdx == delIdx {
  418. return types.BadRequestErrorf("attempting to both make a key (index %d) primary and delete it", priIdx)
  419. }
  420. d.secMapWalk(func(rIPs string, spis []*spi) ([]*spi, bool) {
  421. rIP := net.ParseIP(rIPs)
  422. return updateNodeKey(lIP, aIP, rIP, spis, d.keys, newIdx, priIdx, delIdx), false
  423. })
  424. // swap primary
  425. if priIdx != -1 {
  426. d.keys[0], d.keys[priIdx] = d.keys[priIdx], d.keys[0]
  427. }
  428. // prune
  429. if delIdx != -1 {
  430. if delIdx == 0 {
  431. delIdx = priIdx
  432. }
  433. d.keys = append(d.keys[:delIdx], d.keys[delIdx+1:]...)
  434. }
  435. logrus.Debugf("Updated: %v", d.keys)
  436. return nil
  437. }
  438. /********************************************************
  439. * Steady state: rSA0, rSA1, rSA2, fSA1, fSP1
  440. * Rotation --> -rSA0, +rSA3, +fSA2, +fSP2/-fSP1, -fSA1
  441. * Steady state: rSA1, rSA2, rSA3, fSA2, fSP2
  442. *********************************************************/
  443. // Spis and keys are sorted in such away the one in position 0 is the primary
  444. func updateNodeKey(lIP, aIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx, delIdx int) []*spi {
  445. logrus.Debugf("Updating keys for node: %s (%d,%d,%d)", rIP, newIdx, priIdx, delIdx)
  446. spis := idxs
  447. logrus.Debugf("Current: %v", spis)
  448. // add new
  449. if newIdx != -1 {
  450. spis = append(spis, &spi{
  451. forward: buildSPI(aIP, rIP, curKeys[newIdx].tag),
  452. reverse: buildSPI(rIP, aIP, curKeys[newIdx].tag),
  453. })
  454. }
  455. if delIdx != -1 {
  456. // -rSA0
  457. programSA(lIP, rIP, spis[delIdx], nil, reverse, false)
  458. }
  459. if newIdx > -1 {
  460. // +rSA2
  461. programSA(lIP, rIP, spis[newIdx], curKeys[newIdx], reverse, true)
  462. }
  463. if priIdx > 0 {
  464. // +fSA2
  465. fSA2, _, _ := programSA(lIP, rIP, spis[priIdx], curKeys[priIdx], forward, true)
  466. // +fSP2, -fSP1
  467. s := types.GetMinimalIP(fSA2.Src)
  468. d := types.GetMinimalIP(fSA2.Dst)
  469. fullMask := net.CIDRMask(8*len(s), 8*len(s))
  470. fSP1 := &netlink.XfrmPolicy{
  471. Src: &net.IPNet{IP: s, Mask: fullMask},
  472. Dst: &net.IPNet{IP: d, Mask: fullMask},
  473. Dir: netlink.XFRM_DIR_OUT,
  474. Proto: 17,
  475. DstPort: 4789,
  476. Mark: &spMark,
  477. Tmpls: []netlink.XfrmPolicyTmpl{
  478. {
  479. Src: fSA2.Src,
  480. Dst: fSA2.Dst,
  481. Proto: netlink.XFRM_PROTO_ESP,
  482. Mode: netlink.XFRM_MODE_TRANSPORT,
  483. Spi: fSA2.Spi,
  484. Reqid: r,
  485. },
  486. },
  487. }
  488. logrus.Debugf("Updating fSP{%s}", fSP1)
  489. if err := ns.NlHandle().XfrmPolicyUpdate(fSP1); err != nil {
  490. logrus.Warnf("Failed to update fSP{%s}: %v", fSP1, err)
  491. }
  492. // -fSA1
  493. programSA(lIP, rIP, spis[0], nil, forward, false)
  494. }
  495. // swap
  496. if priIdx > 0 {
  497. swp := spis[0]
  498. spis[0] = spis[priIdx]
  499. spis[priIdx] = swp
  500. }
  501. // prune
  502. if delIdx != -1 {
  503. if delIdx == 0 {
  504. delIdx = priIdx
  505. }
  506. spis = append(spis[:delIdx], spis[delIdx+1:]...)
  507. }
  508. logrus.Debugf("Updated: %v", spis)
  509. return spis
  510. }
  511. func (n *network) maxMTU() int {
  512. mtu := 1500
  513. if n.mtu != 0 {
  514. mtu = n.mtu
  515. }
  516. mtu -= vxlanEncap
  517. if n.secure {
  518. // In case of encryption account for the
  519. // esp packet espansion and padding
  520. mtu -= pktExpansion
  521. mtu -= (mtu % 4)
  522. }
  523. return mtu
  524. }
  525. func clearEncryptionStates() {
  526. nlh := ns.NlHandle()
  527. spList, err := nlh.XfrmPolicyList(netlink.FAMILY_ALL)
  528. if err != nil {
  529. logrus.Warnf("Failed to retrieve SP list for cleanup: %v", err)
  530. }
  531. saList, err := nlh.XfrmStateList(netlink.FAMILY_ALL)
  532. if err != nil {
  533. logrus.Warnf("Failed to retrieve SA list for cleanup: %v", err)
  534. }
  535. for _, sp := range spList {
  536. if sp.Mark != nil && sp.Mark.Value == spMark.Value {
  537. if err := nlh.XfrmPolicyDel(&sp); err != nil {
  538. logrus.Warnf("Failed to delete stale SP %s: %v", sp, err)
  539. continue
  540. }
  541. logrus.Debugf("Removed stale SP: %s", sp)
  542. }
  543. }
  544. for _, sa := range saList {
  545. if sa.Reqid == r {
  546. if err := nlh.XfrmStateDel(&sa); err != nil {
  547. logrus.Warnf("Failed to delete stale SA %s: %v", sa, err)
  548. continue
  549. }
  550. logrus.Debugf("Removed stale SA: %s", sa)
  551. }
  552. }
  553. }