setup_ip_tables_linux.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. package bridge
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "net"
  7. "strings"
  8. "github.com/containerd/log"
  9. "github.com/docker/docker/libnetwork/iptables"
  10. "github.com/docker/docker/libnetwork/types"
  11. "github.com/vishvananda/netlink"
  12. )
  // DockerChain is the name of the iptables chain used by this driver in the
  // nat and filter tables.
  const DockerChain = "DOCKER"

  // Isolation between bridge networks is achieved in two stages by means of
  // two chains in the filter table.
  //
  // The first chain (IsolationChain1) matches on the source interface being a
  // bridge network's bridge and the destination being a different interface.
  // A positive match leads to the second isolation chain; no match returns to
  // the parent chain.
  //
  // The second chain (IsolationChain2) matches on the destination interface
  // being a bridge network's bridge. A positive match identifies a packet that
  // originated from one bridge network's bridge and is destined to another
  // bridge network's bridge, and results in the packet being dropped; no match
  // returns to the parent chain.
  const (
  	IsolationChain1 = "DOCKER-ISOLATION-STAGE-1"
  	IsolationChain2 = "DOCKER-ISOLATION-STAGE-2"
  )
  28. func setupIPChains(config configuration, version iptables.IPVersion) (natChain *iptables.ChainInfo, filterChain *iptables.ChainInfo, isolationChain1 *iptables.ChainInfo, isolationChain2 *iptables.ChainInfo, retErr error) {
  29. // Sanity check.
  30. if !config.EnableIPTables {
  31. return nil, nil, nil, nil, errors.New("cannot create new chains, EnableIPTable is disabled")
  32. }
  33. hairpinMode := !config.EnableUserlandProxy
  34. iptable := iptables.GetIptable(version)
  35. natChain, err := iptable.NewChain(DockerChain, iptables.Nat, hairpinMode)
  36. if err != nil {
  37. return nil, nil, nil, nil, fmt.Errorf("failed to create NAT chain %s: %v", DockerChain, err)
  38. }
  39. defer func() {
  40. if retErr != nil {
  41. if err := iptable.RemoveExistingChain(DockerChain, iptables.Nat); err != nil {
  42. log.G(context.TODO()).Warnf("failed on removing iptables NAT chain %s on cleanup: %v", DockerChain, err)
  43. }
  44. }
  45. }()
  46. filterChain, err = iptable.NewChain(DockerChain, iptables.Filter, false)
  47. if err != nil {
  48. return nil, nil, nil, nil, fmt.Errorf("failed to create FILTER chain %s: %v", DockerChain, err)
  49. }
  50. defer func() {
  51. if err != nil {
  52. if err := iptable.RemoveExistingChain(DockerChain, iptables.Filter); err != nil {
  53. log.G(context.TODO()).Warnf("failed on removing iptables FILTER chain %s on cleanup: %v", DockerChain, err)
  54. }
  55. }
  56. }()
  57. isolationChain1, err = iptable.NewChain(IsolationChain1, iptables.Filter, false)
  58. if err != nil {
  59. return nil, nil, nil, nil, fmt.Errorf("failed to create FILTER isolation chain: %v", err)
  60. }
  61. defer func() {
  62. if retErr != nil {
  63. if err := iptable.RemoveExistingChain(IsolationChain1, iptables.Filter); err != nil {
  64. log.G(context.TODO()).Warnf("failed on removing iptables FILTER chain %s on cleanup: %v", IsolationChain1, err)
  65. }
  66. }
  67. }()
  68. isolationChain2, err = iptable.NewChain(IsolationChain2, iptables.Filter, false)
  69. if err != nil {
  70. return nil, nil, nil, nil, fmt.Errorf("failed to create FILTER isolation chain: %v", err)
  71. }
  72. defer func() {
  73. if retErr != nil {
  74. if err := iptable.RemoveExistingChain(IsolationChain2, iptables.Filter); err != nil {
  75. log.G(context.TODO()).Warnf("failed on removing iptables FILTER chain %s on cleanup: %v", IsolationChain2, err)
  76. }
  77. }
  78. }()
  79. if err := iptable.AddReturnRule(IsolationChain1); err != nil {
  80. return nil, nil, nil, nil, err
  81. }
  82. if err := iptable.AddReturnRule(IsolationChain2); err != nil {
  83. return nil, nil, nil, nil, err
  84. }
  85. return natChain, filterChain, isolationChain1, isolationChain2, nil
  86. }
  87. func (n *bridgeNetwork) setupIP4Tables(config *networkConfiguration, i *bridgeInterface) error {
  88. d := n.driver
  89. d.Lock()
  90. driverConfig := d.config
  91. d.Unlock()
  92. // Sanity check.
  93. if !driverConfig.EnableIPTables {
  94. return errors.New("Cannot program chains, EnableIPTable is disabled")
  95. }
  96. maskedAddrv4 := &net.IPNet{
  97. IP: i.bridgeIPv4.IP.Mask(i.bridgeIPv4.Mask),
  98. Mask: i.bridgeIPv4.Mask,
  99. }
  100. return n.setupIPTables(iptables.IPv4, maskedAddrv4, config, i)
  101. }
  102. func (n *bridgeNetwork) setupIP6Tables(config *networkConfiguration, i *bridgeInterface) error {
  103. d := n.driver
  104. d.Lock()
  105. driverConfig := d.config
  106. d.Unlock()
  107. // Sanity check.
  108. if !driverConfig.EnableIP6Tables {
  109. return errors.New("Cannot program chains, EnableIP6Tables is disabled")
  110. }
  111. maskedAddrv6 := &net.IPNet{
  112. IP: i.bridgeIPv6.IP.Mask(i.bridgeIPv6.Mask),
  113. Mask: i.bridgeIPv6.Mask,
  114. }
  115. return n.setupIPTables(iptables.IPv6, maskedAddrv6, config, i)
  116. }
  117. func (n *bridgeNetwork) setupIPTables(ipVersion iptables.IPVersion, maskedAddr *net.IPNet, config *networkConfiguration, i *bridgeInterface) error {
  118. var err error
  119. d := n.driver
  120. d.Lock()
  121. driverConfig := d.config
  122. d.Unlock()
  123. // Pickup this configuration option from driver
  124. hairpinMode := !driverConfig.EnableUserlandProxy
  125. iptable := iptables.GetIptable(ipVersion)
  126. if config.Internal {
  127. if err = setupInternalNetworkRules(config.BridgeName, maskedAddr, config.EnableICC, true); err != nil {
  128. return fmt.Errorf("Failed to Setup IP tables: %s", err.Error())
  129. }
  130. n.registerIptCleanFunc(func() error {
  131. return setupInternalNetworkRules(config.BridgeName, maskedAddr, config.EnableICC, false)
  132. })
  133. } else {
  134. if err = setupIPTablesInternal(ipVersion, config, maskedAddr, hairpinMode, true); err != nil {
  135. return fmt.Errorf("Failed to Setup IP tables: %s", err.Error())
  136. }
  137. n.registerIptCleanFunc(func() error {
  138. return setupIPTablesInternal(ipVersion, config, maskedAddr, hairpinMode, false)
  139. })
  140. natChain, filterChain, _, _, err := n.getDriverChains(ipVersion)
  141. if err != nil {
  142. return fmt.Errorf("Failed to setup IP tables, cannot acquire chain info %s", err.Error())
  143. }
  144. err = iptable.ProgramChain(natChain, config.BridgeName, hairpinMode, true)
  145. if err != nil {
  146. return fmt.Errorf("Failed to program NAT chain: %s", err.Error())
  147. }
  148. err = iptable.ProgramChain(filterChain, config.BridgeName, hairpinMode, true)
  149. if err != nil {
  150. return fmt.Errorf("Failed to program FILTER chain: %s", err.Error())
  151. }
  152. n.registerIptCleanFunc(func() error {
  153. return iptable.ProgramChain(filterChain, config.BridgeName, hairpinMode, false)
  154. })
  155. if ipVersion == iptables.IPv4 {
  156. n.portMapper.SetIptablesChain(natChain, n.getNetworkBridgeName())
  157. } else {
  158. n.portMapperV6.SetIptablesChain(natChain, n.getNetworkBridgeName())
  159. }
  160. }
  161. d.Lock()
  162. err = iptable.EnsureJumpRule("FORWARD", IsolationChain1)
  163. d.Unlock()
  164. return err
  165. }
  166. type iptRule struct {
  167. ipv iptables.IPVersion
  168. table iptables.Table
  169. chain string
  170. args []string
  171. }
  172. // Exists returns true if the rule exists in the kernel.
  173. func (r iptRule) Exists() bool {
  174. return iptables.GetIptable(r.ipv).Exists(r.table, r.chain, r.args...)
  175. }
  176. func (r iptRule) cmdArgs(op iptables.Action) []string {
  177. return append([]string{"-t", string(r.table), string(op), r.chain}, r.args...)
  178. }
  179. func (r iptRule) exec(op iptables.Action) error {
  180. return iptables.GetIptable(r.ipv).RawCombinedOutput(r.cmdArgs(op)...)
  181. }
  182. // Append appends the rule to the end of the chain. If the rule already exists anywhere in the
  183. // chain, this is a no-op.
  184. func (r iptRule) Append() error {
  185. if r.Exists() {
  186. return nil
  187. }
  188. return r.exec(iptables.Append)
  189. }
  190. // Insert inserts the rule at the head of the chain. If the rule already exists anywhere in the
  191. // chain, this is a no-op.
  192. func (r iptRule) Insert() error {
  193. if r.Exists() {
  194. return nil
  195. }
  196. return r.exec(iptables.Insert)
  197. }
  198. // Delete deletes the rule from the kernel. If the rule does not exist, this is a no-op.
  199. func (r iptRule) Delete() error {
  200. if !r.Exists() {
  201. return nil
  202. }
  203. return r.exec(iptables.Delete)
  204. }
  205. func (r iptRule) String() string {
  206. cmd := append([]string{"iptables"}, r.cmdArgs("-A")...)
  207. if r.ipv == iptables.IPv6 {
  208. cmd[0] = "ip6tables"
  209. }
  210. return strings.Join(cmd, " ")
  211. }
  212. func setupIPTablesInternal(ipVer iptables.IPVersion, config *networkConfiguration, addr *net.IPNet, hairpin, enable bool) error {
  213. var (
  214. address = addr.String()
  215. skipDNAT = iptRule{ipv: ipVer, table: iptables.Nat, chain: DockerChain, args: []string{"-i", config.BridgeName, "-j", "RETURN"}}
  216. outRule = iptRule{ipv: ipVer, table: iptables.Filter, chain: "FORWARD", args: []string{"-i", config.BridgeName, "!", "-o", config.BridgeName, "-j", "ACCEPT"}}
  217. natArgs []string
  218. hpNatArgs []string
  219. )
  220. hostIP := config.HostIPv4
  221. if ipVer == iptables.IPv6 {
  222. hostIP = config.HostIPv6
  223. }
  224. // If hostIP is set, the user wants IPv4/IPv6 SNAT with the given address.
  225. if hostIP != nil {
  226. hostAddr := hostIP.String()
  227. natArgs = []string{"-s", address, "!", "-o", config.BridgeName, "-j", "SNAT", "--to-source", hostAddr}
  228. hpNatArgs = []string{"-m", "addrtype", "--src-type", "LOCAL", "-o", config.BridgeName, "-j", "SNAT", "--to-source", hostAddr}
  229. // Else use MASQUERADE which picks the src-ip based on NH from the route table
  230. } else {
  231. natArgs = []string{"-s", address, "!", "-o", config.BridgeName, "-j", "MASQUERADE"}
  232. hpNatArgs = []string{"-m", "addrtype", "--src-type", "LOCAL", "-o", config.BridgeName, "-j", "MASQUERADE"}
  233. }
  234. natRule := iptRule{ipv: ipVer, table: iptables.Nat, chain: "POSTROUTING", args: natArgs}
  235. hpNatRule := iptRule{ipv: ipVer, table: iptables.Nat, chain: "POSTROUTING", args: hpNatArgs}
  236. // Set NAT.
  237. if config.EnableIPMasquerade {
  238. if err := programChainRule(natRule, "NAT", enable); err != nil {
  239. return err
  240. }
  241. }
  242. if config.EnableIPMasquerade && !hairpin {
  243. if err := programChainRule(skipDNAT, "SKIP DNAT", enable); err != nil {
  244. return err
  245. }
  246. }
  247. // In hairpin mode, masquerade traffic from localhost. If hairpin is disabled or if we're tearing down
  248. // that bridge, make sure the iptables rule isn't lying around.
  249. if err := programChainRule(hpNatRule, "MASQ LOCAL HOST", enable && hairpin); err != nil {
  250. return err
  251. }
  252. // Set Inter Container Communication.
  253. if err := setIcc(ipVer, config.BridgeName, config.EnableICC, enable); err != nil {
  254. return err
  255. }
  256. // Set Accept on all non-intercontainer outgoing packets.
  257. return programChainRule(outRule, "ACCEPT NON_ICC OUTGOING", enable)
  258. }
  259. func programChainRule(rule iptRule, ruleDescr string, insert bool) error {
  260. operation := "disable"
  261. fn := rule.Delete
  262. if insert {
  263. operation = "enable"
  264. fn = rule.Insert
  265. }
  266. if err := fn(); err != nil {
  267. return fmt.Errorf("Unable to %s %s rule: %s", operation, ruleDescr, err.Error())
  268. }
  269. return nil
  270. }
  271. func setIcc(version iptables.IPVersion, bridgeIface string, iccEnable, insert bool) error {
  272. args := []string{"-i", bridgeIface, "-o", bridgeIface, "-j"}
  273. acceptRule := iptRule{ipv: version, table: iptables.Filter, chain: "FORWARD", args: append(args, "ACCEPT")}
  274. dropRule := iptRule{ipv: version, table: iptables.Filter, chain: "FORWARD", args: append(args, "DROP")}
  275. if insert {
  276. if !iccEnable {
  277. acceptRule.Delete()
  278. if err := dropRule.Append(); err != nil {
  279. return fmt.Errorf("Unable to prevent intercontainer communication: %s", err.Error())
  280. }
  281. } else {
  282. dropRule.Delete()
  283. if err := acceptRule.Insert(); err != nil {
  284. return fmt.Errorf("Unable to allow intercontainer communication: %s", err.Error())
  285. }
  286. }
  287. } else {
  288. // Remove any ICC rule.
  289. if !iccEnable {
  290. dropRule.Delete()
  291. } else {
  292. acceptRule.Delete()
  293. }
  294. }
  295. return nil
  296. }
  297. // Control Inter Network Communication. Install[Remove] only if it is [not] present.
  298. func setINC(version iptables.IPVersion, iface string, enable bool) error {
  299. iptable := iptables.GetIptable(version)
  300. var (
  301. action = iptables.Insert
  302. actionMsg = "add"
  303. chains = []string{IsolationChain1, IsolationChain2}
  304. rules = [][]string{
  305. {"-i", iface, "!", "-o", iface, "-j", IsolationChain2},
  306. {"-o", iface, "-j", "DROP"},
  307. }
  308. )
  309. if !enable {
  310. action = iptables.Delete
  311. actionMsg = "remove"
  312. }
  313. for i, chain := range chains {
  314. if err := iptable.ProgramRule(iptables.Filter, chain, action, rules[i]); err != nil {
  315. msg := fmt.Sprintf("unable to %s inter-network communication rule: %v", actionMsg, err)
  316. if enable {
  317. if i == 1 {
  318. // Rollback the rule installed on first chain
  319. if err2 := iptable.ProgramRule(iptables.Filter, chains[0], iptables.Delete, rules[0]); err2 != nil {
  320. log.G(context.TODO()).Warnf("Failed to rollback iptables rule after failure (%v): %v", err, err2)
  321. }
  322. }
  323. return fmt.Errorf(msg)
  324. }
  325. log.G(context.TODO()).Warn(msg)
  326. }
  327. }
  328. return nil
  329. }
  330. // Obsolete chain from previous docker versions
  331. const oldIsolationChain = "DOCKER-ISOLATION"
  332. func removeIPChains(version iptables.IPVersion) {
  333. ipt := iptables.GetIptable(version)
  334. // Remove obsolete rules from default chains
  335. ipt.ProgramRule(iptables.Filter, "FORWARD", iptables.Delete, []string{"-j", oldIsolationChain})
  336. // Remove chains
  337. for _, chainInfo := range []iptables.ChainInfo{
  338. {Name: DockerChain, Table: iptables.Nat, IPVersion: version},
  339. {Name: DockerChain, Table: iptables.Filter, IPVersion: version},
  340. {Name: IsolationChain1, Table: iptables.Filter, IPVersion: version},
  341. {Name: IsolationChain2, Table: iptables.Filter, IPVersion: version},
  342. {Name: oldIsolationChain, Table: iptables.Filter, IPVersion: version},
  343. } {
  344. if err := chainInfo.Remove(); err != nil {
  345. log.G(context.TODO()).Warnf("Failed to remove existing iptables entries in table %s chain %s : %v", chainInfo.Table, chainInfo.Name, err)
  346. }
  347. }
  348. }
  349. func setupInternalNetworkRules(bridgeIface string, addr *net.IPNet, icc, insert bool) error {
  350. var version iptables.IPVersion
  351. var inDropRule, outDropRule iptRule
  352. if addr.IP.To4() != nil {
  353. version = iptables.IPv4
  354. inDropRule = iptRule{
  355. ipv: version,
  356. table: iptables.Filter,
  357. chain: IsolationChain1,
  358. args: []string{"-i", bridgeIface, "!", "-d", addr.String(), "-j", "DROP"},
  359. }
  360. outDropRule = iptRule{
  361. ipv: version,
  362. table: iptables.Filter,
  363. chain: IsolationChain1,
  364. args: []string{"-o", bridgeIface, "!", "-s", addr.String(), "-j", "DROP"},
  365. }
  366. } else {
  367. version = iptables.IPv6
  368. inDropRule = iptRule{
  369. ipv: version,
  370. table: iptables.Filter,
  371. chain: IsolationChain1,
  372. args: []string{"-i", bridgeIface, "!", "-o", bridgeIface, "!", "-d", addr.String(), "-j", "DROP"},
  373. }
  374. outDropRule = iptRule{
  375. ipv: version,
  376. table: iptables.Filter,
  377. chain: IsolationChain1,
  378. args: []string{"!", "-i", bridgeIface, "-o", bridgeIface, "!", "-s", addr.String(), "-j", "DROP"},
  379. }
  380. }
  381. if err := programChainRule(inDropRule, "DROP INCOMING", insert); err != nil {
  382. return err
  383. }
  384. if err := programChainRule(outDropRule, "DROP OUTGOING", insert); err != nil {
  385. return err
  386. }
  387. // Set Inter Container Communication.
  388. return setIcc(version, bridgeIface, icc, insert)
  389. }
  390. // clearConntrackEntries flushes conntrack entries matching endpoint IP address
  391. // or matching one of the exposed UDP port.
  392. // In the first case, this could happen if packets were received by the host
  393. // between userland proxy startup and iptables setup.
  394. // In the latter case, this could happen if packets were received whereas there
  395. // were nowhere to route them, as netfilter creates entries in such case.
  396. // This is required because iptables NAT rules are evaluated by netfilter only
  397. // when creating a new conntrack entry. When Docker latter adds NAT rules,
  398. // netfilter ignore them for any packet matching a pre-existing conntrack entry.
  399. // As such, we need to flush all those conntrack entries to make sure NAT rules
  400. // are correctly applied to all packets.
  401. // See: #8795, #44688 & #44742.
  402. func clearConntrackEntries(nlh *netlink.Handle, ep *bridgeEndpoint) {
  403. var ipv4List []net.IP
  404. var ipv6List []net.IP
  405. var udpPorts []uint16
  406. if ep.addr != nil {
  407. ipv4List = append(ipv4List, ep.addr.IP)
  408. }
  409. if ep.addrv6 != nil {
  410. ipv6List = append(ipv6List, ep.addrv6.IP)
  411. }
  412. for _, pb := range ep.portMapping {
  413. if pb.Proto == types.UDP {
  414. udpPorts = append(udpPorts, pb.HostPort)
  415. }
  416. }
  417. iptables.DeleteConntrackEntries(nlh, ipv4List, ipv6List)
  418. iptables.DeleteConntrackEntriesByPort(nlh, types.UDP, udpPorts)
  419. }