iptables.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621
  1. //go:build linux
  2. package iptables
  3. import (
  4. "context"
  5. "errors"
  6. "fmt"
  7. "net"
  8. "os/exec"
  9. "strconv"
  10. "strings"
  11. "sync"
  12. "time"
  13. "github.com/containerd/log"
  14. "github.com/docker/docker/errdefs"
  15. "github.com/docker/docker/pkg/rootless"
  16. )
  17. // Action signifies the iptable action.
  18. type Action string
  19. const (
  20. // Append appends the rule at the end of the chain.
  21. Append Action = "-A"
  22. // Delete deletes the rule from the chain.
  23. Delete Action = "-D"
  24. // Insert inserts the rule at the top of the chain.
  25. Insert Action = "-I"
  26. )
  27. // Policy is the default iptable policies
  28. type Policy string
  29. const (
  30. // Drop is the default iptables DROP policy.
  31. Drop Policy = "DROP"
  32. // Accept is the default iptables ACCEPT policy.
  33. Accept Policy = "ACCEPT"
  34. )
  35. // Table refers to Nat, Filter or Mangle.
  36. type Table string
  37. const (
  38. // Nat table is used for nat translation rules.
  39. Nat Table = "nat"
  40. // Filter table is used for filter rules.
  41. Filter Table = "filter"
  42. // Mangle table is used for mangling the packet.
  43. Mangle Table = "mangle"
  44. )
  45. // IPVersion refers to IP version, v4 or v6
  46. type IPVersion string
  47. const (
  48. // IPv4 is version 4.
  49. IPv4 IPVersion = "IPV4"
  50. // IPv6 is version 6.
  51. IPv6 IPVersion = "IPV6"
  52. )
  53. var (
  54. iptablesPath string
  55. ip6tablesPath string
  56. supportsXlock = false
  57. // used to lock iptables commands if xtables lock is not supported
  58. bestEffortLock sync.Mutex
  59. initOnce sync.Once
  60. )
  61. // IPTable defines struct with [IPVersion].
  62. type IPTable struct {
  63. ipVersion IPVersion
  64. }
  65. // ChainInfo defines the iptables chain.
  66. type ChainInfo struct {
  67. Name string
  68. Table Table
  69. HairpinMode bool
  70. IPVersion IPVersion
  71. }
  72. // ChainError is returned to represent errors during ip table operation.
  73. type ChainError struct {
  74. Chain string
  75. Output []byte
  76. }
  77. func (e ChainError) Error() string {
  78. return fmt.Sprintf("error iptables %s: %s", e.Chain, string(e.Output))
  79. }
  80. // loopbackAddress returns the loopback address for the given IP version.
  81. func loopbackAddress(version IPVersion) string {
  82. switch version {
  83. case IPv4, "":
  84. // IPv4 (default for backward-compatibility)
  85. return "127.0.0.0/8"
  86. case IPv6:
  87. return "::1/128"
  88. default:
  89. panic("unknown IP version: " + version)
  90. }
  91. }
  92. func detectIptables() {
  93. path, err := exec.LookPath("iptables")
  94. if err != nil {
  95. log.G(context.TODO()).WithError(err).Warnf("failed to find iptables")
  96. return
  97. }
  98. iptablesPath = path
  99. // The --wait flag was added in iptables v1.6.0.
  100. // TODO remove this check once we drop support for CentOS/RHEL 7, which uses an older version of iptables
  101. if out, err := exec.Command(path, "--wait", "-L", "-n").CombinedOutput(); err != nil {
  102. log.G(context.TODO()).WithError(err).Infof("unable to detect if iptables supports xlock: 'iptables --wait -L -n': `%s`", strings.TrimSpace(string(out)))
  103. } else {
  104. supportsXlock = true
  105. }
  106. path, err = exec.LookPath("ip6tables")
  107. if err != nil {
  108. log.G(context.TODO()).WithError(err).Warnf("unable to find ip6tables")
  109. } else {
  110. ip6tablesPath = path
  111. }
  112. }
  113. func initFirewalld() {
  114. // When running with RootlessKit, firewalld is running as the root outside our network namespace
  115. // https://github.com/moby/moby/issues/43781
  116. if rootless.RunningWithRootlessKit() {
  117. log.G(context.TODO()).Info("skipping firewalld management for rootless mode")
  118. return
  119. }
  120. if err := firewalldInit(); err != nil {
  121. log.G(context.TODO()).WithError(err).Debugf("unable to initialize firewalld; using raw iptables instead")
  122. }
  123. }
  124. func initDependencies() {
  125. initFirewalld()
  126. detectIptables()
  127. }
  128. func initCheck() error {
  129. initOnce.Do(initDependencies)
  130. if iptablesPath == "" {
  131. return errors.New("iptables not found")
  132. }
  133. return nil
  134. }
  135. // GetIptable returns an instance of IPTable with specified version ([IPv4]
  136. // or [IPv6]). It panics if an invalid [IPVersion] is provided.
  137. func GetIptable(version IPVersion) *IPTable {
  138. switch version {
  139. case IPv4, IPv6:
  140. // valid version
  141. case "":
  142. // default is IPv4 for backward-compatibility
  143. version = IPv4
  144. default:
  145. panic("unknown IP version: " + version)
  146. }
  147. return &IPTable{ipVersion: version}
  148. }
  149. // NewChain adds a new chain to ip table.
  150. func (iptable IPTable) NewChain(name string, table Table, hairpinMode bool) (*ChainInfo, error) {
  151. if name == "" {
  152. return nil, fmt.Errorf("could not create chain: chain name is empty")
  153. }
  154. if table == "" {
  155. return nil, fmt.Errorf("could not create chain %s: invalid table name: table name is empty", name)
  156. }
  157. // Add chain if it doesn't exist
  158. if _, err := iptable.Raw("-t", string(table), "-n", "-L", name); err != nil {
  159. if output, err := iptable.Raw("-t", string(table), "-N", name); err != nil {
  160. return nil, err
  161. } else if len(output) != 0 {
  162. return nil, fmt.Errorf("could not create %s/%s chain: %s", table, name, output)
  163. }
  164. }
  165. return &ChainInfo{
  166. Name: name,
  167. Table: table,
  168. HairpinMode: hairpinMode,
  169. IPVersion: iptable.ipVersion,
  170. }, nil
  171. }
  172. // ProgramChain is used to add rules to a chain
  173. func (iptable IPTable) ProgramChain(c *ChainInfo, bridgeName string, hairpinMode, enable bool) error {
  174. if c.Name == "" {
  175. return errors.New("could not program chain, missing chain name")
  176. }
  177. // Either add or remove the interface from the firewalld zone, if firewalld is running.
  178. if enable {
  179. if err := AddInterfaceFirewalld(bridgeName); err != nil {
  180. return err
  181. }
  182. } else {
  183. if err := DelInterfaceFirewalld(bridgeName); err != nil && !errdefs.IsNotFound(err) {
  184. return err
  185. }
  186. }
  187. switch c.Table {
  188. case Nat:
  189. preroute := []string{
  190. "-m", "addrtype",
  191. "--dst-type", "LOCAL",
  192. "-j", c.Name,
  193. }
  194. if !iptable.Exists(Nat, "PREROUTING", preroute...) && enable {
  195. if err := c.Prerouting(Append, preroute...); err != nil {
  196. return fmt.Errorf("failed to inject %s in PREROUTING chain: %s", c.Name, err)
  197. }
  198. } else if iptable.Exists(Nat, "PREROUTING", preroute...) && !enable {
  199. if err := c.Prerouting(Delete, preroute...); err != nil {
  200. return fmt.Errorf("failed to remove %s in PREROUTING chain: %s", c.Name, err)
  201. }
  202. }
  203. output := []string{
  204. "-m", "addrtype",
  205. "--dst-type", "LOCAL",
  206. "-j", c.Name,
  207. }
  208. if !hairpinMode {
  209. output = append(output, "!", "--dst", loopbackAddress(iptable.ipVersion))
  210. }
  211. if !iptable.Exists(Nat, "OUTPUT", output...) && enable {
  212. if err := c.Output(Append, output...); err != nil {
  213. return fmt.Errorf("failed to inject %s in OUTPUT chain: %s", c.Name, err)
  214. }
  215. } else if iptable.Exists(Nat, "OUTPUT", output...) && !enable {
  216. if err := c.Output(Delete, output...); err != nil {
  217. return fmt.Errorf("failed to inject %s in OUTPUT chain: %s", c.Name, err)
  218. }
  219. }
  220. case Filter:
  221. if bridgeName == "" {
  222. return fmt.Errorf("could not program chain %s/%s, missing bridge name", c.Table, c.Name)
  223. }
  224. link := []string{
  225. "-o", bridgeName,
  226. "-j", c.Name,
  227. }
  228. if !iptable.Exists(Filter, "FORWARD", link...) && enable {
  229. insert := append([]string{string(Insert), "FORWARD"}, link...)
  230. if output, err := iptable.Raw(insert...); err != nil {
  231. return err
  232. } else if len(output) != 0 {
  233. return fmt.Errorf("could not create linking rule to %s/%s: %s", c.Table, c.Name, output)
  234. }
  235. } else if iptable.Exists(Filter, "FORWARD", link...) && !enable {
  236. del := append([]string{string(Delete), "FORWARD"}, link...)
  237. if output, err := iptable.Raw(del...); err != nil {
  238. return err
  239. } else if len(output) != 0 {
  240. return fmt.Errorf("could not delete linking rule from %s/%s: %s", c.Table, c.Name, output)
  241. }
  242. }
  243. establish := []string{
  244. "-o", bridgeName,
  245. "-m", "conntrack",
  246. "--ctstate", "RELATED,ESTABLISHED",
  247. "-j", "ACCEPT",
  248. }
  249. if !iptable.Exists(Filter, "FORWARD", establish...) && enable {
  250. insert := append([]string{string(Insert), "FORWARD"}, establish...)
  251. if output, err := iptable.Raw(insert...); err != nil {
  252. return err
  253. } else if len(output) != 0 {
  254. return fmt.Errorf("could not create establish rule to %s: %s", c.Table, output)
  255. }
  256. } else if iptable.Exists(Filter, "FORWARD", establish...) && !enable {
  257. del := append([]string{string(Delete), "FORWARD"}, establish...)
  258. if output, err := iptable.Raw(del...); err != nil {
  259. return err
  260. } else if len(output) != 0 {
  261. return fmt.Errorf("could not delete establish rule from %s: %s", c.Table, output)
  262. }
  263. }
  264. }
  265. return nil
  266. }
  267. // RemoveExistingChain removes existing chain from the table.
  268. func (iptable IPTable) RemoveExistingChain(name string, table Table) error {
  269. if name == "" {
  270. return fmt.Errorf("could not remove chain: chain name is empty")
  271. }
  272. if table == "" {
  273. return fmt.Errorf("could not remove chain %s: invalid table name: table name is empty", name)
  274. }
  275. c := &ChainInfo{
  276. Name: name,
  277. Table: table,
  278. IPVersion: iptable.ipVersion,
  279. }
  280. return c.Remove()
  281. }
  282. // Forward adds forwarding rule to 'filter' table and corresponding nat rule to 'nat' table.
  283. func (c *ChainInfo) Forward(action Action, ip net.IP, port int, proto, destAddr string, destPort int, bridgeName string) error {
  284. iptable := GetIptable(c.IPVersion)
  285. daddr := ip.String()
  286. if ip.IsUnspecified() {
  287. // iptables interprets "0.0.0.0" as "0.0.0.0/32", whereas we
  288. // want "0.0.0.0/0". "0/0" is correctly interpreted as "any
  289. // value" by both iptables and ip6tables.
  290. daddr = "0/0"
  291. }
  292. args := []string{
  293. "-p", proto,
  294. "-d", daddr,
  295. "--dport", strconv.Itoa(port),
  296. "-j", "DNAT",
  297. "--to-destination", net.JoinHostPort(destAddr, strconv.Itoa(destPort)),
  298. }
  299. if !c.HairpinMode {
  300. args = append(args, "!", "-i", bridgeName)
  301. }
  302. if err := iptable.ProgramRule(Nat, c.Name, action, args); err != nil {
  303. return err
  304. }
  305. args = []string{
  306. "!", "-i", bridgeName,
  307. "-o", bridgeName,
  308. "-p", proto,
  309. "-d", destAddr,
  310. "--dport", strconv.Itoa(destPort),
  311. "-j", "ACCEPT",
  312. }
  313. if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil {
  314. return err
  315. }
  316. args = []string{
  317. "-p", proto,
  318. "-s", destAddr,
  319. "-d", destAddr,
  320. "--dport", strconv.Itoa(destPort),
  321. "-j", "MASQUERADE",
  322. }
  323. if err := iptable.ProgramRule(Nat, "POSTROUTING", action, args); err != nil {
  324. return err
  325. }
  326. if proto == "sctp" {
  327. // Linux kernel v4.9 and below enables NETIF_F_SCTP_CRC for veth by
  328. // the following commit.
  329. // This introduces a problem when conbined with a physical NIC without
  330. // NETIF_F_SCTP_CRC. As for a workaround, here we add an iptables entry
  331. // to fill the checksum.
  332. //
  333. // https://github.com/torvalds/linux/commit/c80fafbbb59ef9924962f83aac85531039395b18
  334. args = []string{
  335. "-p", proto,
  336. "--sport", strconv.Itoa(destPort),
  337. "-j", "CHECKSUM",
  338. "--checksum-fill",
  339. }
  340. if err := iptable.ProgramRule(Mangle, "POSTROUTING", action, args); err != nil {
  341. return err
  342. }
  343. }
  344. return nil
  345. }
  346. // Link adds reciprocal ACCEPT rule for two supplied IP addresses.
  347. // Traffic is allowed from ip1 to ip2 and vice-versa
  348. func (c *ChainInfo) Link(action Action, ip1, ip2 net.IP, port int, proto string, bridgeName string) error {
  349. iptable := GetIptable(c.IPVersion)
  350. // forward
  351. args := []string{
  352. "-i", bridgeName, "-o", bridgeName,
  353. "-p", proto,
  354. "-s", ip1.String(),
  355. "-d", ip2.String(),
  356. "--dport", strconv.Itoa(port),
  357. "-j", "ACCEPT",
  358. }
  359. if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil {
  360. return err
  361. }
  362. // reverse
  363. args[7], args[9] = args[9], args[7]
  364. args[10] = "--sport"
  365. return iptable.ProgramRule(Filter, c.Name, action, args)
  366. }
  367. // ProgramRule adds the rule specified by args only if the
  368. // rule is not already present in the chain. Reciprocally,
  369. // it removes the rule only if present.
  370. func (iptable IPTable) ProgramRule(table Table, chain string, action Action, args []string) error {
  371. if iptable.Exists(table, chain, args...) != (action == Delete) {
  372. return nil
  373. }
  374. return iptable.RawCombinedOutput(append([]string{"-t", string(table), string(action), chain}, args...)...)
  375. }
  376. // Prerouting adds linking rule to nat/PREROUTING chain.
  377. func (c *ChainInfo) Prerouting(action Action, args ...string) error {
  378. iptable := GetIptable(c.IPVersion)
  379. a := []string{"-t", string(Nat), string(action), "PREROUTING"}
  380. if len(args) > 0 {
  381. a = append(a, args...)
  382. }
  383. if output, err := iptable.Raw(a...); err != nil {
  384. return err
  385. } else if len(output) != 0 {
  386. return ChainError{Chain: "PREROUTING", Output: output}
  387. }
  388. return nil
  389. }
  390. // Output adds linking rule to an OUTPUT chain.
  391. func (c *ChainInfo) Output(action Action, args ...string) error {
  392. a := []string{"-t", string(c.Table), string(action), "OUTPUT"}
  393. if len(args) > 0 {
  394. a = append(a, args...)
  395. }
  396. if output, err := GetIptable(c.IPVersion).Raw(a...); err != nil {
  397. return err
  398. } else if len(output) != 0 {
  399. return ChainError{Chain: "OUTPUT", Output: output}
  400. }
  401. return nil
  402. }
  403. // Remove removes the chain.
  404. func (c *ChainInfo) Remove() error {
  405. // Ignore errors - This could mean the chains were never set up
  406. if c.Table == Nat {
  407. _ = c.Prerouting(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name)
  408. _ = c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", loopbackAddress(c.IPVersion), "-j", c.Name)
  409. _ = c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name) // Created in versions <= 0.1.6
  410. _ = c.Prerouting(Delete)
  411. _ = c.Output(Delete)
  412. }
  413. iptable := GetIptable(c.IPVersion)
  414. _, _ = iptable.Raw("-t", string(c.Table), "-F", c.Name)
  415. _, _ = iptable.Raw("-t", string(c.Table), "-X", c.Name)
  416. return nil
  417. }
  418. // Exists checks if a rule exists
  419. func (iptable IPTable) Exists(table Table, chain string, rule ...string) bool {
  420. return iptable.exists(false, table, chain, rule...)
  421. }
  422. // ExistsNative behaves as Exists with the difference it
  423. // will always invoke `iptables` binary.
  424. func (iptable IPTable) ExistsNative(table Table, chain string, rule ...string) bool {
  425. return iptable.exists(true, table, chain, rule...)
  426. }
  427. func (iptable IPTable) exists(native bool, table Table, chain string, rule ...string) bool {
  428. if err := initCheck(); err != nil {
  429. // The exists() signature does not allow us to return an error, but at least
  430. // we can skip the (likely invalid) exec invocation.
  431. return false
  432. }
  433. f := iptable.Raw
  434. if native {
  435. f = iptable.raw
  436. }
  437. if table == "" {
  438. table = Filter
  439. }
  440. // if exit status is 0 then return true, the rule exists
  441. _, err := f(append([]string{"-t", string(table), "-C", chain}, rule...)...)
  442. return err == nil
  443. }
  444. const (
  445. // opWarnTime is the maximum duration that an iptables operation can take before flagging a warning.
  446. opWarnTime = 2 * time.Second
  447. // xLockWaitMsg is the iptables warning about xtables lock that can be suppressed.
  448. xLockWaitMsg = "Another app is currently holding the xtables lock"
  449. )
  450. func filterOutput(start time.Time, output []byte, args ...string) []byte {
  451. if opTime := time.Since(start); opTime > opWarnTime {
  452. // Flag operations that have taken a long time to complete
  453. log.G(context.TODO()).Warnf("xtables contention detected while running [%s]: Waited for %.2f seconds and received %q", strings.Join(args, " "), float64(opTime)/float64(time.Second), string(output))
  454. }
  455. // ignore iptables' message about xtables lock:
  456. // it is a warning, not an error.
  457. if strings.Contains(string(output), xLockWaitMsg) {
  458. output = []byte("")
  459. }
  460. // Put further filters here if desired
  461. return output
  462. }
  463. // Raw calls 'iptables' system command, passing supplied arguments.
  464. func (iptable IPTable) Raw(args ...string) ([]byte, error) {
  465. if firewalldRunning {
  466. // select correct IP version for firewalld
  467. ipv := Iptables
  468. if iptable.ipVersion == IPv6 {
  469. ipv = IP6Tables
  470. }
  471. startTime := time.Now()
  472. output, err := Passthrough(ipv, args...)
  473. if err == nil || !strings.Contains(err.Error(), "was not provided by any .service files") {
  474. return filterOutput(startTime, output, args...), err
  475. }
  476. }
  477. return iptable.raw(args...)
  478. }
  479. func (iptable IPTable) raw(args ...string) ([]byte, error) {
  480. if err := initCheck(); err != nil {
  481. return nil, err
  482. }
  483. path := iptablesPath
  484. commandName := "iptables"
  485. if iptable.ipVersion == IPv6 {
  486. if ip6tablesPath == "" {
  487. return nil, fmt.Errorf("ip6tables is missing")
  488. }
  489. path = ip6tablesPath
  490. commandName = "ip6tables"
  491. }
  492. if supportsXlock {
  493. args = append([]string{"--wait"}, args...)
  494. } else {
  495. bestEffortLock.Lock()
  496. defer bestEffortLock.Unlock()
  497. }
  498. log.G(context.TODO()).Debugf("%s, %v", path, args)
  499. startTime := time.Now()
  500. output, err := exec.Command(path, args...).CombinedOutput()
  501. if err != nil {
  502. return nil, fmt.Errorf("iptables failed: %s %v: %s (%s)", commandName, strings.Join(args, " "), output, err)
  503. }
  504. return filterOutput(startTime, output, args...), err
  505. }
  506. // RawCombinedOutput internally calls the Raw function and returns a non nil
  507. // error if Raw returned a non nil error or a non empty output
  508. func (iptable IPTable) RawCombinedOutput(args ...string) error {
  509. if output, err := iptable.Raw(args...); err != nil || len(output) != 0 {
  510. return fmt.Errorf("%s (%v)", string(output), err)
  511. }
  512. return nil
  513. }
  514. // RawCombinedOutputNative behave as RawCombinedOutput with the difference it
  515. // will always invoke `iptables` binary
  516. func (iptable IPTable) RawCombinedOutputNative(args ...string) error {
  517. if output, err := iptable.raw(args...); err != nil || len(output) != 0 {
  518. return fmt.Errorf("%s (%v)", string(output), err)
  519. }
  520. return nil
  521. }
  522. // ExistChain checks if a chain exists
  523. func (iptable IPTable) ExistChain(chain string, table Table) bool {
  524. _, err := iptable.Raw("-t", string(table), "-nL", chain)
  525. return err == nil
  526. }
  527. // SetDefaultPolicy sets the passed default policy for the table/chain
  528. func (iptable IPTable) SetDefaultPolicy(table Table, chain string, policy Policy) error {
  529. if err := iptable.RawCombinedOutput("-t", string(table), "-P", chain, string(policy)); err != nil {
  530. return fmt.Errorf("setting default policy to %v in %v chain failed: %v", policy, chain, err)
  531. }
  532. return nil
  533. }
  534. // AddReturnRule adds a return rule for the chain in the filter table
  535. func (iptable IPTable) AddReturnRule(chain string) error {
  536. if iptable.Exists(Filter, chain, "-j", "RETURN") {
  537. return nil
  538. }
  539. if err := iptable.RawCombinedOutput("-A", chain, "-j", "RETURN"); err != nil {
  540. return fmt.Errorf("unable to add return rule in %s chain: %v", chain, err)
  541. }
  542. return nil
  543. }
  544. // EnsureJumpRule ensures the jump rule is on top
  545. func (iptable IPTable) EnsureJumpRule(fromChain, toChain string) error {
  546. if iptable.Exists(Filter, fromChain, "-j", toChain) {
  547. if err := iptable.RawCombinedOutput("-D", fromChain, "-j", toChain); err != nil {
  548. return fmt.Errorf("unable to remove jump to %s rule in %s chain: %v", toChain, fromChain, err)
  549. }
  550. }
  551. if err := iptable.RawCombinedOutput("-I", fromChain, "-j", toChain); err != nil {
  552. return fmt.Errorf("unable to insert jump to %s rule in %s chain: %v", toChain, fromChain, err)
  553. }
  554. return nil
  555. }