iptables.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601
  1. //go:build linux
  2. // +build linux
  3. package iptables
  4. import (
  5. "context"
  6. "errors"
  7. "fmt"
  8. "net"
  9. "os/exec"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. "time"
  14. "github.com/containerd/containerd/log"
  15. "github.com/docker/docker/pkg/rootless"
  16. )
  // String-backed types used when assembling iptables invocations.
  type (
  	// Action is the rule operation flag handed to iptables.
  	Action string

  	// Policy is a default policy that can be set on a chain.
  	Policy string

  	// Table names an iptables table: Nat, Filter or Mangle.
  	Table string

  	// IPVersion identifies the IP version, v4 or v6.
  	IPVersion string
  )
  25. const (
  26. // Append appends the rule at the end of the chain.
  27. Append Action = "-A"
  28. // Delete deletes the rule from the chain.
  29. Delete Action = "-D"
  30. // Insert inserts the rule at the top of the chain.
  31. Insert Action = "-I"
  32. // Nat table is used for nat translation rules.
  33. Nat Table = "nat"
  34. // Filter table is used for filter rules.
  35. Filter Table = "filter"
  36. // Mangle table is used for mangling the packet.
  37. Mangle Table = "mangle"
  38. // Drop is the default iptables DROP policy
  39. Drop Policy = "DROP"
  40. // Accept is the default iptables ACCEPT policy
  41. Accept Policy = "ACCEPT"
  42. // IPv4 is version 4
  43. IPv4 IPVersion = "IPV4"
  44. // IPv6 is version 6
  45. IPv6 IPVersion = "IPV6"
  46. )
  // Package-level state shared by every IPTable value.
  var (
  	// iptablesPath and ip6tablesPath hold the resolved locations of the
  	// iptables and ip6tables binaries; empty until detection has run or
  	// when the binary was not found.
  	iptablesPath  string
  	ip6tablesPath string

  	// supportsXlock records whether the iptables binary accepted --wait.
  	supportsXlock bool

  	// xLockWaitMsg is the message iptables prints while waiting for the
  	// xtables lock; output containing it is discarded.
  	xLockWaitMsg = "Another app is currently holding the xtables lock"

  	// bestEffortLock serializes iptables commands when the xtables lock
  	// is not supported.
  	bestEffortLock sync.Mutex

  	// initOnce guards the one-time dependency initialization.
  	initOnce sync.Once
  )
  56. // IPTable defines struct with IPVersion
  57. type IPTable struct {
  58. Version IPVersion
  59. }
  60. // ChainInfo defines the iptables chain.
  61. type ChainInfo struct {
  62. Name string
  63. Table Table
  64. HairpinMode bool
  65. IPVersion IPVersion
  66. }
  67. // ChainError is returned to represent errors during ip table operation.
  68. type ChainError struct {
  69. Chain string
  70. Output []byte
  71. }
  72. func (e ChainError) Error() string {
  73. return fmt.Sprintf("Error iptables %s: %s", e.Chain, string(e.Output))
  74. }
  75. func detectIptables() {
  76. path, err := exec.LookPath("iptables")
  77. if err != nil {
  78. log.G(context.TODO()).WithError(err).Warnf("failed to find iptables")
  79. return
  80. }
  81. iptablesPath = path
  82. // The --wait flag was added in iptables v1.6.0.
  83. // TODO remove this check once we drop support for CentOS/RHEL 7, which uses an older version of iptables
  84. if out, err := exec.Command(path, "--wait", "-L", "-n").CombinedOutput(); err != nil {
  85. log.G(context.TODO()).WithError(err).Infof("unable to detect if iptables supports xlock: 'iptables --wait -L -n': `%s`", strings.TrimSpace(string(out)))
  86. } else {
  87. supportsXlock = true
  88. }
  89. path, err = exec.LookPath("ip6tables")
  90. if err != nil {
  91. log.G(context.TODO()).WithError(err).Warnf("unable to find ip6tables")
  92. } else {
  93. ip6tablesPath = path
  94. }
  95. }
  96. func initFirewalld() {
  97. // When running with RootlessKit, firewalld is running as the root outside our network namespace
  98. // https://github.com/moby/moby/issues/43781
  99. if rootless.RunningWithRootlessKit() {
  100. log.G(context.TODO()).Info("skipping firewalld management for rootless mode")
  101. return
  102. }
  103. if err := FirewalldInit(); err != nil {
  104. log.G(context.TODO()).WithError(err).Debugf("unable to initialize firewalld; using raw iptables instead")
  105. }
  106. }
  107. func initDependencies() {
  108. initFirewalld()
  109. detectIptables()
  110. }
  111. func initCheck() error {
  112. initOnce.Do(initDependencies)
  113. if iptablesPath == "" {
  114. return errors.New("iptables not found")
  115. }
  116. return nil
  117. }
  118. // GetIptable returns an instance of IPTable with specified version
  119. func GetIptable(version IPVersion) *IPTable {
  120. return &IPTable{Version: version}
  121. }
  122. // NewChain adds a new chain to ip table.
  123. func (iptable IPTable) NewChain(name string, table Table, hairpinMode bool) (*ChainInfo, error) {
  124. if table == "" {
  125. table = Filter
  126. }
  127. // Add chain if it doesn't exist
  128. if _, err := iptable.Raw("-t", string(table), "-n", "-L", name); err != nil {
  129. if output, err := iptable.Raw("-t", string(table), "-N", name); err != nil {
  130. return nil, err
  131. } else if len(output) != 0 {
  132. return nil, fmt.Errorf("could not create %s/%s chain: %s", table, name, output)
  133. }
  134. }
  135. return &ChainInfo{
  136. Name: name,
  137. Table: table,
  138. HairpinMode: hairpinMode,
  139. IPVersion: iptable.Version,
  140. }, nil
  141. }
  142. // LoopbackByVersion returns loopback address by version
  143. func (iptable IPTable) LoopbackByVersion() string {
  144. if iptable.Version == IPv6 {
  145. return "::1/128"
  146. }
  147. return "127.0.0.0/8"
  148. }
  149. // ProgramChain is used to add rules to a chain
  150. func (iptable IPTable) ProgramChain(c *ChainInfo, bridgeName string, hairpinMode, enable bool) error {
  151. if c.Name == "" {
  152. return errors.New("Could not program chain, missing chain name")
  153. }
  154. // Either add or remove the interface from the firewalld zone
  155. if firewalldRunning {
  156. if enable {
  157. if err := AddInterfaceFirewalld(bridgeName); err != nil {
  158. return err
  159. }
  160. } else {
  161. if err := DelInterfaceFirewalld(bridgeName); err != nil {
  162. return err
  163. }
  164. }
  165. }
  166. switch c.Table {
  167. case Nat:
  168. preroute := []string{
  169. "-m", "addrtype",
  170. "--dst-type", "LOCAL",
  171. "-j", c.Name,
  172. }
  173. if !iptable.Exists(Nat, "PREROUTING", preroute...) && enable {
  174. if err := c.Prerouting(Append, preroute...); err != nil {
  175. return fmt.Errorf("Failed to inject %s in PREROUTING chain: %s", c.Name, err)
  176. }
  177. } else if iptable.Exists(Nat, "PREROUTING", preroute...) && !enable {
  178. if err := c.Prerouting(Delete, preroute...); err != nil {
  179. return fmt.Errorf("Failed to remove %s in PREROUTING chain: %s", c.Name, err)
  180. }
  181. }
  182. output := []string{
  183. "-m", "addrtype",
  184. "--dst-type", "LOCAL",
  185. "-j", c.Name,
  186. }
  187. if !hairpinMode {
  188. output = append(output, "!", "--dst", iptable.LoopbackByVersion())
  189. }
  190. if !iptable.Exists(Nat, "OUTPUT", output...) && enable {
  191. if err := c.Output(Append, output...); err != nil {
  192. return fmt.Errorf("Failed to inject %s in OUTPUT chain: %s", c.Name, err)
  193. }
  194. } else if iptable.Exists(Nat, "OUTPUT", output...) && !enable {
  195. if err := c.Output(Delete, output...); err != nil {
  196. return fmt.Errorf("Failed to inject %s in OUTPUT chain: %s", c.Name, err)
  197. }
  198. }
  199. case Filter:
  200. if bridgeName == "" {
  201. return fmt.Errorf("Could not program chain %s/%s, missing bridge name",
  202. c.Table, c.Name)
  203. }
  204. link := []string{
  205. "-o", bridgeName,
  206. "-j", c.Name,
  207. }
  208. if !iptable.Exists(Filter, "FORWARD", link...) && enable {
  209. insert := append([]string{string(Insert), "FORWARD"}, link...)
  210. if output, err := iptable.Raw(insert...); err != nil {
  211. return err
  212. } else if len(output) != 0 {
  213. return fmt.Errorf("Could not create linking rule to %s/%s: %s", c.Table, c.Name, output)
  214. }
  215. } else if iptable.Exists(Filter, "FORWARD", link...) && !enable {
  216. del := append([]string{string(Delete), "FORWARD"}, link...)
  217. if output, err := iptable.Raw(del...); err != nil {
  218. return err
  219. } else if len(output) != 0 {
  220. return fmt.Errorf("Could not delete linking rule from %s/%s: %s", c.Table, c.Name, output)
  221. }
  222. }
  223. establish := []string{
  224. "-o", bridgeName,
  225. "-m", "conntrack",
  226. "--ctstate", "RELATED,ESTABLISHED",
  227. "-j", "ACCEPT",
  228. }
  229. if !iptable.Exists(Filter, "FORWARD", establish...) && enable {
  230. insert := append([]string{string(Insert), "FORWARD"}, establish...)
  231. if output, err := iptable.Raw(insert...); err != nil {
  232. return err
  233. } else if len(output) != 0 {
  234. return fmt.Errorf("Could not create establish rule to %s: %s", c.Table, output)
  235. }
  236. } else if iptable.Exists(Filter, "FORWARD", establish...) && !enable {
  237. del := append([]string{string(Delete), "FORWARD"}, establish...)
  238. if output, err := iptable.Raw(del...); err != nil {
  239. return err
  240. } else if len(output) != 0 {
  241. return fmt.Errorf("Could not delete establish rule from %s: %s", c.Table, output)
  242. }
  243. }
  244. }
  245. return nil
  246. }
  247. // RemoveExistingChain removes existing chain from the table.
  248. func (iptable IPTable) RemoveExistingChain(name string, table Table) error {
  249. if table == "" {
  250. table = Filter
  251. }
  252. c := &ChainInfo{
  253. Name: name,
  254. Table: table,
  255. IPVersion: iptable.Version,
  256. }
  257. return c.Remove()
  258. }
  259. // Forward adds forwarding rule to 'filter' table and corresponding nat rule to 'nat' table.
  260. func (c *ChainInfo) Forward(action Action, ip net.IP, port int, proto, destAddr string, destPort int, bridgeName string) error {
  261. iptable := GetIptable(c.IPVersion)
  262. daddr := ip.String()
  263. if ip.IsUnspecified() {
  264. // iptables interprets "0.0.0.0" as "0.0.0.0/32", whereas we
  265. // want "0.0.0.0/0". "0/0" is correctly interpreted as "any
  266. // value" by both iptables and ip6tables.
  267. daddr = "0/0"
  268. }
  269. args := []string{
  270. "-p", proto,
  271. "-d", daddr,
  272. "--dport", strconv.Itoa(port),
  273. "-j", "DNAT",
  274. "--to-destination", net.JoinHostPort(destAddr, strconv.Itoa(destPort)),
  275. }
  276. if !c.HairpinMode {
  277. args = append(args, "!", "-i", bridgeName)
  278. }
  279. if err := iptable.ProgramRule(Nat, c.Name, action, args); err != nil {
  280. return err
  281. }
  282. args = []string{
  283. "!", "-i", bridgeName,
  284. "-o", bridgeName,
  285. "-p", proto,
  286. "-d", destAddr,
  287. "--dport", strconv.Itoa(destPort),
  288. "-j", "ACCEPT",
  289. }
  290. if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil {
  291. return err
  292. }
  293. args = []string{
  294. "-p", proto,
  295. "-s", destAddr,
  296. "-d", destAddr,
  297. "--dport", strconv.Itoa(destPort),
  298. "-j", "MASQUERADE",
  299. }
  300. if err := iptable.ProgramRule(Nat, "POSTROUTING", action, args); err != nil {
  301. return err
  302. }
  303. if proto == "sctp" {
  304. // Linux kernel v4.9 and below enables NETIF_F_SCTP_CRC for veth by
  305. // the following commit.
  306. // This introduces a problem when conbined with a physical NIC without
  307. // NETIF_F_SCTP_CRC. As for a workaround, here we add an iptables entry
  308. // to fill the checksum.
  309. //
  310. // https://github.com/torvalds/linux/commit/c80fafbbb59ef9924962f83aac85531039395b18
  311. args = []string{
  312. "-p", proto,
  313. "--sport", strconv.Itoa(destPort),
  314. "-j", "CHECKSUM",
  315. "--checksum-fill",
  316. }
  317. if err := iptable.ProgramRule(Mangle, "POSTROUTING", action, args); err != nil {
  318. return err
  319. }
  320. }
  321. return nil
  322. }
  323. // Link adds reciprocal ACCEPT rule for two supplied IP addresses.
  324. // Traffic is allowed from ip1 to ip2 and vice-versa
  325. func (c *ChainInfo) Link(action Action, ip1, ip2 net.IP, port int, proto string, bridgeName string) error {
  326. iptable := GetIptable(c.IPVersion)
  327. // forward
  328. args := []string{
  329. "-i", bridgeName, "-o", bridgeName,
  330. "-p", proto,
  331. "-s", ip1.String(),
  332. "-d", ip2.String(),
  333. "--dport", strconv.Itoa(port),
  334. "-j", "ACCEPT",
  335. }
  336. if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil {
  337. return err
  338. }
  339. // reverse
  340. args[7], args[9] = args[9], args[7]
  341. args[10] = "--sport"
  342. return iptable.ProgramRule(Filter, c.Name, action, args)
  343. }
  344. // ProgramRule adds the rule specified by args only if the
  345. // rule is not already present in the chain. Reciprocally,
  346. // it removes the rule only if present.
  347. func (iptable IPTable) ProgramRule(table Table, chain string, action Action, args []string) error {
  348. if iptable.Exists(table, chain, args...) != (action == Delete) {
  349. return nil
  350. }
  351. return iptable.RawCombinedOutput(append([]string{"-t", string(table), string(action), chain}, args...)...)
  352. }
  353. // Prerouting adds linking rule to nat/PREROUTING chain.
  354. func (c *ChainInfo) Prerouting(action Action, args ...string) error {
  355. iptable := GetIptable(c.IPVersion)
  356. a := []string{"-t", string(Nat), string(action), "PREROUTING"}
  357. if len(args) > 0 {
  358. a = append(a, args...)
  359. }
  360. if output, err := iptable.Raw(a...); err != nil {
  361. return err
  362. } else if len(output) != 0 {
  363. return ChainError{Chain: "PREROUTING", Output: output}
  364. }
  365. return nil
  366. }
  367. // Output adds linking rule to an OUTPUT chain.
  368. func (c *ChainInfo) Output(action Action, args ...string) error {
  369. a := []string{"-t", string(c.Table), string(action), "OUTPUT"}
  370. if len(args) > 0 {
  371. a = append(a, args...)
  372. }
  373. if output, err := GetIptable(c.IPVersion).Raw(a...); err != nil {
  374. return err
  375. } else if len(output) != 0 {
  376. return ChainError{Chain: "OUTPUT", Output: output}
  377. }
  378. return nil
  379. }
  380. // Remove removes the chain.
  381. func (c *ChainInfo) Remove() error {
  382. iptable := GetIptable(c.IPVersion)
  383. // Ignore errors - This could mean the chains were never set up
  384. if c.Table == Nat {
  385. _ = c.Prerouting(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name)
  386. _ = c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", iptable.LoopbackByVersion(), "-j", c.Name)
  387. _ = c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name) // Created in versions <= 0.1.6
  388. _ = c.Prerouting(Delete)
  389. _ = c.Output(Delete)
  390. }
  391. _, _ = iptable.Raw("-t", string(c.Table), "-F", c.Name)
  392. _, _ = iptable.Raw("-t", string(c.Table), "-X", c.Name)
  393. return nil
  394. }
  395. // Exists checks if a rule exists
  396. func (iptable IPTable) Exists(table Table, chain string, rule ...string) bool {
  397. return iptable.exists(false, table, chain, rule...)
  398. }
  399. // ExistsNative behaves as Exists with the difference it
  400. // will always invoke `iptables` binary.
  401. func (iptable IPTable) ExistsNative(table Table, chain string, rule ...string) bool {
  402. return iptable.exists(true, table, chain, rule...)
  403. }
  404. func (iptable IPTable) exists(native bool, table Table, chain string, rule ...string) bool {
  405. if err := initCheck(); err != nil {
  406. // The exists() signature does not allow us to return an error, but at least
  407. // we can skip the (likely invalid) exec invocation.
  408. return false
  409. }
  410. f := iptable.Raw
  411. if native {
  412. f = iptable.raw
  413. }
  414. if table == "" {
  415. table = Filter
  416. }
  417. // if exit status is 0 then return true, the rule exists
  418. _, err := f(append([]string{"-t", string(table), "-C", chain}, rule...)...)
  419. return err == nil
  420. }
  // opWarnTime is the longest an iptables operation may take before a
  // warning is logged.
  const opWarnTime time.Duration = 2 * time.Second
  424. func filterOutput(start time.Time, output []byte, args ...string) []byte {
  425. // Flag operations that have taken a long time to complete
  426. opTime := time.Since(start)
  427. if opTime > opWarnTime {
  428. log.G(context.TODO()).Warnf("xtables contention detected while running [%s]: Waited for %.2f seconds and received %q", strings.Join(args, " "), float64(opTime)/float64(time.Second), string(output))
  429. }
  430. // ignore iptables' message about xtables lock:
  431. // it is a warning, not an error.
  432. if strings.Contains(string(output), xLockWaitMsg) {
  433. output = []byte("")
  434. }
  435. // Put further filters here if desired
  436. return output
  437. }
  438. // Raw calls 'iptables' system command, passing supplied arguments.
  439. func (iptable IPTable) Raw(args ...string) ([]byte, error) {
  440. if firewalldRunning {
  441. // select correct IP version for firewalld
  442. ipv := Iptables
  443. if iptable.Version == IPv6 {
  444. ipv = IP6Tables
  445. }
  446. startTime := time.Now()
  447. output, err := Passthrough(ipv, args...)
  448. if err == nil || !strings.Contains(err.Error(), "was not provided by any .service files") {
  449. return filterOutput(startTime, output, args...), err
  450. }
  451. }
  452. return iptable.raw(args...)
  453. }
  454. func (iptable IPTable) raw(args ...string) ([]byte, error) {
  455. if err := initCheck(); err != nil {
  456. return nil, err
  457. }
  458. if supportsXlock {
  459. args = append([]string{"--wait"}, args...)
  460. } else {
  461. bestEffortLock.Lock()
  462. defer bestEffortLock.Unlock()
  463. }
  464. path := iptablesPath
  465. commandName := "iptables"
  466. if iptable.Version == IPv6 {
  467. if ip6tablesPath == "" {
  468. return nil, fmt.Errorf("ip6tables is missing")
  469. }
  470. path = ip6tablesPath
  471. commandName = "ip6tables"
  472. }
  473. log.G(context.TODO()).Debugf("%s, %v", path, args)
  474. startTime := time.Now()
  475. output, err := exec.Command(path, args...).CombinedOutput()
  476. if err != nil {
  477. return nil, fmt.Errorf("iptables failed: %s %v: %s (%s)", commandName, strings.Join(args, " "), output, err)
  478. }
  479. return filterOutput(startTime, output, args...), err
  480. }
  481. // RawCombinedOutput internally calls the Raw function and returns a non nil
  482. // error if Raw returned a non nil error or a non empty output
  483. func (iptable IPTable) RawCombinedOutput(args ...string) error {
  484. if output, err := iptable.Raw(args...); err != nil || len(output) != 0 {
  485. return fmt.Errorf("%s (%v)", string(output), err)
  486. }
  487. return nil
  488. }
  489. // RawCombinedOutputNative behave as RawCombinedOutput with the difference it
  490. // will always invoke `iptables` binary
  491. func (iptable IPTable) RawCombinedOutputNative(args ...string) error {
  492. if output, err := iptable.raw(args...); err != nil || len(output) != 0 {
  493. return fmt.Errorf("%s (%v)", string(output), err)
  494. }
  495. return nil
  496. }
  497. // ExistChain checks if a chain exists
  498. func (iptable IPTable) ExistChain(chain string, table Table) bool {
  499. if _, err := iptable.Raw("-t", string(table), "-nL", chain); err == nil {
  500. return true
  501. }
  502. return false
  503. }
  504. // SetDefaultPolicy sets the passed default policy for the table/chain
  505. func (iptable IPTable) SetDefaultPolicy(table Table, chain string, policy Policy) error {
  506. if err := iptable.RawCombinedOutput("-t", string(table), "-P", chain, string(policy)); err != nil {
  507. return fmt.Errorf("setting default policy to %v in %v chain failed: %v", policy, chain, err)
  508. }
  509. return nil
  510. }
  511. // AddReturnRule adds a return rule for the chain in the filter table
  512. func (iptable IPTable) AddReturnRule(chain string) error {
  513. if iptable.Exists(Filter, chain, "-j", "RETURN") {
  514. return nil
  515. }
  516. err := iptable.RawCombinedOutput("-A", chain, "-j", "RETURN")
  517. if err != nil {
  518. return fmt.Errorf("unable to add return rule in %s chain: %v", chain, err)
  519. }
  520. return nil
  521. }
  522. // EnsureJumpRule ensures the jump rule is on top
  523. func (iptable IPTable) EnsureJumpRule(fromChain, toChain string) error {
  524. if iptable.Exists(Filter, fromChain, "-j", toChain) {
  525. err := iptable.RawCombinedOutput("-D", fromChain, "-j", toChain)
  526. if err != nil {
  527. return fmt.Errorf("unable to remove jump to %s rule in %s chain: %v", toChain, fromChain, err)
  528. }
  529. }
  530. err := iptable.RawCombinedOutput("-I", fromChain, "-j", toChain)
  531. if err != nil {
  532. return fmt.Errorf("unable to insert jump to %s rule in %s chain: %v", toChain, fromChain, err)
  533. }
  534. return nil
  535. }