iptables.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598
  1. //go:build linux
  2. // +build linux
  3. package iptables
  4. import (
  5. "context"
  6. "errors"
  7. "fmt"
  8. "net"
  9. "os/exec"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. "time"
  14. "github.com/containerd/containerd/log"
  15. "github.com/docker/docker/pkg/rootless"
  16. )
  17. // Action signifies the iptable action.
  18. type Action string
  19. // Policy is the default iptable policies
  20. type Policy string
  21. // Table refers to Nat, Filter or Mangle.
  22. type Table string
  23. // IPVersion refers to IP version, v4 or v6
  24. type IPVersion string
  25. const (
  26. // Append appends the rule at the end of the chain.
  27. Append Action = "-A"
  28. // Delete deletes the rule from the chain.
  29. Delete Action = "-D"
  30. // Insert inserts the rule at the top of the chain.
  31. Insert Action = "-I"
  32. // Nat table is used for nat translation rules.
  33. Nat Table = "nat"
  34. // Filter table is used for filter rules.
  35. Filter Table = "filter"
  36. // Mangle table is used for mangling the packet.
  37. Mangle Table = "mangle"
  38. // Drop is the default iptables DROP policy
  39. Drop Policy = "DROP"
  40. // Accept is the default iptables ACCEPT policy
  41. Accept Policy = "ACCEPT"
  42. // IPv4 is version 4
  43. IPv4 IPVersion = "IPV4"
  44. // IPv6 is version 6
  45. IPv6 IPVersion = "IPV6"
  46. )
  47. var (
  48. iptablesPath string
  49. ip6tablesPath string
  50. supportsXlock = false
  51. xLockWaitMsg = "Another app is currently holding the xtables lock"
  52. // used to lock iptables commands if xtables lock is not supported
  53. bestEffortLock sync.Mutex
  54. initOnce sync.Once
  55. )
  56. // IPTable defines struct with IPVersion
  57. type IPTable struct {
  58. Version IPVersion
  59. }
  60. // ChainInfo defines the iptables chain.
  61. type ChainInfo struct {
  62. Name string
  63. Table Table
  64. HairpinMode bool
  65. IPVersion IPVersion
  66. }
  67. // ChainError is returned to represent errors during ip table operation.
  68. type ChainError struct {
  69. Chain string
  70. Output []byte
  71. }
  72. func (e ChainError) Error() string {
  73. return fmt.Sprintf("error iptables %s: %s", e.Chain, string(e.Output))
  74. }
  75. func detectIptables() {
  76. path, err := exec.LookPath("iptables")
  77. if err != nil {
  78. log.G(context.TODO()).WithError(err).Warnf("failed to find iptables")
  79. return
  80. }
  81. iptablesPath = path
  82. // The --wait flag was added in iptables v1.6.0.
  83. // TODO remove this check once we drop support for CentOS/RHEL 7, which uses an older version of iptables
  84. if out, err := exec.Command(path, "--wait", "-L", "-n").CombinedOutput(); err != nil {
  85. log.G(context.TODO()).WithError(err).Infof("unable to detect if iptables supports xlock: 'iptables --wait -L -n': `%s`", strings.TrimSpace(string(out)))
  86. } else {
  87. supportsXlock = true
  88. }
  89. path, err = exec.LookPath("ip6tables")
  90. if err != nil {
  91. log.G(context.TODO()).WithError(err).Warnf("unable to find ip6tables")
  92. } else {
  93. ip6tablesPath = path
  94. }
  95. }
  96. func initFirewalld() {
  97. // When running with RootlessKit, firewalld is running as the root outside our network namespace
  98. // https://github.com/moby/moby/issues/43781
  99. if rootless.RunningWithRootlessKit() {
  100. log.G(context.TODO()).Info("skipping firewalld management for rootless mode")
  101. return
  102. }
  103. if err := FirewalldInit(); err != nil {
  104. log.G(context.TODO()).WithError(err).Debugf("unable to initialize firewalld; using raw iptables instead")
  105. }
  106. }
  107. func initDependencies() {
  108. initFirewalld()
  109. detectIptables()
  110. }
  111. func initCheck() error {
  112. initOnce.Do(initDependencies)
  113. if iptablesPath == "" {
  114. return errors.New("iptables not found")
  115. }
  116. return nil
  117. }
  118. // GetIptable returns an instance of IPTable with specified version
  119. func GetIptable(version IPVersion) *IPTable {
  120. return &IPTable{Version: version}
  121. }
  122. // NewChain adds a new chain to ip table.
  123. func (iptable IPTable) NewChain(name string, table Table, hairpinMode bool) (*ChainInfo, error) {
  124. if table == "" {
  125. table = Filter
  126. }
  127. // Add chain if it doesn't exist
  128. if _, err := iptable.Raw("-t", string(table), "-n", "-L", name); err != nil {
  129. if output, err := iptable.Raw("-t", string(table), "-N", name); err != nil {
  130. return nil, err
  131. } else if len(output) != 0 {
  132. return nil, fmt.Errorf("could not create %s/%s chain: %s", table, name, output)
  133. }
  134. }
  135. return &ChainInfo{
  136. Name: name,
  137. Table: table,
  138. HairpinMode: hairpinMode,
  139. IPVersion: iptable.Version,
  140. }, nil
  141. }
  142. // LoopbackByVersion returns loopback address by version
  143. func (iptable IPTable) LoopbackByVersion() string {
  144. if iptable.Version == IPv6 {
  145. return "::1/128"
  146. }
  147. return "127.0.0.0/8"
  148. }
  149. // ProgramChain is used to add rules to a chain
  150. func (iptable IPTable) ProgramChain(c *ChainInfo, bridgeName string, hairpinMode, enable bool) error {
  151. if c.Name == "" {
  152. return errors.New("could not program chain, missing chain name")
  153. }
  154. // Either add or remove the interface from the firewalld zone
  155. if firewalldRunning {
  156. if enable {
  157. if err := AddInterfaceFirewalld(bridgeName); err != nil {
  158. return err
  159. }
  160. } else {
  161. if err := DelInterfaceFirewalld(bridgeName); err != nil {
  162. return err
  163. }
  164. }
  165. }
  166. switch c.Table {
  167. case Nat:
  168. preroute := []string{
  169. "-m", "addrtype",
  170. "--dst-type", "LOCAL",
  171. "-j", c.Name,
  172. }
  173. if !iptable.Exists(Nat, "PREROUTING", preroute...) && enable {
  174. if err := c.Prerouting(Append, preroute...); err != nil {
  175. return fmt.Errorf("failed to inject %s in PREROUTING chain: %s", c.Name, err)
  176. }
  177. } else if iptable.Exists(Nat, "PREROUTING", preroute...) && !enable {
  178. if err := c.Prerouting(Delete, preroute...); err != nil {
  179. return fmt.Errorf("failed to remove %s in PREROUTING chain: %s", c.Name, err)
  180. }
  181. }
  182. output := []string{
  183. "-m", "addrtype",
  184. "--dst-type", "LOCAL",
  185. "-j", c.Name,
  186. }
  187. if !hairpinMode {
  188. output = append(output, "!", "--dst", iptable.LoopbackByVersion())
  189. }
  190. if !iptable.Exists(Nat, "OUTPUT", output...) && enable {
  191. if err := c.Output(Append, output...); err != nil {
  192. return fmt.Errorf("failed to inject %s in OUTPUT chain: %s", c.Name, err)
  193. }
  194. } else if iptable.Exists(Nat, "OUTPUT", output...) && !enable {
  195. if err := c.Output(Delete, output...); err != nil {
  196. return fmt.Errorf("failed to inject %s in OUTPUT chain: %s", c.Name, err)
  197. }
  198. }
  199. case Filter:
  200. if bridgeName == "" {
  201. return fmt.Errorf("could not program chain %s/%s, missing bridge name", c.Table, c.Name)
  202. }
  203. link := []string{
  204. "-o", bridgeName,
  205. "-j", c.Name,
  206. }
  207. if !iptable.Exists(Filter, "FORWARD", link...) && enable {
  208. insert := append([]string{string(Insert), "FORWARD"}, link...)
  209. if output, err := iptable.Raw(insert...); err != nil {
  210. return err
  211. } else if len(output) != 0 {
  212. return fmt.Errorf("could not create linking rule to %s/%s: %s", c.Table, c.Name, output)
  213. }
  214. } else if iptable.Exists(Filter, "FORWARD", link...) && !enable {
  215. del := append([]string{string(Delete), "FORWARD"}, link...)
  216. if output, err := iptable.Raw(del...); err != nil {
  217. return err
  218. } else if len(output) != 0 {
  219. return fmt.Errorf("could not delete linking rule from %s/%s: %s", c.Table, c.Name, output)
  220. }
  221. }
  222. establish := []string{
  223. "-o", bridgeName,
  224. "-m", "conntrack",
  225. "--ctstate", "RELATED,ESTABLISHED",
  226. "-j", "ACCEPT",
  227. }
  228. if !iptable.Exists(Filter, "FORWARD", establish...) && enable {
  229. insert := append([]string{string(Insert), "FORWARD"}, establish...)
  230. if output, err := iptable.Raw(insert...); err != nil {
  231. return err
  232. } else if len(output) != 0 {
  233. return fmt.Errorf("could not create establish rule to %s: %s", c.Table, output)
  234. }
  235. } else if iptable.Exists(Filter, "FORWARD", establish...) && !enable {
  236. del := append([]string{string(Delete), "FORWARD"}, establish...)
  237. if output, err := iptable.Raw(del...); err != nil {
  238. return err
  239. } else if len(output) != 0 {
  240. return fmt.Errorf("could not delete establish rule from %s: %s", c.Table, output)
  241. }
  242. }
  243. }
  244. return nil
  245. }
  246. // RemoveExistingChain removes existing chain from the table.
  247. func (iptable IPTable) RemoveExistingChain(name string, table Table) error {
  248. if table == "" {
  249. table = Filter
  250. }
  251. c := &ChainInfo{
  252. Name: name,
  253. Table: table,
  254. IPVersion: iptable.Version,
  255. }
  256. return c.Remove()
  257. }
  258. // Forward adds forwarding rule to 'filter' table and corresponding nat rule to 'nat' table.
  259. func (c *ChainInfo) Forward(action Action, ip net.IP, port int, proto, destAddr string, destPort int, bridgeName string) error {
  260. iptable := GetIptable(c.IPVersion)
  261. daddr := ip.String()
  262. if ip.IsUnspecified() {
  263. // iptables interprets "0.0.0.0" as "0.0.0.0/32", whereas we
  264. // want "0.0.0.0/0". "0/0" is correctly interpreted as "any
  265. // value" by both iptables and ip6tables.
  266. daddr = "0/0"
  267. }
  268. args := []string{
  269. "-p", proto,
  270. "-d", daddr,
  271. "--dport", strconv.Itoa(port),
  272. "-j", "DNAT",
  273. "--to-destination", net.JoinHostPort(destAddr, strconv.Itoa(destPort)),
  274. }
  275. if !c.HairpinMode {
  276. args = append(args, "!", "-i", bridgeName)
  277. }
  278. if err := iptable.ProgramRule(Nat, c.Name, action, args); err != nil {
  279. return err
  280. }
  281. args = []string{
  282. "!", "-i", bridgeName,
  283. "-o", bridgeName,
  284. "-p", proto,
  285. "-d", destAddr,
  286. "--dport", strconv.Itoa(destPort),
  287. "-j", "ACCEPT",
  288. }
  289. if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil {
  290. return err
  291. }
  292. args = []string{
  293. "-p", proto,
  294. "-s", destAddr,
  295. "-d", destAddr,
  296. "--dport", strconv.Itoa(destPort),
  297. "-j", "MASQUERADE",
  298. }
  299. if err := iptable.ProgramRule(Nat, "POSTROUTING", action, args); err != nil {
  300. return err
  301. }
  302. if proto == "sctp" {
  303. // Linux kernel v4.9 and below enables NETIF_F_SCTP_CRC for veth by
  304. // the following commit.
  305. // This introduces a problem when conbined with a physical NIC without
  306. // NETIF_F_SCTP_CRC. As for a workaround, here we add an iptables entry
  307. // to fill the checksum.
  308. //
  309. // https://github.com/torvalds/linux/commit/c80fafbbb59ef9924962f83aac85531039395b18
  310. args = []string{
  311. "-p", proto,
  312. "--sport", strconv.Itoa(destPort),
  313. "-j", "CHECKSUM",
  314. "--checksum-fill",
  315. }
  316. if err := iptable.ProgramRule(Mangle, "POSTROUTING", action, args); err != nil {
  317. return err
  318. }
  319. }
  320. return nil
  321. }
  322. // Link adds reciprocal ACCEPT rule for two supplied IP addresses.
  323. // Traffic is allowed from ip1 to ip2 and vice-versa
  324. func (c *ChainInfo) Link(action Action, ip1, ip2 net.IP, port int, proto string, bridgeName string) error {
  325. iptable := GetIptable(c.IPVersion)
  326. // forward
  327. args := []string{
  328. "-i", bridgeName, "-o", bridgeName,
  329. "-p", proto,
  330. "-s", ip1.String(),
  331. "-d", ip2.String(),
  332. "--dport", strconv.Itoa(port),
  333. "-j", "ACCEPT",
  334. }
  335. if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil {
  336. return err
  337. }
  338. // reverse
  339. args[7], args[9] = args[9], args[7]
  340. args[10] = "--sport"
  341. return iptable.ProgramRule(Filter, c.Name, action, args)
  342. }
  343. // ProgramRule adds the rule specified by args only if the
  344. // rule is not already present in the chain. Reciprocally,
  345. // it removes the rule only if present.
  346. func (iptable IPTable) ProgramRule(table Table, chain string, action Action, args []string) error {
  347. if iptable.Exists(table, chain, args...) != (action == Delete) {
  348. return nil
  349. }
  350. return iptable.RawCombinedOutput(append([]string{"-t", string(table), string(action), chain}, args...)...)
  351. }
  352. // Prerouting adds linking rule to nat/PREROUTING chain.
  353. func (c *ChainInfo) Prerouting(action Action, args ...string) error {
  354. iptable := GetIptable(c.IPVersion)
  355. a := []string{"-t", string(Nat), string(action), "PREROUTING"}
  356. if len(args) > 0 {
  357. a = append(a, args...)
  358. }
  359. if output, err := iptable.Raw(a...); err != nil {
  360. return err
  361. } else if len(output) != 0 {
  362. return ChainError{Chain: "PREROUTING", Output: output}
  363. }
  364. return nil
  365. }
  366. // Output adds linking rule to an OUTPUT chain.
  367. func (c *ChainInfo) Output(action Action, args ...string) error {
  368. a := []string{"-t", string(c.Table), string(action), "OUTPUT"}
  369. if len(args) > 0 {
  370. a = append(a, args...)
  371. }
  372. if output, err := GetIptable(c.IPVersion).Raw(a...); err != nil {
  373. return err
  374. } else if len(output) != 0 {
  375. return ChainError{Chain: "OUTPUT", Output: output}
  376. }
  377. return nil
  378. }
  379. // Remove removes the chain.
  380. func (c *ChainInfo) Remove() error {
  381. iptable := GetIptable(c.IPVersion)
  382. // Ignore errors - This could mean the chains were never set up
  383. if c.Table == Nat {
  384. _ = c.Prerouting(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name)
  385. _ = c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", iptable.LoopbackByVersion(), "-j", c.Name)
  386. _ = c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name) // Created in versions <= 0.1.6
  387. _ = c.Prerouting(Delete)
  388. _ = c.Output(Delete)
  389. }
  390. _, _ = iptable.Raw("-t", string(c.Table), "-F", c.Name)
  391. _, _ = iptable.Raw("-t", string(c.Table), "-X", c.Name)
  392. return nil
  393. }
  394. // Exists checks if a rule exists
  395. func (iptable IPTable) Exists(table Table, chain string, rule ...string) bool {
  396. return iptable.exists(false, table, chain, rule...)
  397. }
  398. // ExistsNative behaves as Exists with the difference it
  399. // will always invoke `iptables` binary.
  400. func (iptable IPTable) ExistsNative(table Table, chain string, rule ...string) bool {
  401. return iptable.exists(true, table, chain, rule...)
  402. }
  403. func (iptable IPTable) exists(native bool, table Table, chain string, rule ...string) bool {
  404. if err := initCheck(); err != nil {
  405. // The exists() signature does not allow us to return an error, but at least
  406. // we can skip the (likely invalid) exec invocation.
  407. return false
  408. }
  409. f := iptable.Raw
  410. if native {
  411. f = iptable.raw
  412. }
  413. if table == "" {
  414. table = Filter
  415. }
  416. // if exit status is 0 then return true, the rule exists
  417. _, err := f(append([]string{"-t", string(table), "-C", chain}, rule...)...)
  418. return err == nil
  419. }
  420. // Maximum duration that an iptables operation can take
  421. // before flagging a warning.
  422. const opWarnTime = 2 * time.Second
  423. func filterOutput(start time.Time, output []byte, args ...string) []byte {
  424. // Flag operations that have taken a long time to complete
  425. opTime := time.Since(start)
  426. if opTime > opWarnTime {
  427. log.G(context.TODO()).Warnf("xtables contention detected while running [%s]: Waited for %.2f seconds and received %q", strings.Join(args, " "), float64(opTime)/float64(time.Second), string(output))
  428. }
  429. // ignore iptables' message about xtables lock:
  430. // it is a warning, not an error.
  431. if strings.Contains(string(output), xLockWaitMsg) {
  432. output = []byte("")
  433. }
  434. // Put further filters here if desired
  435. return output
  436. }
  437. // Raw calls 'iptables' system command, passing supplied arguments.
  438. func (iptable IPTable) Raw(args ...string) ([]byte, error) {
  439. if firewalldRunning {
  440. // select correct IP version for firewalld
  441. ipv := Iptables
  442. if iptable.Version == IPv6 {
  443. ipv = IP6Tables
  444. }
  445. startTime := time.Now()
  446. output, err := Passthrough(ipv, args...)
  447. if err == nil || !strings.Contains(err.Error(), "was not provided by any .service files") {
  448. return filterOutput(startTime, output, args...), err
  449. }
  450. }
  451. return iptable.raw(args...)
  452. }
  453. func (iptable IPTable) raw(args ...string) ([]byte, error) {
  454. if err := initCheck(); err != nil {
  455. return nil, err
  456. }
  457. if supportsXlock {
  458. args = append([]string{"--wait"}, args...)
  459. } else {
  460. bestEffortLock.Lock()
  461. defer bestEffortLock.Unlock()
  462. }
  463. path := iptablesPath
  464. commandName := "iptables"
  465. if iptable.Version == IPv6 {
  466. if ip6tablesPath == "" {
  467. return nil, fmt.Errorf("ip6tables is missing")
  468. }
  469. path = ip6tablesPath
  470. commandName = "ip6tables"
  471. }
  472. log.G(context.TODO()).Debugf("%s, %v", path, args)
  473. startTime := time.Now()
  474. output, err := exec.Command(path, args...).CombinedOutput()
  475. if err != nil {
  476. return nil, fmt.Errorf("iptables failed: %s %v: %s (%s)", commandName, strings.Join(args, " "), output, err)
  477. }
  478. return filterOutput(startTime, output, args...), err
  479. }
  480. // RawCombinedOutput internally calls the Raw function and returns a non nil
  481. // error if Raw returned a non nil error or a non empty output
  482. func (iptable IPTable) RawCombinedOutput(args ...string) error {
  483. if output, err := iptable.Raw(args...); err != nil || len(output) != 0 {
  484. return fmt.Errorf("%s (%v)", string(output), err)
  485. }
  486. return nil
  487. }
  488. // RawCombinedOutputNative behave as RawCombinedOutput with the difference it
  489. // will always invoke `iptables` binary
  490. func (iptable IPTable) RawCombinedOutputNative(args ...string) error {
  491. if output, err := iptable.raw(args...); err != nil || len(output) != 0 {
  492. return fmt.Errorf("%s (%v)", string(output), err)
  493. }
  494. return nil
  495. }
  496. // ExistChain checks if a chain exists
  497. func (iptable IPTable) ExistChain(chain string, table Table) bool {
  498. _, err := iptable.Raw("-t", string(table), "-nL", chain)
  499. return err == nil
  500. }
  501. // SetDefaultPolicy sets the passed default policy for the table/chain
  502. func (iptable IPTable) SetDefaultPolicy(table Table, chain string, policy Policy) error {
  503. if err := iptable.RawCombinedOutput("-t", string(table), "-P", chain, string(policy)); err != nil {
  504. return fmt.Errorf("setting default policy to %v in %v chain failed: %v", policy, chain, err)
  505. }
  506. return nil
  507. }
  508. // AddReturnRule adds a return rule for the chain in the filter table
  509. func (iptable IPTable) AddReturnRule(chain string) error {
  510. if iptable.Exists(Filter, chain, "-j", "RETURN") {
  511. return nil
  512. }
  513. err := iptable.RawCombinedOutput("-A", chain, "-j", "RETURN")
  514. if err != nil {
  515. return fmt.Errorf("unable to add return rule in %s chain: %v", chain, err)
  516. }
  517. return nil
  518. }
  519. // EnsureJumpRule ensures the jump rule is on top
  520. func (iptable IPTable) EnsureJumpRule(fromChain, toChain string) error {
  521. if iptable.Exists(Filter, fromChain, "-j", toChain) {
  522. err := iptable.RawCombinedOutput("-D", fromChain, "-j", toChain)
  523. if err != nil {
  524. return fmt.Errorf("unable to remove jump to %s rule in %s chain: %v", toChain, fromChain, err)
  525. }
  526. }
  527. err := iptable.RawCombinedOutput("-I", fromChain, "-j", toChain)
  528. if err != nil {
  529. return fmt.Errorf("unable to insert jump to %s rule in %s chain: %v", toChain, fromChain, err)
  530. }
  531. return nil
  532. }