iptables.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. //go:build linux
  2. // +build linux
  3. package iptables
  4. import (
  5. "context"
  6. "errors"
  7. "fmt"
  8. "net"
  9. "os/exec"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. "time"
  14. "github.com/containerd/containerd/log"
  15. "github.com/docker/docker/pkg/rootless"
  16. )
  // Action is the iptables command-line flag that selects how a rule is
  // applied to a chain (append, delete or insert).
  type Action string

  // Policy names a default chain policy.
  type Policy string

  // Table names an iptables table (nat, filter or mangle).
  type Table string

  // IPVersion identifies the IP protocol family, v4 or v6.
  type IPVersion string

  // Rule actions.
  const (
  	// Append puts the rule at the end of the chain.
  	Append Action = "-A"
  	// Delete removes the rule from the chain.
  	Delete Action = "-D"
  	// Insert puts the rule at the top of the chain.
  	Insert Action = "-I"
  )

  // Tables.
  const (
  	// Nat is the table holding address translation rules.
  	Nat Table = "nat"
  	// Filter is the table holding packet filtering rules.
  	Filter Table = "filter"
  	// Mangle is the table used for packet mangling rules.
  	Mangle Table = "mangle"
  )

  // Default policies.
  const (
  	// Drop is the iptables DROP policy.
  	Drop Policy = "DROP"
  	// Accept is the iptables ACCEPT policy.
  	Accept Policy = "ACCEPT"
  )

  // IP versions.
  const (
  	// IPv4 selects IP version 4.
  	IPv4 IPVersion = "IPV4"
  	// IPv6 selects IP version 6.
  	IPv6 IPVersion = "IPV6"
  )
  // ErrIptablesNotFound is returned by initCheck when no iptables binary
  // path has been detected.
  var ErrIptablesNotFound = errors.New("Iptables not found")

  var (
  	// iptablesPath and ip6tablesPath hold the binary locations found by
  	// detectIptables; an empty string means the binary was not found.
  	iptablesPath, ip6tablesPath string

  	// supportsXlock is set once detectIptables has successfully run
  	// iptables with the --wait flag.
  	supportsXlock bool

  	// xLockWaitMsg is the iptables notice about xtables lock contention
  	// that filterOutput strips from command output.
  	xLockWaitMsg = "Another app is currently holding the xtables lock"

  	// bestEffortLock serializes iptables invocations when the xtables
  	// lock (--wait) is not supported.
  	bestEffortLock sync.Mutex

  	// initOnce guards the one-time dependency initialization in initCheck.
  	initOnce sync.Once
  )
  58. // IPTable defines struct with IPVersion
  59. type IPTable struct {
  60. Version IPVersion
  61. }
  62. // ChainInfo defines the iptables chain.
  63. type ChainInfo struct {
  64. Name string
  65. Table Table
  66. HairpinMode bool
  67. IPTable IPTable
  68. }
  // ChainError reports unexpected output produced by an iptables command run
  // against a built-in chain.
  type ChainError struct {
  	// Chain is the chain the command targeted.
  	Chain string
  	// Output is the raw output the command printed.
  	Output []byte
  }

  // Error implements the error interface, formatting the chain name followed
  // by the captured command output.
  func (e ChainError) Error() string {
  	return "Error iptables " + e.Chain + ": " + string(e.Output)
  }
  77. func detectIptables() {
  78. path, err := exec.LookPath("iptables")
  79. if err != nil {
  80. log.G(context.TODO()).WithError(err).Warnf("failed to find iptables")
  81. return
  82. }
  83. iptablesPath = path
  84. // The --wait flag was added in iptables v1.6.0.
  85. // TODO remove this check once we drop support for CentOS/RHEL 7, which uses an older version of iptables
  86. if out, err := exec.Command(path, "--wait", "-L", "-n").CombinedOutput(); err != nil {
  87. log.G(context.TODO()).WithError(err).Infof("unable to detect if iptables supports xlock: 'iptables --wait -L -n': `%s`", strings.TrimSpace(string(out)))
  88. } else {
  89. supportsXlock = true
  90. }
  91. path, err = exec.LookPath("ip6tables")
  92. if err != nil {
  93. log.G(context.TODO()).WithError(err).Warnf("unable to find ip6tables")
  94. } else {
  95. ip6tablesPath = path
  96. }
  97. }
  98. func initFirewalld() {
  99. // When running with RootlessKit, firewalld is running as the root outside our network namespace
  100. // https://github.com/moby/moby/issues/43781
  101. if rootless.RunningWithRootlessKit() {
  102. log.G(context.TODO()).Info("skipping firewalld management for rootless mode")
  103. return
  104. }
  105. if err := FirewalldInit(); err != nil {
  106. log.G(context.TODO()).WithError(err).Debugf("unable to initialize firewalld; using raw iptables instead")
  107. }
  108. }
  109. func initDependencies() {
  110. initFirewalld()
  111. detectIptables()
  112. }
  113. func initCheck() error {
  114. initOnce.Do(initDependencies)
  115. if iptablesPath == "" {
  116. return ErrIptablesNotFound
  117. }
  118. return nil
  119. }
  120. // GetIptable returns an instance of IPTable with specified version
  121. func GetIptable(version IPVersion) *IPTable {
  122. return &IPTable{Version: version}
  123. }
  124. // NewChain adds a new chain to ip table.
  125. func (iptable IPTable) NewChain(name string, table Table, hairpinMode bool) (*ChainInfo, error) {
  126. if table == "" {
  127. table = Filter
  128. }
  129. // Add chain if it doesn't exist
  130. if _, err := iptable.Raw("-t", string(table), "-n", "-L", name); err != nil {
  131. if output, err := iptable.Raw("-t", string(table), "-N", name); err != nil {
  132. return nil, err
  133. } else if len(output) != 0 {
  134. return nil, fmt.Errorf("could not create %s/%s chain: %s", table, name, output)
  135. }
  136. }
  137. return &ChainInfo{
  138. Name: name,
  139. Table: table,
  140. HairpinMode: hairpinMode,
  141. IPTable: iptable,
  142. }, nil
  143. }
  144. // LoopbackByVersion returns loopback address by version
  145. func (iptable IPTable) LoopbackByVersion() string {
  146. if iptable.Version == IPv6 {
  147. return "::1/128"
  148. }
  149. return "127.0.0.0/8"
  150. }
  151. // ProgramChain is used to add rules to a chain
  152. func (iptable IPTable) ProgramChain(c *ChainInfo, bridgeName string, hairpinMode, enable bool) error {
  153. if c.Name == "" {
  154. return errors.New("Could not program chain, missing chain name")
  155. }
  156. // Either add or remove the interface from the firewalld zone
  157. if firewalldRunning {
  158. if enable {
  159. if err := AddInterfaceFirewalld(bridgeName); err != nil {
  160. return err
  161. }
  162. } else {
  163. if err := DelInterfaceFirewalld(bridgeName); err != nil {
  164. return err
  165. }
  166. }
  167. }
  168. switch c.Table {
  169. case Nat:
  170. preroute := []string{
  171. "-m", "addrtype",
  172. "--dst-type", "LOCAL",
  173. "-j", c.Name,
  174. }
  175. if !iptable.Exists(Nat, "PREROUTING", preroute...) && enable {
  176. if err := c.Prerouting(Append, preroute...); err != nil {
  177. return fmt.Errorf("Failed to inject %s in PREROUTING chain: %s", c.Name, err)
  178. }
  179. } else if iptable.Exists(Nat, "PREROUTING", preroute...) && !enable {
  180. if err := c.Prerouting(Delete, preroute...); err != nil {
  181. return fmt.Errorf("Failed to remove %s in PREROUTING chain: %s", c.Name, err)
  182. }
  183. }
  184. output := []string{
  185. "-m", "addrtype",
  186. "--dst-type", "LOCAL",
  187. "-j", c.Name,
  188. }
  189. if !hairpinMode {
  190. output = append(output, "!", "--dst", iptable.LoopbackByVersion())
  191. }
  192. if !iptable.Exists(Nat, "OUTPUT", output...) && enable {
  193. if err := c.Output(Append, output...); err != nil {
  194. return fmt.Errorf("Failed to inject %s in OUTPUT chain: %s", c.Name, err)
  195. }
  196. } else if iptable.Exists(Nat, "OUTPUT", output...) && !enable {
  197. if err := c.Output(Delete, output...); err != nil {
  198. return fmt.Errorf("Failed to inject %s in OUTPUT chain: %s", c.Name, err)
  199. }
  200. }
  201. case Filter:
  202. if bridgeName == "" {
  203. return fmt.Errorf("Could not program chain %s/%s, missing bridge name",
  204. c.Table, c.Name)
  205. }
  206. link := []string{
  207. "-o", bridgeName,
  208. "-j", c.Name,
  209. }
  210. if !iptable.Exists(Filter, "FORWARD", link...) && enable {
  211. insert := append([]string{string(Insert), "FORWARD"}, link...)
  212. if output, err := iptable.Raw(insert...); err != nil {
  213. return err
  214. } else if len(output) != 0 {
  215. return fmt.Errorf("Could not create linking rule to %s/%s: %s", c.Table, c.Name, output)
  216. }
  217. } else if iptable.Exists(Filter, "FORWARD", link...) && !enable {
  218. del := append([]string{string(Delete), "FORWARD"}, link...)
  219. if output, err := iptable.Raw(del...); err != nil {
  220. return err
  221. } else if len(output) != 0 {
  222. return fmt.Errorf("Could not delete linking rule from %s/%s: %s", c.Table, c.Name, output)
  223. }
  224. }
  225. establish := []string{
  226. "-o", bridgeName,
  227. "-m", "conntrack",
  228. "--ctstate", "RELATED,ESTABLISHED",
  229. "-j", "ACCEPT",
  230. }
  231. if !iptable.Exists(Filter, "FORWARD", establish...) && enable {
  232. insert := append([]string{string(Insert), "FORWARD"}, establish...)
  233. if output, err := iptable.Raw(insert...); err != nil {
  234. return err
  235. } else if len(output) != 0 {
  236. return fmt.Errorf("Could not create establish rule to %s: %s", c.Table, output)
  237. }
  238. } else if iptable.Exists(Filter, "FORWARD", establish...) && !enable {
  239. del := append([]string{string(Delete), "FORWARD"}, establish...)
  240. if output, err := iptable.Raw(del...); err != nil {
  241. return err
  242. } else if len(output) != 0 {
  243. return fmt.Errorf("Could not delete establish rule from %s: %s", c.Table, output)
  244. }
  245. }
  246. }
  247. return nil
  248. }
  249. // RemoveExistingChain removes existing chain from the table.
  250. func (iptable IPTable) RemoveExistingChain(name string, table Table) error {
  251. c := &ChainInfo{
  252. Name: name,
  253. Table: table,
  254. IPTable: iptable,
  255. }
  256. if string(c.Table) == "" {
  257. c.Table = Filter
  258. }
  259. return c.Remove()
  260. }
  261. // Forward adds forwarding rule to 'filter' table and corresponding nat rule to 'nat' table.
  262. func (c *ChainInfo) Forward(action Action, ip net.IP, port int, proto, destAddr string, destPort int, bridgeName string) error {
  263. iptable := GetIptable(c.IPTable.Version)
  264. daddr := ip.String()
  265. if ip.IsUnspecified() {
  266. // iptables interprets "0.0.0.0" as "0.0.0.0/32", whereas we
  267. // want "0.0.0.0/0". "0/0" is correctly interpreted as "any
  268. // value" by both iptables and ip6tables.
  269. daddr = "0/0"
  270. }
  271. args := []string{
  272. "-p", proto,
  273. "-d", daddr,
  274. "--dport", strconv.Itoa(port),
  275. "-j", "DNAT",
  276. "--to-destination", net.JoinHostPort(destAddr, strconv.Itoa(destPort)),
  277. }
  278. if !c.HairpinMode {
  279. args = append(args, "!", "-i", bridgeName)
  280. }
  281. if err := iptable.ProgramRule(Nat, c.Name, action, args); err != nil {
  282. return err
  283. }
  284. args = []string{
  285. "!", "-i", bridgeName,
  286. "-o", bridgeName,
  287. "-p", proto,
  288. "-d", destAddr,
  289. "--dport", strconv.Itoa(destPort),
  290. "-j", "ACCEPT",
  291. }
  292. if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil {
  293. return err
  294. }
  295. args = []string{
  296. "-p", proto,
  297. "-s", destAddr,
  298. "-d", destAddr,
  299. "--dport", strconv.Itoa(destPort),
  300. "-j", "MASQUERADE",
  301. }
  302. if err := iptable.ProgramRule(Nat, "POSTROUTING", action, args); err != nil {
  303. return err
  304. }
  305. if proto == "sctp" {
  306. // Linux kernel v4.9 and below enables NETIF_F_SCTP_CRC for veth by
  307. // the following commit.
  308. // This introduces a problem when conbined with a physical NIC without
  309. // NETIF_F_SCTP_CRC. As for a workaround, here we add an iptables entry
  310. // to fill the checksum.
  311. //
  312. // https://github.com/torvalds/linux/commit/c80fafbbb59ef9924962f83aac85531039395b18
  313. args = []string{
  314. "-p", proto,
  315. "--sport", strconv.Itoa(destPort),
  316. "-j", "CHECKSUM",
  317. "--checksum-fill",
  318. }
  319. if err := iptable.ProgramRule(Mangle, "POSTROUTING", action, args); err != nil {
  320. return err
  321. }
  322. }
  323. return nil
  324. }
  325. // Link adds reciprocal ACCEPT rule for two supplied IP addresses.
  326. // Traffic is allowed from ip1 to ip2 and vice-versa
  327. func (c *ChainInfo) Link(action Action, ip1, ip2 net.IP, port int, proto string, bridgeName string) error {
  328. iptable := GetIptable(c.IPTable.Version)
  329. // forward
  330. args := []string{
  331. "-i", bridgeName, "-o", bridgeName,
  332. "-p", proto,
  333. "-s", ip1.String(),
  334. "-d", ip2.String(),
  335. "--dport", strconv.Itoa(port),
  336. "-j", "ACCEPT",
  337. }
  338. if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil {
  339. return err
  340. }
  341. // reverse
  342. args[7], args[9] = args[9], args[7]
  343. args[10] = "--sport"
  344. return iptable.ProgramRule(Filter, c.Name, action, args)
  345. }
  346. // ProgramRule adds the rule specified by args only if the
  347. // rule is not already present in the chain. Reciprocally,
  348. // it removes the rule only if present.
  349. func (iptable IPTable) ProgramRule(table Table, chain string, action Action, args []string) error {
  350. if iptable.Exists(table, chain, args...) != (action == Delete) {
  351. return nil
  352. }
  353. return iptable.RawCombinedOutput(append([]string{"-t", string(table), string(action), chain}, args...)...)
  354. }
  355. // Prerouting adds linking rule to nat/PREROUTING chain.
  356. func (c *ChainInfo) Prerouting(action Action, args ...string) error {
  357. iptable := GetIptable(c.IPTable.Version)
  358. a := []string{"-t", string(Nat), string(action), "PREROUTING"}
  359. if len(args) > 0 {
  360. a = append(a, args...)
  361. }
  362. if output, err := iptable.Raw(a...); err != nil {
  363. return err
  364. } else if len(output) != 0 {
  365. return ChainError{Chain: "PREROUTING", Output: output}
  366. }
  367. return nil
  368. }
  369. // Output adds linking rule to an OUTPUT chain.
  370. func (c *ChainInfo) Output(action Action, args ...string) error {
  371. iptable := GetIptable(c.IPTable.Version)
  372. a := []string{"-t", string(c.Table), string(action), "OUTPUT"}
  373. if len(args) > 0 {
  374. a = append(a, args...)
  375. }
  376. if output, err := iptable.Raw(a...); err != nil {
  377. return err
  378. } else if len(output) != 0 {
  379. return ChainError{Chain: "OUTPUT", Output: output}
  380. }
  381. return nil
  382. }
  383. // Remove removes the chain.
  384. func (c *ChainInfo) Remove() error {
  385. iptable := GetIptable(c.IPTable.Version)
  386. // Ignore errors - This could mean the chains were never set up
  387. if c.Table == Nat {
  388. c.Prerouting(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name)
  389. c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", iptable.LoopbackByVersion(), "-j", c.Name)
  390. c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name) // Created in versions <= 0.1.6
  391. c.Prerouting(Delete)
  392. c.Output(Delete)
  393. }
  394. iptable.Raw("-t", string(c.Table), "-F", c.Name)
  395. iptable.Raw("-t", string(c.Table), "-X", c.Name)
  396. return nil
  397. }
  398. // Exists checks if a rule exists
  399. func (iptable IPTable) Exists(table Table, chain string, rule ...string) bool {
  400. return iptable.exists(false, table, chain, rule...)
  401. }
  402. // ExistsNative behaves as Exists with the difference it
  403. // will always invoke `iptables` binary.
  404. func (iptable IPTable) ExistsNative(table Table, chain string, rule ...string) bool {
  405. return iptable.exists(true, table, chain, rule...)
  406. }
  407. func (iptable IPTable) exists(native bool, table Table, chain string, rule ...string) bool {
  408. if err := initCheck(); err != nil {
  409. // The exists() signature does not allow us to return an error, but at least
  410. // we can skip the (likely invalid) exec invocation.
  411. return false
  412. }
  413. f := iptable.Raw
  414. if native {
  415. f = iptable.raw
  416. }
  417. if table == "" {
  418. table = Filter
  419. }
  420. // if exit status is 0 then return true, the rule exists
  421. _, err := f(append([]string{"-t", string(table), "-C", chain}, rule...)...)
  422. return err == nil
  423. }
  424. // Maximum duration that an iptables operation can take
  425. // before flagging a warning.
  426. const opWarnTime = 2 * time.Second
  427. func filterOutput(start time.Time, output []byte, args ...string) []byte {
  428. // Flag operations that have taken a long time to complete
  429. opTime := time.Since(start)
  430. if opTime > opWarnTime {
  431. log.G(context.TODO()).Warnf("xtables contention detected while running [%s]: Waited for %.2f seconds and received %q", strings.Join(args, " "), float64(opTime)/float64(time.Second), string(output))
  432. }
  433. // ignore iptables' message about xtables lock:
  434. // it is a warning, not an error.
  435. if strings.Contains(string(output), xLockWaitMsg) {
  436. output = []byte("")
  437. }
  438. // Put further filters here if desired
  439. return output
  440. }
  441. // Raw calls 'iptables' system command, passing supplied arguments.
  442. func (iptable IPTable) Raw(args ...string) ([]byte, error) {
  443. if firewalldRunning {
  444. // select correct IP version for firewalld
  445. ipv := Iptables
  446. if iptable.Version == IPv6 {
  447. ipv = IP6Tables
  448. }
  449. startTime := time.Now()
  450. output, err := Passthrough(ipv, args...)
  451. if err == nil || !strings.Contains(err.Error(), "was not provided by any .service files") {
  452. return filterOutput(startTime, output, args...), err
  453. }
  454. }
  455. return iptable.raw(args...)
  456. }
  457. func (iptable IPTable) raw(args ...string) ([]byte, error) {
  458. if err := initCheck(); err != nil {
  459. return nil, err
  460. }
  461. if supportsXlock {
  462. args = append([]string{"--wait"}, args...)
  463. } else {
  464. bestEffortLock.Lock()
  465. defer bestEffortLock.Unlock()
  466. }
  467. path := iptablesPath
  468. commandName := "iptables"
  469. if iptable.Version == IPv6 {
  470. if ip6tablesPath == "" {
  471. return nil, fmt.Errorf("ip6tables is missing")
  472. }
  473. path = ip6tablesPath
  474. commandName = "ip6tables"
  475. }
  476. log.G(context.TODO()).Debugf("%s, %v", path, args)
  477. startTime := time.Now()
  478. output, err := exec.Command(path, args...).CombinedOutput()
  479. if err != nil {
  480. return nil, fmt.Errorf("iptables failed: %s %v: %s (%s)", commandName, strings.Join(args, " "), output, err)
  481. }
  482. return filterOutput(startTime, output, args...), err
  483. }
  484. // RawCombinedOutput internally calls the Raw function and returns a non nil
  485. // error if Raw returned a non nil error or a non empty output
  486. func (iptable IPTable) RawCombinedOutput(args ...string) error {
  487. if output, err := iptable.Raw(args...); err != nil || len(output) != 0 {
  488. return fmt.Errorf("%s (%v)", string(output), err)
  489. }
  490. return nil
  491. }
  492. // RawCombinedOutputNative behave as RawCombinedOutput with the difference it
  493. // will always invoke `iptables` binary
  494. func (iptable IPTable) RawCombinedOutputNative(args ...string) error {
  495. if output, err := iptable.raw(args...); err != nil || len(output) != 0 {
  496. return fmt.Errorf("%s (%v)", string(output), err)
  497. }
  498. return nil
  499. }
  500. // ExistChain checks if a chain exists
  501. func (iptable IPTable) ExistChain(chain string, table Table) bool {
  502. if _, err := iptable.Raw("-t", string(table), "-nL", chain); err == nil {
  503. return true
  504. }
  505. return false
  506. }
  507. // SetDefaultPolicy sets the passed default policy for the table/chain
  508. func (iptable IPTable) SetDefaultPolicy(table Table, chain string, policy Policy) error {
  509. if err := iptable.RawCombinedOutput("-t", string(table), "-P", chain, string(policy)); err != nil {
  510. return fmt.Errorf("setting default policy to %v in %v chain failed: %v", policy, chain, err)
  511. }
  512. return nil
  513. }
  514. // AddReturnRule adds a return rule for the chain in the filter table
  515. func (iptable IPTable) AddReturnRule(chain string) error {
  516. if iptable.Exists(Filter, chain, "-j", "RETURN") {
  517. return nil
  518. }
  519. err := iptable.RawCombinedOutput("-A", chain, "-j", "RETURN")
  520. if err != nil {
  521. return fmt.Errorf("unable to add return rule in %s chain: %v", chain, err)
  522. }
  523. return nil
  524. }
  525. // EnsureJumpRule ensures the jump rule is on top
  526. func (iptable IPTable) EnsureJumpRule(fromChain, toChain string) error {
  527. if iptable.Exists(Filter, fromChain, "-j", toChain) {
  528. err := iptable.RawCombinedOutput("-D", fromChain, "-j", toChain)
  529. if err != nil {
  530. return fmt.Errorf("unable to remove jump to %s rule in %s chain: %v", toChain, fromChain, err)
  531. }
  532. }
  533. err := iptable.RawCombinedOutput("-I", fromChain, "-j", toChain)
  534. if err != nil {
  535. return fmt.Errorf("unable to insert jump to %s rule in %s chain: %v", toChain, fromChain, err)
  536. }
  537. return nil
  538. }