123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621 |
- //go:build linux
- package iptables
- import (
- "context"
- "errors"
- "fmt"
- "net"
- "os/exec"
- "strconv"
- "strings"
- "sync"
- "time"
- "github.com/containerd/log"
- "github.com/docker/docker/errdefs"
- "github.com/docker/docker/pkg/rootless"
- )
// Action is the iptables command-line flag that selects what to do with a
// rule (append, delete or insert).
type Action string

const (
	// Append ("-A") adds the rule at the end of the chain.
	Append = Action("-A")
	// Delete ("-D") removes the rule from the chain.
	Delete = Action("-D")
	// Insert ("-I") adds the rule at the top of the chain.
	Insert = Action("-I")
)
// Policy names a default policy that can be set on an iptables chain.
type Policy string

const (
	// Drop is the iptables DROP policy.
	Drop = Policy("DROP")
	// Accept is the iptables ACCEPT policy.
	Accept = Policy("ACCEPT")
)
// Table names an iptables table: nat, filter or mangle.
type Table string

const (
	// Nat is the table holding address-translation rules.
	Nat = Table("nat")
	// Filter is the table holding packet-filtering rules.
	Filter = Table("filter")
	// Mangle is the table holding packet-mangling rules.
	Mangle = Table("mangle")
)
// IPVersion selects the IP protocol version (v4 or v6) that rules apply to.
type IPVersion string

const (
	// IPv4 selects IP version 4.
	IPv4 = IPVersion("IPV4")
	// IPv6 selects IP version 6.
	IPv6 = IPVersion("IPV6")
)
var (
	// iptablesPath is the resolved location of the iptables binary; it stays
	// empty until detection succeeds.
	iptablesPath string
	// ip6tablesPath is the resolved location of the ip6tables binary; it
	// stays empty until detection succeeds.
	ip6tablesPath string
	// supportsXlock records whether the detected iptables accepts "--wait".
	supportsXlock bool
	// bestEffortLock serializes iptables commands when the xtables lock is
	// not supported.
	bestEffortLock sync.Mutex
	// initOnce guards the one-time initialization of the package dependencies.
	initOnce sync.Once
)
- // IPTable defines struct with [IPVersion].
- type IPTable struct {
- ipVersion IPVersion
- }
- // ChainInfo defines the iptables chain.
- type ChainInfo struct {
- Name string
- Table Table
- HairpinMode bool
- IPVersion IPVersion
- }
// ChainError reports unexpected output produced by an iptables operation on
// a chain.
type ChainError struct {
	// Chain is the name of the chain the command targeted.
	Chain string
	// Output is the raw output returned by the command.
	Output []byte
}

// Error implements the error interface, combining the chain name and the
// command output into a single message.
func (e ChainError) Error() string {
	chain, output := e.Chain, e.Output
	return fmt.Sprintf("error iptables %s: %s", chain, output)
}
- // loopbackAddress returns the loopback address for the given IP version.
- func loopbackAddress(version IPVersion) string {
- switch version {
- case IPv4, "":
- // IPv4 (default for backward-compatibility)
- return "127.0.0.0/8"
- case IPv6:
- return "::1/128"
- default:
- panic("unknown IP version: " + version)
- }
- }
- func detectIptables() {
- path, err := exec.LookPath("iptables")
- if err != nil {
- log.G(context.TODO()).WithError(err).Warnf("failed to find iptables")
- return
- }
- iptablesPath = path
- // The --wait flag was added in iptables v1.6.0.
- // TODO remove this check once we drop support for CentOS/RHEL 7, which uses an older version of iptables
- if out, err := exec.Command(path, "--wait", "-L", "-n").CombinedOutput(); err != nil {
- log.G(context.TODO()).WithError(err).Infof("unable to detect if iptables supports xlock: 'iptables --wait -L -n': `%s`", strings.TrimSpace(string(out)))
- } else {
- supportsXlock = true
- }
- path, err = exec.LookPath("ip6tables")
- if err != nil {
- log.G(context.TODO()).WithError(err).Warnf("unable to find ip6tables")
- } else {
- ip6tablesPath = path
- }
- }
- func initFirewalld() {
- // When running with RootlessKit, firewalld is running as the root outside our network namespace
- // https://github.com/moby/moby/issues/43781
- if rootless.RunningWithRootlessKit() {
- log.G(context.TODO()).Info("skipping firewalld management for rootless mode")
- return
- }
- if err := firewalldInit(); err != nil {
- log.G(context.TODO()).WithError(err).Debugf("unable to initialize firewalld; using raw iptables instead")
- }
- }
- func initDependencies() {
- initFirewalld()
- detectIptables()
- }
- func initCheck() error {
- initOnce.Do(initDependencies)
- if iptablesPath == "" {
- return errors.New("iptables not found")
- }
- return nil
- }
- // GetIptable returns an instance of IPTable with specified version ([IPv4]
- // or [IPv6]). It panics if an invalid [IPVersion] is provided.
- func GetIptable(version IPVersion) *IPTable {
- switch version {
- case IPv4, IPv6:
- // valid version
- case "":
- // default is IPv4 for backward-compatibility
- version = IPv4
- default:
- panic("unknown IP version: " + version)
- }
- return &IPTable{ipVersion: version}
- }
- // NewChain adds a new chain to ip table.
- func (iptable IPTable) NewChain(name string, table Table, hairpinMode bool) (*ChainInfo, error) {
- if name == "" {
- return nil, fmt.Errorf("could not create chain: chain name is empty")
- }
- if table == "" {
- return nil, fmt.Errorf("could not create chain %s: invalid table name: table name is empty", name)
- }
- // Add chain if it doesn't exist
- if _, err := iptable.Raw("-t", string(table), "-n", "-L", name); err != nil {
- if output, err := iptable.Raw("-t", string(table), "-N", name); err != nil {
- return nil, err
- } else if len(output) != 0 {
- return nil, fmt.Errorf("could not create %s/%s chain: %s", table, name, output)
- }
- }
- return &ChainInfo{
- Name: name,
- Table: table,
- HairpinMode: hairpinMode,
- IPVersion: iptable.ipVersion,
- }, nil
- }
- // ProgramChain is used to add rules to a chain
- func (iptable IPTable) ProgramChain(c *ChainInfo, bridgeName string, hairpinMode, enable bool) error {
- if c.Name == "" {
- return errors.New("could not program chain, missing chain name")
- }
- // Either add or remove the interface from the firewalld zone, if firewalld is running.
- if enable {
- if err := AddInterfaceFirewalld(bridgeName); err != nil {
- return err
- }
- } else {
- if err := DelInterfaceFirewalld(bridgeName); err != nil && !errdefs.IsNotFound(err) {
- return err
- }
- }
- switch c.Table {
- case Nat:
- preroute := []string{
- "-m", "addrtype",
- "--dst-type", "LOCAL",
- "-j", c.Name,
- }
- if !iptable.Exists(Nat, "PREROUTING", preroute...) && enable {
- if err := c.Prerouting(Append, preroute...); err != nil {
- return fmt.Errorf("failed to inject %s in PREROUTING chain: %s", c.Name, err)
- }
- } else if iptable.Exists(Nat, "PREROUTING", preroute...) && !enable {
- if err := c.Prerouting(Delete, preroute...); err != nil {
- return fmt.Errorf("failed to remove %s in PREROUTING chain: %s", c.Name, err)
- }
- }
- output := []string{
- "-m", "addrtype",
- "--dst-type", "LOCAL",
- "-j", c.Name,
- }
- if !hairpinMode {
- output = append(output, "!", "--dst", loopbackAddress(iptable.ipVersion))
- }
- if !iptable.Exists(Nat, "OUTPUT", output...) && enable {
- if err := c.Output(Append, output...); err != nil {
- return fmt.Errorf("failed to inject %s in OUTPUT chain: %s", c.Name, err)
- }
- } else if iptable.Exists(Nat, "OUTPUT", output...) && !enable {
- if err := c.Output(Delete, output...); err != nil {
- return fmt.Errorf("failed to inject %s in OUTPUT chain: %s", c.Name, err)
- }
- }
- case Filter:
- if bridgeName == "" {
- return fmt.Errorf("could not program chain %s/%s, missing bridge name", c.Table, c.Name)
- }
- link := []string{
- "-o", bridgeName,
- "-j", c.Name,
- }
- if !iptable.Exists(Filter, "FORWARD", link...) && enable {
- insert := append([]string{string(Insert), "FORWARD"}, link...)
- if output, err := iptable.Raw(insert...); err != nil {
- return err
- } else if len(output) != 0 {
- return fmt.Errorf("could not create linking rule to %s/%s: %s", c.Table, c.Name, output)
- }
- } else if iptable.Exists(Filter, "FORWARD", link...) && !enable {
- del := append([]string{string(Delete), "FORWARD"}, link...)
- if output, err := iptable.Raw(del...); err != nil {
- return err
- } else if len(output) != 0 {
- return fmt.Errorf("could not delete linking rule from %s/%s: %s", c.Table, c.Name, output)
- }
- }
- establish := []string{
- "-o", bridgeName,
- "-m", "conntrack",
- "--ctstate", "RELATED,ESTABLISHED",
- "-j", "ACCEPT",
- }
- if !iptable.Exists(Filter, "FORWARD", establish...) && enable {
- insert := append([]string{string(Insert), "FORWARD"}, establish...)
- if output, err := iptable.Raw(insert...); err != nil {
- return err
- } else if len(output) != 0 {
- return fmt.Errorf("could not create establish rule to %s: %s", c.Table, output)
- }
- } else if iptable.Exists(Filter, "FORWARD", establish...) && !enable {
- del := append([]string{string(Delete), "FORWARD"}, establish...)
- if output, err := iptable.Raw(del...); err != nil {
- return err
- } else if len(output) != 0 {
- return fmt.Errorf("could not delete establish rule from %s: %s", c.Table, output)
- }
- }
- }
- return nil
- }
- // RemoveExistingChain removes existing chain from the table.
- func (iptable IPTable) RemoveExistingChain(name string, table Table) error {
- if name == "" {
- return fmt.Errorf("could not remove chain: chain name is empty")
- }
- if table == "" {
- return fmt.Errorf("could not remove chain %s: invalid table name: table name is empty", name)
- }
- c := &ChainInfo{
- Name: name,
- Table: table,
- IPVersion: iptable.ipVersion,
- }
- return c.Remove()
- }
- // Forward adds forwarding rule to 'filter' table and corresponding nat rule to 'nat' table.
- func (c *ChainInfo) Forward(action Action, ip net.IP, port int, proto, destAddr string, destPort int, bridgeName string) error {
- iptable := GetIptable(c.IPVersion)
- daddr := ip.String()
- if ip.IsUnspecified() {
- // iptables interprets "0.0.0.0" as "0.0.0.0/32", whereas we
- // want "0.0.0.0/0". "0/0" is correctly interpreted as "any
- // value" by both iptables and ip6tables.
- daddr = "0/0"
- }
- args := []string{
- "-p", proto,
- "-d", daddr,
- "--dport", strconv.Itoa(port),
- "-j", "DNAT",
- "--to-destination", net.JoinHostPort(destAddr, strconv.Itoa(destPort)),
- }
- if !c.HairpinMode {
- args = append(args, "!", "-i", bridgeName)
- }
- if err := iptable.ProgramRule(Nat, c.Name, action, args); err != nil {
- return err
- }
- args = []string{
- "!", "-i", bridgeName,
- "-o", bridgeName,
- "-p", proto,
- "-d", destAddr,
- "--dport", strconv.Itoa(destPort),
- "-j", "ACCEPT",
- }
- if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil {
- return err
- }
- args = []string{
- "-p", proto,
- "-s", destAddr,
- "-d", destAddr,
- "--dport", strconv.Itoa(destPort),
- "-j", "MASQUERADE",
- }
- if err := iptable.ProgramRule(Nat, "POSTROUTING", action, args); err != nil {
- return err
- }
- if proto == "sctp" {
- // Linux kernel v4.9 and below enables NETIF_F_SCTP_CRC for veth by
- // the following commit.
- // This introduces a problem when conbined with a physical NIC without
- // NETIF_F_SCTP_CRC. As for a workaround, here we add an iptables entry
- // to fill the checksum.
- //
- // https://github.com/torvalds/linux/commit/c80fafbbb59ef9924962f83aac85531039395b18
- args = []string{
- "-p", proto,
- "--sport", strconv.Itoa(destPort),
- "-j", "CHECKSUM",
- "--checksum-fill",
- }
- if err := iptable.ProgramRule(Mangle, "POSTROUTING", action, args); err != nil {
- return err
- }
- }
- return nil
- }
- // Link adds reciprocal ACCEPT rule for two supplied IP addresses.
- // Traffic is allowed from ip1 to ip2 and vice-versa
- func (c *ChainInfo) Link(action Action, ip1, ip2 net.IP, port int, proto string, bridgeName string) error {
- iptable := GetIptable(c.IPVersion)
- // forward
- args := []string{
- "-i", bridgeName, "-o", bridgeName,
- "-p", proto,
- "-s", ip1.String(),
- "-d", ip2.String(),
- "--dport", strconv.Itoa(port),
- "-j", "ACCEPT",
- }
- if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil {
- return err
- }
- // reverse
- args[7], args[9] = args[9], args[7]
- args[10] = "--sport"
- return iptable.ProgramRule(Filter, c.Name, action, args)
- }
- // ProgramRule adds the rule specified by args only if the
- // rule is not already present in the chain. Reciprocally,
- // it removes the rule only if present.
- func (iptable IPTable) ProgramRule(table Table, chain string, action Action, args []string) error {
- if iptable.Exists(table, chain, args...) != (action == Delete) {
- return nil
- }
- return iptable.RawCombinedOutput(append([]string{"-t", string(table), string(action), chain}, args...)...)
- }
- // Prerouting adds linking rule to nat/PREROUTING chain.
- func (c *ChainInfo) Prerouting(action Action, args ...string) error {
- iptable := GetIptable(c.IPVersion)
- a := []string{"-t", string(Nat), string(action), "PREROUTING"}
- if len(args) > 0 {
- a = append(a, args...)
- }
- if output, err := iptable.Raw(a...); err != nil {
- return err
- } else if len(output) != 0 {
- return ChainError{Chain: "PREROUTING", Output: output}
- }
- return nil
- }
- // Output adds linking rule to an OUTPUT chain.
- func (c *ChainInfo) Output(action Action, args ...string) error {
- a := []string{"-t", string(c.Table), string(action), "OUTPUT"}
- if len(args) > 0 {
- a = append(a, args...)
- }
- if output, err := GetIptable(c.IPVersion).Raw(a...); err != nil {
- return err
- } else if len(output) != 0 {
- return ChainError{Chain: "OUTPUT", Output: output}
- }
- return nil
- }
- // Remove removes the chain.
- func (c *ChainInfo) Remove() error {
- // Ignore errors - This could mean the chains were never set up
- if c.Table == Nat {
- _ = c.Prerouting(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name)
- _ = c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", loopbackAddress(c.IPVersion), "-j", c.Name)
- _ = c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name) // Created in versions <= 0.1.6
- _ = c.Prerouting(Delete)
- _ = c.Output(Delete)
- }
- iptable := GetIptable(c.IPVersion)
- _, _ = iptable.Raw("-t", string(c.Table), "-F", c.Name)
- _, _ = iptable.Raw("-t", string(c.Table), "-X", c.Name)
- return nil
- }
- // Exists checks if a rule exists
- func (iptable IPTable) Exists(table Table, chain string, rule ...string) bool {
- return iptable.exists(false, table, chain, rule...)
- }
- // ExistsNative behaves as Exists with the difference it
- // will always invoke `iptables` binary.
- func (iptable IPTable) ExistsNative(table Table, chain string, rule ...string) bool {
- return iptable.exists(true, table, chain, rule...)
- }
- func (iptable IPTable) exists(native bool, table Table, chain string, rule ...string) bool {
- if err := initCheck(); err != nil {
- // The exists() signature does not allow us to return an error, but at least
- // we can skip the (likely invalid) exec invocation.
- return false
- }
- f := iptable.Raw
- if native {
- f = iptable.raw
- }
- if table == "" {
- table = Filter
- }
- // if exit status is 0 then return true, the rule exists
- _, err := f(append([]string{"-t", string(table), "-C", chain}, rule...)...)
- return err == nil
- }
const (
	// opWarnTime is how long an iptables operation may take before a
	// warning is logged.
	opWarnTime time.Duration = 2 * time.Second

	// xLockWaitMsg is the iptables notice about the xtables lock; output
	// containing it is suppressed.
	xLockWaitMsg string = "Another app is currently holding the xtables lock"
)
- func filterOutput(start time.Time, output []byte, args ...string) []byte {
- if opTime := time.Since(start); opTime > opWarnTime {
- // Flag operations that have taken a long time to complete
- log.G(context.TODO()).Warnf("xtables contention detected while running [%s]: Waited for %.2f seconds and received %q", strings.Join(args, " "), float64(opTime)/float64(time.Second), string(output))
- }
- // ignore iptables' message about xtables lock:
- // it is a warning, not an error.
- if strings.Contains(string(output), xLockWaitMsg) {
- output = []byte("")
- }
- // Put further filters here if desired
- return output
- }
- // Raw calls 'iptables' system command, passing supplied arguments.
- func (iptable IPTable) Raw(args ...string) ([]byte, error) {
- if firewalldRunning {
- // select correct IP version for firewalld
- ipv := Iptables
- if iptable.ipVersion == IPv6 {
- ipv = IP6Tables
- }
- startTime := time.Now()
- output, err := Passthrough(ipv, args...)
- if err == nil || !strings.Contains(err.Error(), "was not provided by any .service files") {
- return filterOutput(startTime, output, args...), err
- }
- }
- return iptable.raw(args...)
- }
- func (iptable IPTable) raw(args ...string) ([]byte, error) {
- if err := initCheck(); err != nil {
- return nil, err
- }
- path := iptablesPath
- commandName := "iptables"
- if iptable.ipVersion == IPv6 {
- if ip6tablesPath == "" {
- return nil, fmt.Errorf("ip6tables is missing")
- }
- path = ip6tablesPath
- commandName = "ip6tables"
- }
- if supportsXlock {
- args = append([]string{"--wait"}, args...)
- } else {
- bestEffortLock.Lock()
- defer bestEffortLock.Unlock()
- }
- log.G(context.TODO()).Debugf("%s, %v", path, args)
- startTime := time.Now()
- output, err := exec.Command(path, args...).CombinedOutput()
- if err != nil {
- return nil, fmt.Errorf("iptables failed: %s %v: %s (%s)", commandName, strings.Join(args, " "), output, err)
- }
- return filterOutput(startTime, output, args...), err
- }
- // RawCombinedOutput internally calls the Raw function and returns a non nil
- // error if Raw returned a non nil error or a non empty output
- func (iptable IPTable) RawCombinedOutput(args ...string) error {
- if output, err := iptable.Raw(args...); err != nil || len(output) != 0 {
- return fmt.Errorf("%s (%v)", string(output), err)
- }
- return nil
- }
- // RawCombinedOutputNative behave as RawCombinedOutput with the difference it
- // will always invoke `iptables` binary
- func (iptable IPTable) RawCombinedOutputNative(args ...string) error {
- if output, err := iptable.raw(args...); err != nil || len(output) != 0 {
- return fmt.Errorf("%s (%v)", string(output), err)
- }
- return nil
- }
- // ExistChain checks if a chain exists
- func (iptable IPTable) ExistChain(chain string, table Table) bool {
- _, err := iptable.Raw("-t", string(table), "-nL", chain)
- return err == nil
- }
- // SetDefaultPolicy sets the passed default policy for the table/chain
- func (iptable IPTable) SetDefaultPolicy(table Table, chain string, policy Policy) error {
- if err := iptable.RawCombinedOutput("-t", string(table), "-P", chain, string(policy)); err != nil {
- return fmt.Errorf("setting default policy to %v in %v chain failed: %v", policy, chain, err)
- }
- return nil
- }
- // AddReturnRule adds a return rule for the chain in the filter table
- func (iptable IPTable) AddReturnRule(chain string) error {
- if iptable.Exists(Filter, chain, "-j", "RETURN") {
- return nil
- }
- if err := iptable.RawCombinedOutput("-A", chain, "-j", "RETURN"); err != nil {
- return fmt.Errorf("unable to add return rule in %s chain: %v", chain, err)
- }
- return nil
- }
- // EnsureJumpRule ensures the jump rule is on top
- func (iptable IPTable) EnsureJumpRule(fromChain, toChain string) error {
- if iptable.Exists(Filter, fromChain, "-j", toChain) {
- if err := iptable.RawCombinedOutput("-D", fromChain, "-j", toChain); err != nil {
- return fmt.Errorf("unable to remove jump to %s rule in %s chain: %v", toChain, fromChain, err)
- }
- }
- if err := iptable.RawCombinedOutput("-I", fromChain, "-j", toChain); err != nil {
- return fmt.Errorf("unable to insert jump to %s rule in %s chain: %v", toChain, fromChain, err)
- }
- return nil
- }
|