Merge pull request #44491 from corhere/libnetwork-minus-reexec

libnetwork: eliminate almost all reexecs
This commit is contained in:
Brian Goff 2023-01-13 10:44:25 -08:00 committed by GitHub
commit 483b03562a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 233 additions and 489 deletions

View file

@ -8,7 +8,6 @@ import (
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
@ -23,7 +22,6 @@ import (
"github.com/docker/docker/libnetwork/osl"
"github.com/docker/docker/libnetwork/resolvconf"
"github.com/docker/docker/libnetwork/types"
"github.com/docker/docker/pkg/reexec"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netlink/nl"
@ -75,67 +73,15 @@ type network struct {
}
func init() {
reexec.Register("set-default-vlan", setDefaultVlan)
// Lock main() to the initial thread to exclude the goroutines executing
// func (*network).watchMiss() from being scheduled onto that thread.
// Changes to the network namespace of the initial thread alter
// /proc/self/ns/net, which would break any code which (incorrectly)
// assumes that that file is a handle to the network namespace for the
// thread it is currently executing on.
// func (*network).watchMiss() or func setDefaultVLAN() from being
// scheduled onto that thread. Changes to the network namespace of the
// initial thread alter /proc/self/ns/net, which would break any code
// which (incorrectly) assumes that that file is a handle to the network
// namespace for the thread it is currently executing on.
runtime.LockOSThread()
}
func setDefaultVlan() {
if len(os.Args) < 3 {
logrus.Error("insufficient number of arguments")
os.Exit(1)
}
runtime.LockOSThread()
defer runtime.UnlockOSThread()
nsPath := os.Args[1]
ns, err := netns.GetFromPath(nsPath)
if err != nil {
logrus.Errorf("overlay namespace get failed, %v", err)
os.Exit(1)
}
if err = netns.Set(ns); err != nil {
logrus.Errorf("setting into overlay namespace failed, %v", err)
os.Exit(1)
}
// make sure the sysfs mount doesn't propagate back
if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
logrus.Errorf("unshare failed, %v", err)
os.Exit(1)
}
flag := unix.MS_PRIVATE | unix.MS_REC
if err = unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
logrus.Errorf("root mount failed, %v", err)
os.Exit(1)
}
if err = unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil {
logrus.Errorf("mounting sysfs failed, %v", err)
os.Exit(1)
}
brName := os.Args[2]
// IFLA_BR_VLAN_DEFAULT_PVID was added in Linux v4.4 (see torvalds/linux@0f963b7), so we can't use netlink for
// setting this until Docker drops support for CentOS/RHEL 7 (kernel 3.10, eol date: 2024-06-30).
path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid")
data := []byte{'0', '\n'}
if err = os.WriteFile(path, data, 0644); err != nil {
logrus.Errorf("enabling default vlan on bridge %s failed %v", brName, err)
os.Exit(1)
}
os.Exit(0)
}
func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
return nil, types.NotImplementedErrorf("not implemented")
}
@ -641,32 +587,66 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error
return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err)
}
if !hostMode {
var name string
for _, i := range sbox.Info().Interfaces() {
if i.Bridge() {
name = i.DstName()
}
}
cmd := &exec.Cmd{
Path: reexec.Self(),
Args: []string{"set-default-vlan", sbox.Key(), name},
Stdout: os.Stdout,
Stderr: os.Stderr,
}
if err := cmd.Run(); err != nil {
// not a fatal error
logrus.Errorf("reexec to set bridge default vlan failed %v", err)
}
}
if hostMode {
if err := addFilters(n.id[:12], brName); err != nil {
return err
return addFilters(n.id[:12], brName)
}
if err := setDefaultVLAN(sbox); err != nil {
// not a fatal error
logrus.WithError(err).Error("set bridge default vlan failed")
}
return nil
}
func setDefaultVLAN(sbox osl.Sandbox) error {
var brName string
for _, i := range sbox.Info().Interfaces() {
if i.Bridge() {
brName = i.DstName()
}
}
return nil
// IFLA_BR_VLAN_DEFAULT_PVID was added in Linux v4.4 (see torvalds/linux@0f963b7), so we can't use netlink for
// setting this until Docker drops support for CentOS/RHEL 7 (kernel 3.10, eol date: 2024-06-30).
var innerErr error
err := sbox.InvokeFunc(func() {
// Contrary to what the sysfs(5) man page says, the entries of /sys/class/net
// represent the networking devices visible in the network namespace of the
// process which mounted the sysfs filesystem, irrespective of the network
// namespace of the process accessing the directory. Remount sysfs in order to
// see the network devices in sbox's network namespace, making sure the mount
// doesn't propagate back.
//
// The Linux implementation of (osl.Sandbox).InvokeFunc() runs the function in a
// dedicated goroutine. The effects of unshare(CLONE_NEWNS) on a thread cannot
// be reverted so the thread needs to be terminated once the goroutine is
// finished.
runtime.LockOSThread()
if err := unix.Unshare(unix.CLONE_NEWNS); err != nil {
innerErr = os.NewSyscallError("unshare", err)
return
}
if err := unix.Mount("", "/", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
innerErr = &os.PathError{Op: "mount", Path: "/", Err: err}
return
}
if err := unix.Mount("sysfs", "/sys", "sysfs", 0, ""); err != nil {
innerErr = &os.PathError{Op: "mount", Path: "/sys", Err: err}
return
}
path := filepath.Join("/sys/class/net", brName, "bridge/default_pvid")
data := []byte{'0', '\n'}
if err := os.WriteFile(path, data, 0o644); err != nil {
innerErr = fmt.Errorf("failed to enable default vlan on bridge %s: %w", brName, err)
return
}
})
if err != nil {
return err
}
return innerErr
}
// Must be called with the network lock

View file

@ -50,7 +50,7 @@ func (n *network) startResolver() {
for _, subnet := range hnsresponse.Subnets {
if subnet.GatewayAddress != "" {
for i := 0; i < 3; i++ {
resolver := NewResolver(subnet.GatewayAddress, false, "", n)
resolver := NewResolver(subnet.GatewayAddress, false, n)
logrus.Debugf("Binding a resolver on network %s gateway %s", n.Name(), subnet.GatewayAddress)
executeInCompartment(hnsresponse.DNSServerCompartment, resolver.SetupFunc(53))

View file

@ -5,7 +5,6 @@ import (
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
@ -14,10 +13,10 @@ import (
"syscall"
"time"
"github.com/docker/docker/internal/unshare"
"github.com/docker/docker/libnetwork/ns"
"github.com/docker/docker/libnetwork/osl/kernel"
"github.com/docker/docker/libnetwork/types"
"github.com/docker/docker/pkg/reexec"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netns"
@ -27,13 +26,11 @@ import (
const defaultPrefix = "/var/run/docker"
func init() {
reexec.Register("set-ipv6", reexecSetIPv6)
// Lock main() to the initial thread to exclude the goroutines spawned
// by func (*networkNamespace) InvokeFunc() from being scheduled onto
// that thread. Changes to the network namespace of the initial thread
// alter /proc/self/ns/net, which would break any code which
// (incorrectly) assumes that that file is a handle to the network
// by func (*networkNamespace) InvokeFunc() or func setIPv6() below from
// being scheduled onto that thread. Changes to the network namespace of
// the initial thread alter /proc/self/ns/net, which would break any
// code which (incorrectly) assumes that that file is the network
// namespace for the thread it is currently executing on.
runtime.LockOSThread()
}
@ -70,10 +67,6 @@ func SetBasePath(path string) {
prefix = path
}
func init() {
reexec.Register("netns-create", reexecCreateNamespace)
}
func basePath() string {
return filepath.Join(prefix, "netns")
}
@ -301,35 +294,18 @@ func GetSandboxForExternalKey(basePath string, key string) (Sandbox, error) {
return n, nil
}
func reexecCreateNamespace() {
if len(os.Args) < 2 {
logrus.Fatal("no namespace path provided")
}
if err := mountNetworkNamespace("/proc/self/ns/net", os.Args[1]); err != nil {
logrus.Fatal(err)
}
}
func createNetworkNamespace(path string, osCreate bool) error {
if err := createNamespaceFile(path); err != nil {
return err
}
cmd := &exec.Cmd{
Path: reexec.Self(),
Args: append([]string{"netns-create"}, path),
Stdout: os.Stdout,
Stderr: os.Stderr,
do := func() error {
return mountNetworkNamespace(fmt.Sprintf("/proc/self/task/%d/ns/net", unix.Gettid()), path)
}
if osCreate {
cmd.SysProcAttr = &syscall.SysProcAttr{}
cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNET
return unshare.Go(unix.CLONE_NEWNET, do, nil)
}
if err := cmd.Run(); err != nil {
return fmt.Errorf("namespace creation reexec command failed: %v", err)
}
return nil
return do()
}
func unmountNamespaceFile(path string) {
@ -623,66 +599,65 @@ func (n *networkNamespace) checkLoV6() {
n.loV6Enabled = enable
}
func reexecSetIPv6() {
runtime.LockOSThread()
defer runtime.UnlockOSThread()
if len(os.Args) < 3 {
logrus.Errorf("invalid number of arguments for %s", os.Args[0])
os.Exit(1)
}
ns, err := netns.GetFromPath(os.Args[1])
func setIPv6(nspath, iface string, enable bool) error {
origns, err := netns.Get()
if err != nil {
logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
os.Exit(2)
return fmt.Errorf("failed to get current network namespace: %w", err)
}
defer origns.Close()
ns, err := netns.GetFromPath(nspath)
if err != nil {
return fmt.Errorf("failed get network namespace %q: %w", nspath, err)
}
defer ns.Close()
if err = netns.Set(ns); err != nil {
logrus.Errorf("setting into container netns %q failed: %v", os.Args[1], err)
os.Exit(3)
}
errCh := make(chan error, 1)
go func() {
defer close(errCh)
var (
action = "disable"
value = byte('1')
path = fmt.Sprintf("/proc/sys/net/ipv6/conf/%s/disable_ipv6", os.Args[2])
)
if os.Args[3] == "true" {
action = "enable"
value = byte('0')
}
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
logrus.Warnf("file does not exist: %s : %v Has IPv6 been disabled in this node's kernel?", path, err)
os.Exit(0)
runtime.LockOSThread()
if err = netns.Set(ns); err != nil {
errCh <- fmt.Errorf("setting into container netns %q failed: %w", nspath, err)
return
}
logrus.Errorf("failed to stat %s : %v", path, err)
os.Exit(5)
}
defer func() {
if err := netns.Set(origns); err != nil {
logrus.WithError(err).Error("libnetwork: restoring thread network namespace failed")
// The error is only fatal for the current thread. Keep this
// goroutine locked to the thread to make the runtime replace it
// with a clean thread once this goroutine returns.
} else {
runtime.UnlockOSThread()
}
}()
if err = os.WriteFile(path, []byte{value, '\n'}, 0644); err != nil {
logrus.Errorf("failed to %s IPv6 forwarding for container's interface %s: %v", action, os.Args[2], err)
os.Exit(4)
}
var (
action = "disable"
value = byte('1')
path = fmt.Sprintf("/proc/sys/net/ipv6/conf/%s/disable_ipv6", iface)
)
os.Exit(0)
}
if enable {
action = "enable"
value = '0'
}
func setIPv6(path, iface string, enable bool) error {
cmd := &exec.Cmd{
Path: reexec.Self(),
Args: append([]string{"set-ipv6"}, path, iface, strconv.FormatBool(enable)),
Stdout: os.Stdout,
Stderr: os.Stderr,
}
if err := cmd.Run(); err != nil {
return fmt.Errorf("reexec to set IPv6 failed: %v", err)
}
return nil
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
logrus.WithError(err).Warn("Cannot configure IPv6 forwarding on container interface. Has IPv6 been disabled in this node's kernel?")
return
}
errCh <- err
return
}
if err = os.WriteFile(path, []byte{value, '\n'}, 0o644); err != nil {
errCh <- fmt.Errorf("failed to %s IPv6 forwarding for container's interface %s: %w", action, iface, err)
return
}
}()
return <-errCh
}
// ApplyOSTweaks applies linux configs on the sandbox

View file

@ -90,7 +90,6 @@ type resolver struct {
queryLock sync.Mutex
listenAddress string
proxyDNS bool
resolverKey string
startCh chan struct{}
}
@ -99,12 +98,11 @@ func init() {
}
// NewResolver creates a new instance of the Resolver
func NewResolver(address string, proxyDNS bool, resolverKey string, backend DNSBackend) Resolver {
func NewResolver(address string, proxyDNS bool, backend DNSBackend) Resolver {
return &resolver{
backend: backend,
proxyDNS: proxyDNS,
listenAddress: address,
resolverKey: resolverKey,
err: fmt.Errorf("setup not done yet"),
startCh: make(chan struct{}, 1),
}

View file

@ -122,7 +122,7 @@ func TestDNSIPQuery(t *testing.T) {
w := new(tstwriter)
// the unit tests right now will focus on non-proxyed DNS requests
r := NewResolver(resolverIPSandbox, false, sb.Key(), sb.(*sandbox))
r := NewResolver(resolverIPSandbox, false, sb.(*sandbox))
// test name1's IP is resolved correctly with the default A type query
// Also make sure DNS lookups are case insensitive
@ -266,7 +266,7 @@ func TestDNSProxyServFail(t *testing.T) {
t.Log("DNS Server can be reached")
w := new(tstwriter)
r := NewResolver(resolverIPSandbox, true, sb.Key(), sb.(*sandbox))
r := NewResolver(resolverIPSandbox, true, sb.(*sandbox))
q := new(dns.Msg)
q.SetQuestion("name1.", dns.TypeA)

View file

@ -4,22 +4,12 @@
package libnetwork
import (
"fmt"
"net"
"os"
"os/exec"
"runtime"
"github.com/docker/docker/libnetwork/iptables"
"github.com/docker/docker/pkg/reexec"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netns"
)
func init() {
reexec.Register("setup-resolver", reexecSetupResolver)
}
const (
// outputChain used for docker embed dns
outputChain = "DOCKER_OUTPUT"
@ -27,79 +17,46 @@ const (
postroutingchain = "DOCKER_POSTROUTING"
)
func reexecSetupResolver() {
runtime.LockOSThread()
defer runtime.UnlockOSThread()
if len(os.Args) < 4 {
logrus.Error("invalid number of arguments..")
os.Exit(1)
}
resolverIP, ipPort, _ := net.SplitHostPort(os.Args[2])
_, tcpPort, _ := net.SplitHostPort(os.Args[3])
rules := [][]string{
{"-t", "nat", "-I", outputChain, "-d", resolverIP, "-p", "udp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", os.Args[2]},
{"-t", "nat", "-I", postroutingchain, "-s", resolverIP, "-p", "udp", "--sport", ipPort, "-j", "SNAT", "--to-source", ":" + dnsPort},
{"-t", "nat", "-I", outputChain, "-d", resolverIP, "-p", "tcp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", os.Args[3]},
{"-t", "nat", "-I", postroutingchain, "-s", resolverIP, "-p", "tcp", "--sport", tcpPort, "-j", "SNAT", "--to-source", ":" + dnsPort},
}
f, err := os.OpenFile(os.Args[1], os.O_RDONLY, 0)
if err != nil {
logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
os.Exit(2)
}
defer f.Close() //nolint:gosec
nsFD := f.Fd()
if err = netns.Set(netns.NsHandle(nsFD)); err != nil {
logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
os.Exit(3)
}
// TODO IPv6 support
iptable := iptables.GetIptable(iptables.IPv4)
// insert outputChain and postroutingchain
err = iptable.RawCombinedOutputNative("-t", "nat", "-C", "OUTPUT", "-d", resolverIP, "-j", outputChain)
if err == nil {
iptable.RawCombinedOutputNative("-t", "nat", "-F", outputChain)
} else {
iptable.RawCombinedOutputNative("-t", "nat", "-N", outputChain)
iptable.RawCombinedOutputNative("-t", "nat", "-I", "OUTPUT", "-d", resolverIP, "-j", outputChain)
}
err = iptable.RawCombinedOutputNative("-t", "nat", "-C", "POSTROUTING", "-d", resolverIP, "-j", postroutingchain)
if err == nil {
iptable.RawCombinedOutputNative("-t", "nat", "-F", postroutingchain)
} else {
iptable.RawCombinedOutputNative("-t", "nat", "-N", postroutingchain)
iptable.RawCombinedOutputNative("-t", "nat", "-I", "POSTROUTING", "-d", resolverIP, "-j", postroutingchain)
}
for _, rule := range rules {
if iptable.RawCombinedOutputNative(rule...) != nil {
logrus.Errorf("set up rule failed, %v", rule)
}
}
}
func (r *resolver) setupIPTable() error {
if r.err != nil {
return r.err
}
laddr := r.conn.LocalAddr().String()
ltcpaddr := r.tcpListen.Addr().String()
resolverIP, ipPort, _ := net.SplitHostPort(laddr)
_, tcpPort, _ := net.SplitHostPort(ltcpaddr)
rules := [][]string{
{"-t", "nat", "-I", outputChain, "-d", resolverIP, "-p", "udp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", laddr},
{"-t", "nat", "-I", postroutingchain, "-s", resolverIP, "-p", "udp", "--sport", ipPort, "-j", "SNAT", "--to-source", ":" + dnsPort},
{"-t", "nat", "-I", outputChain, "-d", resolverIP, "-p", "tcp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", ltcpaddr},
{"-t", "nat", "-I", postroutingchain, "-s", resolverIP, "-p", "tcp", "--sport", tcpPort, "-j", "SNAT", "--to-source", ":" + dnsPort},
}
cmd := &exec.Cmd{
Path: reexec.Self(),
Args: append([]string{"setup-resolver"}, r.resolverKey, laddr, ltcpaddr),
Stdout: os.Stdout,
Stderr: os.Stderr,
}
if err := cmd.Run(); err != nil {
return fmt.Errorf("reexec failed: %v", err)
}
return nil
return r.backend.ExecFunc(func() {
// TODO IPv6 support
iptable := iptables.GetIptable(iptables.IPv4)
// insert outputChain and postroutingchain
err := iptable.RawCombinedOutputNative("-t", "nat", "-C", "OUTPUT", "-d", resolverIP, "-j", outputChain)
if err == nil {
iptable.RawCombinedOutputNative("-t", "nat", "-F", outputChain)
} else {
iptable.RawCombinedOutputNative("-t", "nat", "-N", outputChain)
iptable.RawCombinedOutputNative("-t", "nat", "-I", "OUTPUT", "-d", resolverIP, "-j", outputChain)
}
err = iptable.RawCombinedOutputNative("-t", "nat", "-C", "POSTROUTING", "-d", resolverIP, "-j", postroutingchain)
if err == nil {
iptable.RawCombinedOutputNative("-t", "nat", "-F", postroutingchain)
} else {
iptable.RawCombinedOutputNative("-t", "nat", "-N", postroutingchain)
iptable.RawCombinedOutputNative("-t", "nat", "-I", "POSTROUTING", "-d", resolverIP, "-j", postroutingchain)
}
for _, rule := range rules {
if iptable.RawCombinedOutputNative(rule...) != nil {
logrus.Errorf("set up rule failed, %v", rule)
}
}
})
}

View file

@ -27,7 +27,7 @@ const (
func (sb *sandbox) startResolver(restore bool) {
sb.resolverOnce.Do(func() {
var err error
sb.resolver = NewResolver(resolverIPSandbox, true, sb.Key(), sb)
sb.resolver = NewResolver(resolverIPSandbox, true, sb)
defer func() {
if err != nil {
sb.resolver = nil

View file

@ -5,9 +5,7 @@ import (
"io"
"net"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
@ -15,20 +13,12 @@ import (
"github.com/docker/docker/libnetwork/iptables"
"github.com/docker/docker/libnetwork/ns"
"github.com/docker/docker/pkg/reexec"
"github.com/gogo/protobuf/proto"
"github.com/ishidawataru/sctp"
"github.com/moby/ipvs"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
)
func init() {
reexec.Register("fwmarker", fwMarker)
reexec.Register("redirector", redirector)
}
// Populate all loadbalancers on the network that the passed endpoint
// belongs to, into this sandbox.
func (sb *sandbox) populateLoadBalancers(ep *endpoint) {
@ -41,7 +31,7 @@ func (sb *sandbox) populateLoadBalancers(ep *endpoint) {
eIP := ep.Iface().Address()
if n.ingress {
if err := addRedirectRules(sb.Key(), eIP, ep.ingressPorts); err != nil {
if err := sb.addRedirectRules(eIP, ep.ingressPorts); err != nil {
logrus.Errorf("Failed to add redirect rules for ep %s (%.7s): %v", ep.Name(), ep.ID(), err)
}
}
@ -141,7 +131,7 @@ func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
}
logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID())
if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil {
if err := sb.configureFWMark(lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil {
logrus.Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
return
}
@ -240,7 +230,7 @@ func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullR
}
}
if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil {
if err := sb.configureFWMark(lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil {
logrus.Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
}
@ -381,7 +371,7 @@ func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) erro
}
path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet")
if err := os.WriteFile(path, []byte{'1', '\n'}, 0644); err != nil { //nolint:gosec // gosec complains about perms here, which must be 0644 in this case
if err := os.WriteFile(path, []byte{'1', '\n'}, 0o644); err != nil { //nolint:gosec // gosec complains about perms here, which must be 0644 in this case
return fmt.Errorf("could not write to %s: %v", path, err)
}
@ -540,216 +530,65 @@ func plumbProxy(iPort *PortConfig, isDelete bool) error {
return nil
}
func writePortsToFile(ports []*PortConfig) (string, error) {
f, err := os.CreateTemp("", "port_configs")
if err != nil {
return "", err
}
defer f.Close() //nolint:gosec
buf, _ := proto.Marshal(&EndpointRecord{
IngressPorts: ports,
})
n, err := f.Write(buf)
if err != nil {
return "", err
}
if n < len(buf) {
return "", io.ErrShortWrite
}
return f.Name(), nil
}
func readPortsFromFile(fileName string) ([]*PortConfig, error) {
buf, err := os.ReadFile(fileName)
if err != nil {
return nil, err
}
var epRec EndpointRecord
err = proto.Unmarshal(buf, &epRec)
if err != nil {
return nil, err
}
return epRec.IngressPorts, nil
}
// Invoke fwmarker reexec routine to mark vip destined packets with
// the passed firewall mark.
func invokeFWMarker(path string, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error {
var ingressPortsFile string
if len(ingressPorts) != 0 {
var err error
ingressPortsFile, err = writePortsToFile(ingressPorts)
if err != nil {
return err
}
defer os.Remove(ingressPortsFile)
}
// configureFWMark configures the sandbox firewall to mark vip destined packets
// with the firewall mark fwMark.
func (sb *sandbox) configureFWMark(vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error {
// TODO IPv6 support
iptable := iptables.GetIptable(iptables.IPv4)
fwMarkStr := strconv.FormatUint(uint64(fwMark), 10)
addDelOpt := "-A"
if isDelete {
addDelOpt = "-D"
}
cmd := &exec.Cmd{
Path: reexec.Self(),
Args: append([]string{"fwmarker"}, path, vip.String(), strconv.FormatUint(uint64(fwMark), 10), addDelOpt, ingressPortsFile, eIP.String(), lbMode),
Stdout: os.Stdout,
Stderr: os.Stderr,
}
if err := cmd.Run(); err != nil {
return fmt.Errorf("reexec failed: %v", err)
}
return nil
}
// Firewall marker reexec function.
func fwMarker() {
// TODO IPv6 support
iptable := iptables.GetIptable(iptables.IPv4)
runtime.LockOSThread()
defer runtime.UnlockOSThread()
if len(os.Args) < 8 {
logrus.Error("invalid number of arguments..")
os.Exit(1)
}
var ingressPorts []*PortConfig
if os.Args[5] != "" {
var err error
ingressPorts, err = readPortsFromFile(os.Args[5])
if err != nil {
logrus.Errorf("Failed reading ingress ports file: %v", err)
os.Exit(2)
}
}
vip := os.Args[2]
fwMark := os.Args[3]
if _, err := strconv.ParseUint(fwMark, 10, 32); err != nil {
logrus.Errorf("bad fwmark value(%s) passed: %v", fwMark, err)
os.Exit(3)
}
addDelOpt := os.Args[4]
rules := make([][]string, 0, len(ingressPorts))
for _, iPort := range ingressPorts {
var (
protocol = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
)
rule := []string{"-t", "mangle", addDelOpt, "PREROUTING", "-p", protocol, "--dport", publishedPort, "-j", "MARK", "--set-mark", fwMark}
rule := []string{"-t", "mangle", addDelOpt, "PREROUTING", "-p", protocol, "--dport", publishedPort, "-j", "MARK", "--set-mark", fwMarkStr}
rules = append(rules, rule)
}
ns, err := netns.GetFromPath(os.Args[1])
if err != nil {
logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
os.Exit(4)
}
defer ns.Close()
var innerErr error
err := sb.ExecFunc(func() {
if !isDelete && lbMode == loadBalancerModeNAT {
subnet := net.IPNet{IP: eIP.IP.Mask(eIP.Mask), Mask: eIP.Mask}
ruleParams := []string{"-m", "ipvs", "--ipvs", "-d", subnet.String(), "-j", "SNAT", "--to-source", eIP.IP.String()}
if !iptable.Exists("nat", "POSTROUTING", ruleParams...) {
rule := append([]string{"-t", "nat", "-A", "POSTROUTING"}, ruleParams...)
rules = append(rules, rule)
if err := netns.Set(ns); err != nil {
logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
os.Exit(5)
}
lbMode := os.Args[7]
if addDelOpt == "-A" && lbMode == loadBalancerModeNAT {
eIP, subnet, err := net.ParseCIDR(os.Args[6])
if err != nil {
logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err)
os.Exit(6)
}
ruleParams := []string{"-m", "ipvs", "--ipvs", "-d", subnet.String(), "-j", "SNAT", "--to-source", eIP.String()}
if !iptable.Exists("nat", "POSTROUTING", ruleParams...) {
rule := append([]string{"-t", "nat", "-A", "POSTROUTING"}, ruleParams...)
rules = append(rules, rule)
err := os.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
if err != nil {
logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err)
os.Exit(7)
err := os.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
if err != nil {
innerErr = err
return
}
}
}
}
rule := []string{"-t", "mangle", addDelOpt, "INPUT", "-d", vip + "/32", "-j", "MARK", "--set-mark", fwMark}
rules = append(rules, rule)
rule := []string{"-t", "mangle", addDelOpt, "INPUT", "-d", vip.String() + "/32", "-j", "MARK", "--set-mark", fwMarkStr}
rules = append(rules, rule)
for _, rule := range rules {
if err := iptable.RawCombinedOutputNative(rule...); err != nil {
logrus.Errorf("set up rule failed, %v: %v", rule, err)
os.Exit(8)
for _, rule := range rules {
if err := iptable.RawCombinedOutputNative(rule...); err != nil {
innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
return
}
}
})
if err != nil {
return err
}
return innerErr
}
func addRedirectRules(path string, eIP *net.IPNet, ingressPorts []*PortConfig) error {
var ingressPortsFile string
if len(ingressPorts) != 0 {
var err error
ingressPortsFile, err = writePortsToFile(ingressPorts)
if err != nil {
return err
}
defer os.Remove(ingressPortsFile)
}
cmd := &exec.Cmd{
Path: reexec.Self(),
Args: append([]string{"redirector"}, path, eIP.String(), ingressPortsFile),
Stdout: os.Stdout,
Stderr: os.Stderr,
}
if err := cmd.Run(); err != nil {
return fmt.Errorf("reexec failed: %v", err)
}
return nil
}
// Redirector reexec function.
func redirector() {
func (sb *sandbox) addRedirectRules(eIP *net.IPNet, ingressPorts []*PortConfig) error {
// TODO IPv6 support
iptable := iptables.GetIptable(iptables.IPv4)
runtime.LockOSThread()
defer runtime.UnlockOSThread()
if len(os.Args) < 4 {
logrus.Error("invalid number of arguments..")
os.Exit(1)
}
var ingressPorts []*PortConfig
if os.Args[3] != "" {
var err error
ingressPorts, err = readPortsFromFile(os.Args[3])
if err != nil {
logrus.Errorf("Failed reading ingress ports file: %v", err)
os.Exit(2)
}
}
eIP, _, err := net.ParseCIDR(os.Args[2])
if err != nil {
logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[2], err)
os.Exit(3)
}
ipAddr := eIP.String()
ipAddr := eIP.IP.String()
rules := make([][]string, 0, len(ingressPorts)*3) // 3 rules per port
for _, iPort := range ingressPorts {
@ -770,47 +609,42 @@ func redirector() {
)
}
ns, err := netns.GetFromPath(os.Args[1])
var innerErr error
err := sb.ExecFunc(func() {
for _, rule := range rules {
if err := iptable.RawCombinedOutputNative(rule...); err != nil {
innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
return
}
}
if len(ingressPorts) == 0 {
return
}
// Ensure blocking rules for anything else in/to ingress network
for _, rule := range [][]string{
{"-d", ipAddr, "-p", "sctp", "-j", "DROP"},
{"-d", ipAddr, "-p", "udp", "-j", "DROP"},
{"-d", ipAddr, "-p", "tcp", "-j", "DROP"},
} {
if !iptable.ExistsNative(iptables.Filter, "INPUT", rule...) {
if err := iptable.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil {
innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
return
}
}
rule[0] = "-s"
if !iptable.ExistsNative(iptables.Filter, "OUTPUT", rule...) {
if err := iptable.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil {
innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
return
}
}
}
})
if err != nil {
logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
os.Exit(4)
}
defer ns.Close()
if err := netns.Set(ns); err != nil {
logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
os.Exit(5)
}
for _, rule := range rules {
if err := iptable.RawCombinedOutputNative(rule...); err != nil {
logrus.Errorf("set up rule failed, %v: %v", rule, err)
os.Exit(6)
}
}
if len(ingressPorts) == 0 {
return
}
// Ensure blocking rules for anything else in/to ingress network
for _, rule := range [][]string{
{"-d", ipAddr, "-p", "sctp", "-j", "DROP"},
{"-d", ipAddr, "-p", "udp", "-j", "DROP"},
{"-d", ipAddr, "-p", "tcp", "-j", "DROP"},
} {
if !iptable.ExistsNative(iptables.Filter, "INPUT", rule...) {
if err := iptable.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil {
logrus.Errorf("set up rule failed, %v: %v", rule, err)
os.Exit(7)
}
}
rule[0] = "-s"
if !iptable.ExistsNative(iptables.Filter, "OUTPUT", rule...) {
if err := iptable.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil {
logrus.Errorf("set up rule failed, %v: %v", rule, err)
os.Exit(8)
}
}
return err
}
return innerErr
}