service_linux.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. package libnetwork
  2. import (
  3. "context"
  4. "fmt"
  5. "io"
  6. "net"
  7. "os"
  8. "path/filepath"
  9. "strconv"
  10. "strings"
  11. "sync"
  12. "syscall"
  13. "github.com/containerd/log"
  14. "github.com/docker/docker/libnetwork/iptables"
  15. "github.com/docker/docker/libnetwork/ns"
  16. "github.com/ishidawataru/sctp"
  17. "github.com/moby/ipvs"
  18. "github.com/vishvananda/netlink/nl"
  19. )
  20. // Populate all loadbalancers on the network that the passed endpoint
  21. // belongs to, into this sandbox.
  22. func (sb *Sandbox) populateLoadBalancers(ep *Endpoint) {
  23. // This is an interface less endpoint. Nothing to do.
  24. if ep.Iface() == nil {
  25. return
  26. }
  27. n := ep.getNetwork()
  28. eIP := ep.Iface().Address()
  29. if n.ingress {
  30. if err := sb.addRedirectRules(eIP, ep.ingressPorts); err != nil {
  31. log.G(context.TODO()).Errorf("Failed to add redirect rules for ep %s (%.7s): %v", ep.Name(), ep.ID(), err)
  32. }
  33. }
  34. }
  35. func (n *Network) findLBEndpointSandbox() (*Endpoint, *Sandbox, error) {
  36. // TODO: get endpoint from store? See EndpointInfo()
  37. var ep *Endpoint
  38. // Find this node's LB sandbox endpoint: there should be exactly one
  39. for _, e := range n.Endpoints() {
  40. epi := e.Info()
  41. if epi != nil && epi.LoadBalancer() {
  42. ep = e
  43. break
  44. }
  45. }
  46. if ep == nil {
  47. return nil, nil, fmt.Errorf("Unable to find load balancing endpoint for network %s", n.ID())
  48. }
  49. // Get the load balancer sandbox itself as well
  50. sb, ok := ep.getSandbox()
  51. if !ok {
  52. return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID())
  53. }
  54. sep := sb.getEndpoint(ep.ID())
  55. if sep == nil {
  56. return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID())
  57. }
  58. return sep, sb, nil
  59. }
  60. // Searches the OS sandbox for the name of the endpoint interface
  61. // within the sandbox. This is required for adding/removing IP
  62. // aliases to the interface.
  63. func findIfaceDstName(sb *Sandbox, ep *Endpoint) string {
  64. srcName := ep.Iface().SrcName()
  65. for _, i := range sb.osSbox.Interfaces() {
  66. if i.SrcName() == srcName {
  67. return i.DstName()
  68. }
  69. }
  70. return ""
  71. }
// Add loadbalancer backend to the loadbalancer sandbox for the network.
// If needed add the service as well.
//
// ip is the backend (real server) address and lb describes the service
// (VIP, firewall mark, ingress ports). All failures are logged; nothing is
// returned to the caller.
func (n *Network) addLBBackend(ip net.IP, lb *loadBalancer) {
	// Without a VIP there is nothing to load balance.
	if len(lb.vip) == 0 {
		return
	}
	ep, sb, err := n.findLBEndpointSandbox()
	if err != nil {
		log.G(context.TODO()).Errorf("addLBBackend %s/%s: %v", n.ID(), n.Name(), err)
		return
	}
	// No OS-level sandbox to program.
	if sb.osSbox == nil {
		return
	}

	eIP := ep.Iface().Address()

	// IPVS handle created from the sandbox key; closed when this function
	// returns.
	i, err := ipvs.New(sb.Key())
	if err != nil {
		log.G(context.TODO()).Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb addition: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
		return
	}
	defer i.Close()

	// The service is identified by its firewall mark, not by an address.
	s := &ipvs.Service{
		AddressFamily: nl.FAMILY_V4,
		FWMark:        lb.fwMark,
		SchedName:     ipvs.RoundRobin,
	}

	// Service not present yet: set up the VIP alias, ingress programming (for
	// ingress sandboxes), firewall-mark rules and finally the service itself.
	// Any failure aborts the whole operation, so no destination is added.
	if !i.IsServicePresent(s) {
		// Add IP alias for the VIP to the endpoint
		ifName := findIfaceDstName(sb, ep)
		if ifName == "" {
			log.G(context.TODO()).Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
			return
		}
		err := sb.osSbox.AddAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
		if err != nil {
			log.G(context.TODO()).Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
			return
		}

		if sb.ingress {
			// gwIP stays nil when the sandbox has no gateway endpoint.
			var gwIP net.IP
			if ep := sb.getGatewayEndpoint(); ep != nil {
				gwIP = ep.Iface().Address().IP
			}
			if err := programIngress(gwIP, lb.service.ingressPorts, false); err != nil {
				log.G(context.TODO()).Errorf("Failed to add ingress: %v", err)
				return
			}
		}

		log.G(context.TODO()).Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID())
		if err := sb.configureFWMark(lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil {
			log.G(context.TODO()).Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
			return
		}

		// An already-existing service (EEXIST) is not treated as an error.
		if err := i.NewService(s); err != nil && err != syscall.EEXIST {
			log.G(context.TODO()).Errorf("Failed to create a new service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
			return
		}
	}

	d := &ipvs.Destination{
		AddressFamily: nl.FAMILY_V4,
		Address:       ip,
		Weight:        1,
	}
	// In DSR mode the destination is flagged for direct-route forwarding.
	if n.loadBalancerMode == loadBalancerModeDSR {
		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
	}

	// Remove the sched name before using the service to add
	// destination.
	s.SchedName = ""
	// An already-existing destination (EEXIST) is not treated as an error.
	if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
		log.G(context.TODO()).Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
	}

	// Ensure that kernel tweaks are applied in case this is the first time
	// we've initialized ip_vs
	sb.osSbox.ApplyOSTweaks(sb.oslTypes)
}
  148. // Remove loadbalancer backend the load balancing endpoint for this
  149. // network. If 'rmService' is true, then remove the service entry as well.
  150. // If 'fullRemove' is true then completely remove the entry, otherwise
  151. // just deweight it for now.
  152. func (n *Network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullRemove bool) {
  153. if len(lb.vip) == 0 {
  154. return
  155. }
  156. ep, sb, err := n.findLBEndpointSandbox()
  157. if err != nil {
  158. log.G(context.TODO()).Debugf("rmLBBackend for %s/%s: %v -- probably transient state", n.ID(), n.Name(), err)
  159. return
  160. }
  161. if sb.osSbox == nil {
  162. return
  163. }
  164. eIP := ep.Iface().Address()
  165. i, err := ipvs.New(sb.Key())
  166. if err != nil {
  167. log.G(context.TODO()).Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb removal: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
  168. return
  169. }
  170. defer i.Close()
  171. s := &ipvs.Service{
  172. AddressFamily: nl.FAMILY_V4,
  173. FWMark: lb.fwMark,
  174. }
  175. d := &ipvs.Destination{
  176. AddressFamily: nl.FAMILY_V4,
  177. Address: ip,
  178. Weight: 1,
  179. }
  180. if n.loadBalancerMode == loadBalancerModeDSR {
  181. d.ConnectionFlags = ipvs.ConnFwdDirectRoute
  182. }
  183. if fullRemove {
  184. if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
  185. log.G(context.TODO()).Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  186. }
  187. } else {
  188. d.Weight = 0
  189. if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
  190. log.G(context.TODO()).Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  191. }
  192. }
  193. if rmService {
  194. s.SchedName = ipvs.RoundRobin
  195. if err := i.DelService(s); err != nil && err != syscall.ENOENT {
  196. log.G(context.TODO()).Errorf("Failed to delete service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  197. }
  198. if sb.ingress {
  199. var gwIP net.IP
  200. if ep := sb.getGatewayEndpoint(); ep != nil {
  201. gwIP = ep.Iface().Address().IP
  202. }
  203. if err := programIngress(gwIP, lb.service.ingressPorts, true); err != nil {
  204. log.G(context.TODO()).Errorf("Failed to delete ingress: %v", err)
  205. }
  206. }
  207. if err := sb.configureFWMark(lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil {
  208. log.G(context.TODO()).Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
  209. }
  210. // Remove IP alias from the VIP to the endpoint
  211. ifName := findIfaceDstName(sb, ep)
  212. if ifName == "" {
  213. log.G(context.TODO()).Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
  214. return
  215. }
  216. err := sb.osSbox.RemoveAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
  217. if err != nil {
  218. log.G(context.TODO()).Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
  219. }
  220. }
  221. }
// ingressChain is the name of the iptables chain (created by programIngress
// in both the nat and filter tables) that holds the ingress port rules.
const ingressChain = "DOCKER-INGRESS"

var (
	// ingressOnce guards the one-time flush of pre-existing ingress chain
	// rules performed by programIngress.
	ingressOnce sync.Once
	ingressMu   sync.Mutex // lock for operations on ingress
	// ingressProxyTbl maps a "port/protocol" spec to the listener opened
	// for that published port by plumbProxy.
	ingressProxyTbl = make(map[string]io.Closer)
	// portConfigMu protects portConfigTbl.
	portConfigMu sync.Mutex
	// portConfigTbl holds a reference count per port config; it is
	// maintained by filterPortConfigs.
	portConfigTbl = make(map[PortConfig]int)
)
  230. func filterPortConfigs(ingressPorts []*PortConfig, isDelete bool) []*PortConfig {
  231. portConfigMu.Lock()
  232. iPorts := make([]*PortConfig, 0, len(ingressPorts))
  233. for _, pc := range ingressPorts {
  234. if isDelete {
  235. if cnt, ok := portConfigTbl[*pc]; ok {
  236. // This is the last reference to this
  237. // port config. Delete the port config
  238. // and add it to filtered list to be
  239. // plumbed.
  240. if cnt == 1 {
  241. delete(portConfigTbl, *pc)
  242. iPorts = append(iPorts, pc)
  243. continue
  244. }
  245. portConfigTbl[*pc] = cnt - 1
  246. }
  247. continue
  248. }
  249. if cnt, ok := portConfigTbl[*pc]; ok {
  250. portConfigTbl[*pc] = cnt + 1
  251. continue
  252. }
  253. // We are adding it for the first time. Add it to the
  254. // filter list to be plumbed.
  255. portConfigTbl[*pc] = 1
  256. iPorts = append(iPorts, pc)
  257. }
  258. portConfigMu.Unlock()
  259. return iPorts
  260. }
// programIngress adds (isDelete == false) or removes (isDelete == true) the
// host iptables rules and port listeners for the given ingress ports. gwIP is
// used as the DNAT destination address and, when adding, to find the outgoing
// interface.
//
// When adding, it first makes sure the ingress chain exists in the nat and
// filter tables, that the jump rules into it are present, and that
// route_localnet and a MASQUERADE rule are set up for the interface that
// routes to gwIP. Only port configs selected by filterPortConfigs are then
// plumbed.
//
// When adding, the first rule failure is returned and the deferred function
// rolls back the rules already inserted. When deleting, rule failures are
// only logged and nil is returned.
func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) error {
	// TODO IPv6 support
	iptable := iptables.GetIptable(iptables.IPv4)

	// rollbackAddDelOpt is always the inverse operation of addDelOpt.
	addDelOpt := "-I"
	rollbackAddDelOpt := "-D"
	if isDelete {
		addDelOpt = "-D"
		rollbackAddDelOpt = "-I"
	}

	ingressMu.Lock()
	defer ingressMu.Unlock()

	chainExists := iptable.ExistChain(ingressChain, iptables.Nat)
	filterChainExists := iptable.ExistChain(ingressChain, iptables.Filter)

	ingressOnce.Do(func() {
		// Flush nat table and filter table ingress chain rules during init if it
		// exists. It might contain stale rules from previous life.
		if chainExists {
			if err := iptable.RawCombinedOutput("-t", "nat", "-F", ingressChain); err != nil {
				log.G(context.TODO()).Errorf("Could not flush nat table ingress chain rules during init: %v", err)
			}
		}
		if filterChainExists {
			if err := iptable.RawCombinedOutput("-F", ingressChain); err != nil {
				log.G(context.TODO()).Errorf("Could not flush filter table ingress chain rules during init: %v", err)
			}
		}
	})

	if !isDelete {
		// Create the ingress chain in each table where it is missing.
		if !chainExists {
			if err := iptable.RawCombinedOutput("-t", "nat", "-N", ingressChain); err != nil {
				return fmt.Errorf("failed to create ingress chain: %v", err)
			}
		}
		if !filterChainExists {
			if err := iptable.RawCombinedOutput("-N", ingressChain); err != nil {
				return fmt.Errorf("failed to create filter table ingress chain: %v", err)
			}
		}

		// Append a RETURN rule to each chain; the per-port rules below are
		// added with -I and therefore sit in front of it.
		if !iptable.Exists(iptables.Nat, ingressChain, "-j", "RETURN") {
			if err := iptable.RawCombinedOutput("-t", "nat", "-A", ingressChain, "-j", "RETURN"); err != nil {
				return fmt.Errorf("failed to add return rule in nat table ingress chain: %v", err)
			}
		}

		if !iptable.Exists(iptables.Filter, ingressChain, "-j", "RETURN") {
			if err := iptable.RawCombinedOutput("-A", ingressChain, "-j", "RETURN"); err != nil {
				return fmt.Errorf("failed to add return rule to filter table ingress chain: %v", err)
			}
		}

		// Jump into the nat ingress chain for locally-destined traffic.
		for _, chain := range []string{"OUTPUT", "PREROUTING"} {
			if !iptable.Exists(iptables.Nat, chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain) {
				if err := iptable.RawCombinedOutput("-t", "nat", "-I", chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain); err != nil {
					return fmt.Errorf("failed to add jump rule in %s to ingress chain: %v", chain, err)
				}
			}
		}

		// Jump into the filter ingress chain from FORWARD.
		if !iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
			if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
				return fmt.Errorf("failed to add jump rule to %s in filter table forward chain: %v", ingressChain, err)
			}
			// NOTE(review): arrangeUserFilterRule is defined outside this file;
			// presumably it re-orders the DOCKER-USER jump after the insert
			// above -- confirm.
			arrangeUserFilterRule()
		}

		// Find the interface that routes to the gateway address.
		oifName, err := findOIFName(gwIP)
		if err != nil {
			return fmt.Errorf("failed to find gateway bridge interface name for %s: %v", gwIP, err)
		}

		// Enable route_localnet on that interface.
		path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet")
		if err := os.WriteFile(path, []byte{'1', '\n'}, 0o644); err != nil { //nolint:gosec // gosec complains about perms here, which must be 0644 in this case
			return fmt.Errorf("could not write to %s: %v", path, err)
		}

		// Masquerade locally-sourced traffic leaving through that interface.
		ruleArgs := []string{"-m", "addrtype", "--src-type", "LOCAL", "-o", oifName, "-j", "MASQUERADE"}
		if !iptable.Exists(iptables.Nat, "POSTROUTING", ruleArgs...) {
			if err := iptable.RawCombinedOutput(append([]string{"-t", "nat", "-I", "POSTROUTING"}, ruleArgs...)...); err != nil {
				return fmt.Errorf("failed to add ingress localhost POSTROUTING rule for %s: %v", oifName, err)
			}
		}
	}

	// Filter the ingress ports until port rules start to be added/deleted
	filteredPorts := filterPortConfigs(ingressPorts, isDelete)
	rollbackRules := make([][]string, 0, len(filteredPorts)*3)
	var portErr error
	// Rollback runs only for a failed add: the references just taken on
	// filteredPorts are released again and every rule recorded in
	// rollbackRules is undone.
	defer func() {
		if portErr != nil && !isDelete {
			filterPortConfigs(filteredPorts, !isDelete)
			for _, rule := range rollbackRules {
				if err := iptable.RawCombinedOutput(rule...); err != nil {
					log.G(context.TODO()).Warnf("roll back rule failed, %v: %v", rule, err)
				}
			}
		}
	}()

	for _, iPort := range filteredPorts {
		var (
			protocol      = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
			publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
			destination   = net.JoinHostPort(gwIP.String(), publishedPort)
		)
		// DNAT the published port to the gateway address. A rollback rule is
		// recorded only after the rule itself has been applied (or, on delete,
		// attempted).
		if iptable.ExistChain(ingressChain, iptables.Nat) {
			rule := []string{"-t", "nat", addDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "DNAT", "--to-destination", destination}

			if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
				err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
				if !isDelete {
					return err
				}
				log.G(context.TODO()).Info(err)
			}
			rollbackRule := []string{"-t", "nat", rollbackAddDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "DNAT", "--to-destination", destination}
			rollbackRules = append(rollbackRules, rollbackRule)
		}

		// Filter table rules to allow a published service to be accessible in the local node from..
		// 1) service tasks attached to other networks
		// 2) unmanaged containers on bridge networks
		rule := []string{addDelOpt, ingressChain, "-p", protocol, "--sport", publishedPort, "-m", "conntrack", "--ctstate", "ESTABLISHED,RELATED", "-j", "ACCEPT"}
		if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
			err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
			if !isDelete {
				return err
			}
			log.G(context.TODO()).Warn(err)
		}
		rollbackRule := []string{rollbackAddDelOpt, ingressChain, "-p", protocol, "--sport", publishedPort, "-m", "conntrack", "--ctstate", "ESTABLISHED,RELATED", "-j", "ACCEPT"}
		rollbackRules = append(rollbackRules, rollbackRule)

		rule = []string{addDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "ACCEPT"}
		if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
			err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
			if !isDelete {
				return err
			}
			log.G(context.TODO()).Warn(err)
		}
		rollbackRule = []string{rollbackAddDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "ACCEPT"}
		rollbackRules = append(rollbackRules, rollbackRule)

		// A proxy failure is only logged; it does not set portErr and so
		// never triggers the rollback.
		if err := plumbProxy(iPort, isDelete); err != nil {
			log.G(context.TODO()).Warnf("failed to create proxy for port %s: %v", publishedPort, err)
		}
	}

	return nil
}
  398. // In the filter table FORWARD chain the first rule should be to jump to
  399. // DOCKER-USER so the user is able to filter packet first.
  400. // The second rule should be jump to INGRESS-CHAIN.
  401. // This chain has the rules to allow access to the published ports for swarm tasks
  402. // from local bridge networks and docker_gwbridge (ie:taks on other swarm networks)
  403. func arrangeIngressFilterRule() {
  404. // TODO IPv6 support
  405. iptable := iptables.GetIptable(iptables.IPv4)
  406. if iptable.ExistChain(ingressChain, iptables.Filter) {
  407. if iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
  408. if err := iptable.RawCombinedOutput("-D", "FORWARD", "-j", ingressChain); err != nil {
  409. log.G(context.TODO()).Warnf("failed to delete jump rule to ingressChain in filter table: %v", err)
  410. }
  411. }
  412. if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
  413. log.G(context.TODO()).Warnf("failed to add jump rule to ingressChain in filter table: %v", err)
  414. }
  415. }
  416. }
  417. func findOIFName(ip net.IP) (string, error) {
  418. nlh := ns.NlHandle()
  419. routes, err := nlh.RouteGet(ip)
  420. if err != nil {
  421. return "", err
  422. }
  423. if len(routes) == 0 {
  424. return "", fmt.Errorf("no route to %s", ip)
  425. }
  426. // Pick the first route(typically there is only one route). We
  427. // don't support multipath.
  428. link, err := nlh.LinkByIndex(routes[0].LinkIndex)
  429. if err != nil {
  430. return "", err
  431. }
  432. return link.Attrs().Name, nil
  433. }
  434. func plumbProxy(iPort *PortConfig, isDelete bool) error {
  435. var (
  436. err error
  437. l io.Closer
  438. )
  439. portSpec := fmt.Sprintf("%d/%s", iPort.PublishedPort, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]))
  440. if isDelete {
  441. if listener, ok := ingressProxyTbl[portSpec]; ok {
  442. if listener != nil {
  443. listener.Close()
  444. }
  445. }
  446. return nil
  447. }
  448. switch iPort.Protocol {
  449. case ProtocolTCP:
  450. l, err = net.ListenTCP("tcp", &net.TCPAddr{Port: int(iPort.PublishedPort)})
  451. case ProtocolUDP:
  452. l, err = net.ListenUDP("udp", &net.UDPAddr{Port: int(iPort.PublishedPort)})
  453. case ProtocolSCTP:
  454. l, err = sctp.ListenSCTP("sctp", &sctp.SCTPAddr{Port: int(iPort.PublishedPort)})
  455. default:
  456. err = fmt.Errorf("unknown protocol %v", iPort.Protocol)
  457. }
  458. if err != nil {
  459. return err
  460. }
  461. ingressProxyTbl[portSpec] = l
  462. return nil
  463. }
  464. // configureFWMark configures the sandbox firewall to mark vip destined packets
  465. // with the firewall mark fwMark.
  466. func (sb *Sandbox) configureFWMark(vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error {
  467. // TODO IPv6 support
  468. iptable := iptables.GetIptable(iptables.IPv4)
  469. fwMarkStr := strconv.FormatUint(uint64(fwMark), 10)
  470. addDelOpt := "-A"
  471. if isDelete {
  472. addDelOpt = "-D"
  473. }
  474. rules := make([][]string, 0, len(ingressPorts))
  475. for _, iPort := range ingressPorts {
  476. var (
  477. protocol = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
  478. publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
  479. )
  480. rule := []string{"-t", "mangle", addDelOpt, "PREROUTING", "-p", protocol, "--dport", publishedPort, "-j", "MARK", "--set-mark", fwMarkStr}
  481. rules = append(rules, rule)
  482. }
  483. var innerErr error
  484. err := sb.ExecFunc(func() {
  485. if !isDelete && lbMode == loadBalancerModeNAT {
  486. subnet := net.IPNet{IP: eIP.IP.Mask(eIP.Mask), Mask: eIP.Mask}
  487. ruleParams := []string{"-m", "ipvs", "--ipvs", "-d", subnet.String(), "-j", "SNAT", "--to-source", eIP.IP.String()}
  488. if !iptable.Exists("nat", "POSTROUTING", ruleParams...) {
  489. rule := append([]string{"-t", "nat", "-A", "POSTROUTING"}, ruleParams...)
  490. rules = append(rules, rule)
  491. err := os.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0o644)
  492. if err != nil {
  493. innerErr = err
  494. return
  495. }
  496. }
  497. }
  498. rule := []string{"-t", "mangle", addDelOpt, "INPUT", "-d", vip.String() + "/32", "-j", "MARK", "--set-mark", fwMarkStr}
  499. rules = append(rules, rule)
  500. for _, rule := range rules {
  501. if err := iptable.RawCombinedOutputNative(rule...); err != nil {
  502. innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
  503. return
  504. }
  505. }
  506. })
  507. if err != nil {
  508. return err
  509. }
  510. return innerErr
  511. }
  512. func (sb *Sandbox) addRedirectRules(eIP *net.IPNet, ingressPorts []*PortConfig) error {
  513. // TODO IPv6 support
  514. iptable := iptables.GetIptable(iptables.IPv4)
  515. ipAddr := eIP.IP.String()
  516. rules := make([][]string, 0, len(ingressPorts)*3) // 3 rules per port
  517. for _, iPort := range ingressPorts {
  518. var (
  519. protocol = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
  520. publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
  521. targetPort = strconv.FormatUint(uint64(iPort.TargetPort), 10)
  522. )
  523. rules = append(rules,
  524. []string{"-t", "nat", "-A", "PREROUTING", "-d", ipAddr, "-p", protocol, "--dport", publishedPort, "-j", "REDIRECT", "--to-port", targetPort},
  525. // Allow only incoming connections to exposed ports
  526. []string{"-I", "INPUT", "-d", ipAddr, "-p", protocol, "--dport", targetPort, "-m", "conntrack", "--ctstate", "NEW,ESTABLISHED", "-j", "ACCEPT"},
  527. // Allow only outgoing connections from exposed ports
  528. []string{"-I", "OUTPUT", "-s", ipAddr, "-p", protocol, "--sport", targetPort, "-m", "conntrack", "--ctstate", "ESTABLISHED", "-j", "ACCEPT"},
  529. )
  530. }
  531. var innerErr error
  532. err := sb.ExecFunc(func() {
  533. for _, rule := range rules {
  534. if err := iptable.RawCombinedOutputNative(rule...); err != nil {
  535. innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
  536. return
  537. }
  538. }
  539. if len(ingressPorts) == 0 {
  540. return
  541. }
  542. // Ensure blocking rules for anything else in/to ingress network
  543. for _, rule := range [][]string{
  544. {"-d", ipAddr, "-p", "sctp", "-j", "DROP"},
  545. {"-d", ipAddr, "-p", "udp", "-j", "DROP"},
  546. {"-d", ipAddr, "-p", "tcp", "-j", "DROP"},
  547. } {
  548. if !iptable.ExistsNative(iptables.Filter, "INPUT", rule...) {
  549. if err := iptable.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil {
  550. innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
  551. return
  552. }
  553. }
  554. rule[0] = "-s"
  555. if !iptable.ExistsNative(iptables.Filter, "OUTPUT", rule...) {
  556. if err := iptable.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil {
  557. innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
  558. return
  559. }
  560. }
  561. }
  562. })
  563. if err != nil {
  564. return err
  565. }
  566. return innerErr
  567. }