service_linux.go 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816
  1. package libnetwork
  2. import (
  3. "fmt"
  4. "io"
  5. "net"
  6. "os"
  7. "os/exec"
  8. "path/filepath"
  9. "runtime"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. "syscall"
  14. "github.com/docker/docker/libnetwork/iptables"
  15. "github.com/docker/docker/libnetwork/ns"
  16. "github.com/docker/docker/pkg/reexec"
  17. "github.com/gogo/protobuf/proto"
  18. "github.com/ishidawataru/sctp"
  19. "github.com/moby/ipvs"
  20. "github.com/sirupsen/logrus"
  21. "github.com/vishvananda/netlink/nl"
  22. "github.com/vishvananda/netns"
  23. )
  24. func init() {
  25. reexec.Register("fwmarker", fwMarker)
  26. reexec.Register("redirector", redirector)
  27. }
  28. // Populate all loadbalancers on the network that the passed endpoint
  29. // belongs to, into this sandbox.
  30. func (sb *sandbox) populateLoadBalancers(ep *endpoint) {
  31. // This is an interface less endpoint. Nothing to do.
  32. if ep.Iface() == nil {
  33. return
  34. }
  35. n := ep.getNetwork()
  36. eIP := ep.Iface().Address()
  37. if n.ingress {
  38. if err := addRedirectRules(sb.Key(), eIP, ep.ingressPorts); err != nil {
  39. logrus.Errorf("Failed to add redirect rules for ep %s (%.7s): %v", ep.Name(), ep.ID(), err)
  40. }
  41. }
  42. }
  43. func (n *network) findLBEndpointSandbox() (*endpoint, *sandbox, error) {
  44. // TODO: get endpoint from store? See EndpointInfo()
  45. var ep *endpoint
  46. // Find this node's LB sandbox endpoint: there should be exactly one
  47. for _, e := range n.Endpoints() {
  48. epi := e.Info()
  49. if epi != nil && epi.LoadBalancer() {
  50. ep = e.(*endpoint)
  51. break
  52. }
  53. }
  54. if ep == nil {
  55. return nil, nil, fmt.Errorf("Unable to find load balancing endpoint for network %s", n.ID())
  56. }
  57. // Get the load balancer sandbox itself as well
  58. sb, ok := ep.getSandbox()
  59. if !ok {
  60. return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID())
  61. }
  62. sep := sb.getEndpoint(ep.ID())
  63. if sep == nil {
  64. return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID())
  65. }
  66. return sep, sb, nil
  67. }
  68. // Searches the OS sandbox for the name of the endpoint interface
  69. // within the sandbox. This is required for adding/removing IP
  70. // aliases to the interface.
  71. func findIfaceDstName(sb *sandbox, ep *endpoint) string {
  72. srcName := ep.Iface().SrcName()
  73. for _, i := range sb.osSbox.Info().Interfaces() {
  74. if i.SrcName() == srcName {
  75. return i.DstName()
  76. }
  77. }
  78. return ""
  79. }
  80. // Add loadbalancer backend to the loadbalncer sandbox for the network.
  81. // If needed add the service as well.
  82. func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
  83. if len(lb.vip) == 0 {
  84. return
  85. }
  86. ep, sb, err := n.findLBEndpointSandbox()
  87. if err != nil {
  88. logrus.Errorf("addLBBackend %s/%s: %v", n.ID(), n.Name(), err)
  89. return
  90. }
  91. if sb.osSbox == nil {
  92. return
  93. }
  94. eIP := ep.Iface().Address()
  95. i, err := ipvs.New(sb.Key())
  96. if err != nil {
  97. logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb addition: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
  98. return
  99. }
  100. defer i.Close()
  101. s := &ipvs.Service{
  102. AddressFamily: nl.FAMILY_V4,
  103. FWMark: lb.fwMark,
  104. SchedName: ipvs.RoundRobin,
  105. }
  106. if !i.IsServicePresent(s) {
  107. // Add IP alias for the VIP to the endpoint
  108. ifName := findIfaceDstName(sb, ep)
  109. if ifName == "" {
  110. logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
  111. return
  112. }
  113. err := sb.osSbox.AddAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
  114. if err != nil {
  115. logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
  116. return
  117. }
  118. if sb.ingress {
  119. var gwIP net.IP
  120. if ep := sb.getGatewayEndpoint(); ep != nil {
  121. gwIP = ep.Iface().Address().IP
  122. }
  123. if err := programIngress(gwIP, lb.service.ingressPorts, false); err != nil {
  124. logrus.Errorf("Failed to add ingress: %v", err)
  125. return
  126. }
  127. }
  128. logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID())
  129. if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil {
  130. logrus.Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
  131. return
  132. }
  133. if err := i.NewService(s); err != nil && err != syscall.EEXIST {
  134. logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  135. return
  136. }
  137. }
  138. d := &ipvs.Destination{
  139. AddressFamily: nl.FAMILY_V4,
  140. Address: ip,
  141. Weight: 1,
  142. }
  143. if n.loadBalancerMode == loadBalancerModeDSR {
  144. d.ConnectionFlags = ipvs.ConnFwdDirectRoute
  145. }
  146. // Remove the sched name before using the service to add
  147. // destination.
  148. s.SchedName = ""
  149. if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
  150. logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  151. }
  152. // Ensure that kernel tweaks are applied in case this is the first time
  153. // we've initialized ip_vs
  154. sb.osSbox.ApplyOSTweaks(sb.oslTypes)
  155. }
  156. // Remove loadbalancer backend the load balancing endpoint for this
  157. // network. If 'rmService' is true, then remove the service entry as well.
  158. // If 'fullRemove' is true then completely remove the entry, otherwise
  159. // just deweight it for now.
  160. func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullRemove bool) {
  161. if len(lb.vip) == 0 {
  162. return
  163. }
  164. ep, sb, err := n.findLBEndpointSandbox()
  165. if err != nil {
  166. logrus.Debugf("rmLBBackend for %s/%s: %v -- probably transient state", n.ID(), n.Name(), err)
  167. return
  168. }
  169. if sb.osSbox == nil {
  170. return
  171. }
  172. eIP := ep.Iface().Address()
  173. i, err := ipvs.New(sb.Key())
  174. if err != nil {
  175. logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb removal: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
  176. return
  177. }
  178. defer i.Close()
  179. s := &ipvs.Service{
  180. AddressFamily: nl.FAMILY_V4,
  181. FWMark: lb.fwMark,
  182. }
  183. d := &ipvs.Destination{
  184. AddressFamily: nl.FAMILY_V4,
  185. Address: ip,
  186. Weight: 1,
  187. }
  188. if n.loadBalancerMode == loadBalancerModeDSR {
  189. d.ConnectionFlags = ipvs.ConnFwdDirectRoute
  190. }
  191. if fullRemove {
  192. if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
  193. logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  194. }
  195. } else {
  196. d.Weight = 0
  197. if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
  198. logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  199. }
  200. }
  201. if rmService {
  202. s.SchedName = ipvs.RoundRobin
  203. if err := i.DelService(s); err != nil && err != syscall.ENOENT {
  204. logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  205. }
  206. if sb.ingress {
  207. var gwIP net.IP
  208. if ep := sb.getGatewayEndpoint(); ep != nil {
  209. gwIP = ep.Iface().Address().IP
  210. }
  211. if err := programIngress(gwIP, lb.service.ingressPorts, true); err != nil {
  212. logrus.Errorf("Failed to delete ingress: %v", err)
  213. }
  214. }
  215. if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil {
  216. logrus.Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
  217. }
  218. // Remove IP alias from the VIP to the endpoint
  219. ifName := findIfaceDstName(sb, ep)
  220. if ifName == "" {
  221. logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
  222. return
  223. }
  224. err := sb.osSbox.RemoveAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
  225. if err != nil {
  226. logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
  227. }
  228. }
  229. }
  230. const ingressChain = "DOCKER-INGRESS"
  231. var (
  232. ingressOnce sync.Once
  233. ingressMu sync.Mutex // lock for operations on ingress
  234. ingressProxyTbl = make(map[string]io.Closer)
  235. portConfigMu sync.Mutex
  236. portConfigTbl = make(map[PortConfig]int)
  237. )
  238. func filterPortConfigs(ingressPorts []*PortConfig, isDelete bool) []*PortConfig {
  239. portConfigMu.Lock()
  240. iPorts := make([]*PortConfig, 0, len(ingressPorts))
  241. for _, pc := range ingressPorts {
  242. if isDelete {
  243. if cnt, ok := portConfigTbl[*pc]; ok {
  244. // This is the last reference to this
  245. // port config. Delete the port config
  246. // and add it to filtered list to be
  247. // plumbed.
  248. if cnt == 1 {
  249. delete(portConfigTbl, *pc)
  250. iPorts = append(iPorts, pc)
  251. continue
  252. }
  253. portConfigTbl[*pc] = cnt - 1
  254. }
  255. continue
  256. }
  257. if cnt, ok := portConfigTbl[*pc]; ok {
  258. portConfigTbl[*pc] = cnt + 1
  259. continue
  260. }
  261. // We are adding it for the first time. Add it to the
  262. // filter list to be plumbed.
  263. portConfigTbl[*pc] = 1
  264. iPorts = append(iPorts, pc)
  265. }
  266. portConfigMu.Unlock()
  267. return iPorts
  268. }
  269. func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) error {
  270. // TODO IPv6 support
  271. iptable := iptables.GetIptable(iptables.IPv4)
  272. addDelOpt := "-I"
  273. rollbackAddDelOpt := "-D"
  274. if isDelete {
  275. addDelOpt = "-D"
  276. rollbackAddDelOpt = "-I"
  277. }
  278. ingressMu.Lock()
  279. defer ingressMu.Unlock()
  280. chainExists := iptable.ExistChain(ingressChain, iptables.Nat)
  281. filterChainExists := iptable.ExistChain(ingressChain, iptables.Filter)
  282. ingressOnce.Do(func() {
  283. // Flush nat table and filter table ingress chain rules during init if it
  284. // exists. It might contain stale rules from previous life.
  285. if chainExists {
  286. if err := iptable.RawCombinedOutput("-t", "nat", "-F", ingressChain); err != nil {
  287. logrus.Errorf("Could not flush nat table ingress chain rules during init: %v", err)
  288. }
  289. }
  290. if filterChainExists {
  291. if err := iptable.RawCombinedOutput("-F", ingressChain); err != nil {
  292. logrus.Errorf("Could not flush filter table ingress chain rules during init: %v", err)
  293. }
  294. }
  295. })
  296. if !isDelete {
  297. if !chainExists {
  298. if err := iptable.RawCombinedOutput("-t", "nat", "-N", ingressChain); err != nil {
  299. return fmt.Errorf("failed to create ingress chain: %v", err)
  300. }
  301. }
  302. if !filterChainExists {
  303. if err := iptable.RawCombinedOutput("-N", ingressChain); err != nil {
  304. return fmt.Errorf("failed to create filter table ingress chain: %v", err)
  305. }
  306. }
  307. if !iptable.Exists(iptables.Nat, ingressChain, "-j", "RETURN") {
  308. if err := iptable.RawCombinedOutput("-t", "nat", "-A", ingressChain, "-j", "RETURN"); err != nil {
  309. return fmt.Errorf("failed to add return rule in nat table ingress chain: %v", err)
  310. }
  311. }
  312. if !iptable.Exists(iptables.Filter, ingressChain, "-j", "RETURN") {
  313. if err := iptable.RawCombinedOutput("-A", ingressChain, "-j", "RETURN"); err != nil {
  314. return fmt.Errorf("failed to add return rule to filter table ingress chain: %v", err)
  315. }
  316. }
  317. for _, chain := range []string{"OUTPUT", "PREROUTING"} {
  318. if !iptable.Exists(iptables.Nat, chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain) {
  319. if err := iptable.RawCombinedOutput("-t", "nat", "-I", chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain); err != nil {
  320. return fmt.Errorf("failed to add jump rule in %s to ingress chain: %v", chain, err)
  321. }
  322. }
  323. }
  324. if !iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
  325. if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
  326. return fmt.Errorf("failed to add jump rule to %s in filter table forward chain: %v", ingressChain, err)
  327. }
  328. arrangeUserFilterRule()
  329. }
  330. oifName, err := findOIFName(gwIP)
  331. if err != nil {
  332. return fmt.Errorf("failed to find gateway bridge interface name for %s: %v", gwIP, err)
  333. }
  334. path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet")
  335. if err := os.WriteFile(path, []byte{'1', '\n'}, 0644); err != nil { //nolint:gosec // gosec complains about perms here, which must be 0644 in this case
  336. return fmt.Errorf("could not write to %s: %v", path, err)
  337. }
  338. ruleArgs := []string{"-m", "addrtype", "--src-type", "LOCAL", "-o", oifName, "-j", "MASQUERADE"}
  339. if !iptable.Exists(iptables.Nat, "POSTROUTING", ruleArgs...) {
  340. if err := iptable.RawCombinedOutput(append([]string{"-t", "nat", "-I", "POSTROUTING"}, ruleArgs...)...); err != nil {
  341. return fmt.Errorf("failed to add ingress localhost POSTROUTING rule for %s: %v", oifName, err)
  342. }
  343. }
  344. }
  345. // Filter the ingress ports until port rules start to be added/deleted
  346. filteredPorts := filterPortConfigs(ingressPorts, isDelete)
  347. rollbackRules := make([][]string, 0, len(filteredPorts)*3)
  348. var portErr error
  349. defer func() {
  350. if portErr != nil && !isDelete {
  351. filterPortConfigs(filteredPorts, !isDelete)
  352. for _, rule := range rollbackRules {
  353. if err := iptable.RawCombinedOutput(rule...); err != nil {
  354. logrus.Warnf("roll back rule failed, %v: %v", rule, err)
  355. }
  356. }
  357. }
  358. }()
  359. for _, iPort := range filteredPorts {
  360. var (
  361. protocol = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
  362. publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
  363. destination = net.JoinHostPort(gwIP.String(), publishedPort)
  364. )
  365. if iptable.ExistChain(ingressChain, iptables.Nat) {
  366. rule := []string{"-t", "nat", addDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "DNAT", "--to-destination", destination}
  367. if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
  368. err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
  369. if !isDelete {
  370. return err
  371. }
  372. logrus.Info(err)
  373. }
  374. rollbackRule := []string{"-t", "nat", rollbackAddDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "DNAT", "--to-destination", destination}
  375. rollbackRules = append(rollbackRules, rollbackRule)
  376. }
  377. // Filter table rules to allow a published service to be accessible in the local node from..
  378. // 1) service tasks attached to other networks
  379. // 2) unmanaged containers on bridge networks
  380. rule := []string{addDelOpt, ingressChain, "-m", "state", "-p", protocol, "--sport", publishedPort, "--state", "ESTABLISHED,RELATED", "-j", "ACCEPT"}
  381. if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
  382. err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
  383. if !isDelete {
  384. return err
  385. }
  386. logrus.Warn(err)
  387. }
  388. rollbackRule := []string{rollbackAddDelOpt, ingressChain, "-m", "state", "-p", protocol, "--sport", publishedPort, "--state", "ESTABLISHED,RELATED", "-j", "ACCEPT"}
  389. rollbackRules = append(rollbackRules, rollbackRule)
  390. rule = []string{addDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "ACCEPT"}
  391. if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
  392. err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
  393. if !isDelete {
  394. return err
  395. }
  396. logrus.Warn(err)
  397. }
  398. rollbackRule = []string{rollbackAddDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "ACCEPT"}
  399. rollbackRules = append(rollbackRules, rollbackRule)
  400. if err := plumbProxy(iPort, isDelete); err != nil {
  401. logrus.Warnf("failed to create proxy for port %s: %v", publishedPort, err)
  402. }
  403. }
  404. return nil
  405. }
  406. // In the filter table FORWARD chain the first rule should be to jump to
  407. // DOCKER-USER so the user is able to filter packet first.
  408. // The second rule should be jump to INGRESS-CHAIN.
  409. // This chain has the rules to allow access to the published ports for swarm tasks
  410. // from local bridge networks and docker_gwbridge (ie:taks on other swarm networks)
  411. func arrangeIngressFilterRule() {
  412. // TODO IPv6 support
  413. iptable := iptables.GetIptable(iptables.IPv4)
  414. if iptable.ExistChain(ingressChain, iptables.Filter) {
  415. if iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
  416. if err := iptable.RawCombinedOutput("-D", "FORWARD", "-j", ingressChain); err != nil {
  417. logrus.Warnf("failed to delete jump rule to ingressChain in filter table: %v", err)
  418. }
  419. }
  420. if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
  421. logrus.Warnf("failed to add jump rule to ingressChain in filter table: %v", err)
  422. }
  423. }
  424. }
  425. func findOIFName(ip net.IP) (string, error) {
  426. nlh := ns.NlHandle()
  427. routes, err := nlh.RouteGet(ip)
  428. if err != nil {
  429. return "", err
  430. }
  431. if len(routes) == 0 {
  432. return "", fmt.Errorf("no route to %s", ip)
  433. }
  434. // Pick the first route(typically there is only one route). We
  435. // don't support multipath.
  436. link, err := nlh.LinkByIndex(routes[0].LinkIndex)
  437. if err != nil {
  438. return "", err
  439. }
  440. return link.Attrs().Name, nil
  441. }
  442. func plumbProxy(iPort *PortConfig, isDelete bool) error {
  443. var (
  444. err error
  445. l io.Closer
  446. )
  447. portSpec := fmt.Sprintf("%d/%s", iPort.PublishedPort, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]))
  448. if isDelete {
  449. if listener, ok := ingressProxyTbl[portSpec]; ok {
  450. if listener != nil {
  451. listener.Close()
  452. }
  453. }
  454. return nil
  455. }
  456. switch iPort.Protocol {
  457. case ProtocolTCP:
  458. l, err = net.ListenTCP("tcp", &net.TCPAddr{Port: int(iPort.PublishedPort)})
  459. case ProtocolUDP:
  460. l, err = net.ListenUDP("udp", &net.UDPAddr{Port: int(iPort.PublishedPort)})
  461. case ProtocolSCTP:
  462. l, err = sctp.ListenSCTP("sctp", &sctp.SCTPAddr{Port: int(iPort.PublishedPort)})
  463. default:
  464. err = fmt.Errorf("unknown protocol %v", iPort.Protocol)
  465. }
  466. if err != nil {
  467. return err
  468. }
  469. ingressProxyTbl[portSpec] = l
  470. return nil
  471. }
  472. func writePortsToFile(ports []*PortConfig) (string, error) {
  473. f, err := os.CreateTemp("", "port_configs")
  474. if err != nil {
  475. return "", err
  476. }
  477. defer f.Close() //nolint:gosec
  478. buf, _ := proto.Marshal(&EndpointRecord{
  479. IngressPorts: ports,
  480. })
  481. n, err := f.Write(buf)
  482. if err != nil {
  483. return "", err
  484. }
  485. if n < len(buf) {
  486. return "", io.ErrShortWrite
  487. }
  488. return f.Name(), nil
  489. }
  490. func readPortsFromFile(fileName string) ([]*PortConfig, error) {
  491. buf, err := os.ReadFile(fileName)
  492. if err != nil {
  493. return nil, err
  494. }
  495. var epRec EndpointRecord
  496. err = proto.Unmarshal(buf, &epRec)
  497. if err != nil {
  498. return nil, err
  499. }
  500. return epRec.IngressPorts, nil
  501. }
  502. // Invoke fwmarker reexec routine to mark vip destined packets with
  503. // the passed firewall mark.
  504. func invokeFWMarker(path string, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error {
  505. var ingressPortsFile string
  506. if len(ingressPorts) != 0 {
  507. var err error
  508. ingressPortsFile, err = writePortsToFile(ingressPorts)
  509. if err != nil {
  510. return err
  511. }
  512. defer os.Remove(ingressPortsFile)
  513. }
  514. addDelOpt := "-A"
  515. if isDelete {
  516. addDelOpt = "-D"
  517. }
  518. cmd := &exec.Cmd{
  519. Path: reexec.Self(),
  520. Args: append([]string{"fwmarker"}, path, vip.String(), fmt.Sprintf("%d", fwMark), addDelOpt, ingressPortsFile, eIP.String(), lbMode),
  521. Stdout: os.Stdout,
  522. Stderr: os.Stderr,
  523. }
  524. if err := cmd.Run(); err != nil {
  525. return fmt.Errorf("reexec failed: %v", err)
  526. }
  527. return nil
  528. }
  529. // Firewall marker reexec function.
  530. func fwMarker() {
  531. // TODO IPv6 support
  532. iptable := iptables.GetIptable(iptables.IPv4)
  533. runtime.LockOSThread()
  534. defer runtime.UnlockOSThread()
  535. if len(os.Args) < 8 {
  536. logrus.Error("invalid number of arguments..")
  537. os.Exit(1)
  538. }
  539. var ingressPorts []*PortConfig
  540. if os.Args[5] != "" {
  541. var err error
  542. ingressPorts, err = readPortsFromFile(os.Args[5])
  543. if err != nil {
  544. logrus.Errorf("Failed reading ingress ports file: %v", err)
  545. os.Exit(2)
  546. }
  547. }
  548. vip := os.Args[2]
  549. fwMark := os.Args[3]
  550. if _, err := strconv.ParseUint(fwMark, 10, 32); err != nil {
  551. logrus.Errorf("bad fwmark value(%s) passed: %v", fwMark, err)
  552. os.Exit(3)
  553. }
  554. addDelOpt := os.Args[4]
  555. rules := make([][]string, 0, len(ingressPorts))
  556. for _, iPort := range ingressPorts {
  557. var (
  558. protocol = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
  559. publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
  560. )
  561. rule := []string{"-t", "mangle", addDelOpt, "PREROUTING", "-p", protocol, "--dport", publishedPort, "-j", "MARK", "--set-mark", fwMark}
  562. rules = append(rules, rule)
  563. }
  564. ns, err := netns.GetFromPath(os.Args[1])
  565. if err != nil {
  566. logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
  567. os.Exit(4)
  568. }
  569. defer ns.Close()
  570. if err := netns.Set(ns); err != nil {
  571. logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
  572. os.Exit(5)
  573. }
  574. lbMode := os.Args[7]
  575. if addDelOpt == "-A" && lbMode == loadBalancerModeNAT {
  576. eIP, subnet, err := net.ParseCIDR(os.Args[6])
  577. if err != nil {
  578. logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err)
  579. os.Exit(6)
  580. }
  581. ruleParams := []string{"-m", "ipvs", "--ipvs", "-d", subnet.String(), "-j", "SNAT", "--to-source", eIP.String()}
  582. if !iptable.Exists("nat", "POSTROUTING", ruleParams...) {
  583. rule := append([]string{"-t", "nat", "-A", "POSTROUTING"}, ruleParams...)
  584. rules = append(rules, rule)
  585. err := os.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
  586. if err != nil {
  587. logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err)
  588. os.Exit(7)
  589. }
  590. }
  591. }
  592. rule := []string{"-t", "mangle", addDelOpt, "INPUT", "-d", vip + "/32", "-j", "MARK", "--set-mark", fwMark}
  593. rules = append(rules, rule)
  594. for _, rule := range rules {
  595. if err := iptable.RawCombinedOutputNative(rule...); err != nil {
  596. logrus.Errorf("set up rule failed, %v: %v", rule, err)
  597. os.Exit(8)
  598. }
  599. }
  600. }
  601. func addRedirectRules(path string, eIP *net.IPNet, ingressPorts []*PortConfig) error {
  602. var ingressPortsFile string
  603. if len(ingressPorts) != 0 {
  604. var err error
  605. ingressPortsFile, err = writePortsToFile(ingressPorts)
  606. if err != nil {
  607. return err
  608. }
  609. defer os.Remove(ingressPortsFile)
  610. }
  611. cmd := &exec.Cmd{
  612. Path: reexec.Self(),
  613. Args: append([]string{"redirector"}, path, eIP.String(), ingressPortsFile),
  614. Stdout: os.Stdout,
  615. Stderr: os.Stderr,
  616. }
  617. if err := cmd.Run(); err != nil {
  618. return fmt.Errorf("reexec failed: %v", err)
  619. }
  620. return nil
  621. }
  622. // Redirector reexec function.
  623. func redirector() {
  624. // TODO IPv6 support
  625. iptable := iptables.GetIptable(iptables.IPv4)
  626. runtime.LockOSThread()
  627. defer runtime.UnlockOSThread()
  628. if len(os.Args) < 4 {
  629. logrus.Error("invalid number of arguments..")
  630. os.Exit(1)
  631. }
  632. var ingressPorts []*PortConfig
  633. if os.Args[3] != "" {
  634. var err error
  635. ingressPorts, err = readPortsFromFile(os.Args[3])
  636. if err != nil {
  637. logrus.Errorf("Failed reading ingress ports file: %v", err)
  638. os.Exit(2)
  639. }
  640. }
  641. eIP, _, err := net.ParseCIDR(os.Args[2])
  642. if err != nil {
  643. logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[2], err)
  644. os.Exit(3)
  645. }
  646. ipAddr := eIP.String()
  647. rules := make([][]string, 0, len(ingressPorts)*3) // 3 rules per port
  648. for _, iPort := range ingressPorts {
  649. var (
  650. protocol = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
  651. publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
  652. targetPort = strconv.FormatUint(uint64(iPort.TargetPort), 10)
  653. )
  654. rules = append(rules,
  655. []string{"-t", "nat", "-A", "PREROUTING", "-d", ipAddr, "-p", protocol, "--dport", publishedPort, "-j", "REDIRECT", "--to-port", targetPort},
  656. // Allow only incoming connections to exposed ports
  657. []string{"-I", "INPUT", "-d", ipAddr, "-p", protocol, "--dport", targetPort, "-m", "conntrack", "--ctstate", "NEW,ESTABLISHED", "-j", "ACCEPT"},
  658. // Allow only outgoing connections from exposed ports
  659. []string{"-I", "OUTPUT", "-s", ipAddr, "-p", protocol, "--sport", targetPort, "-m", "conntrack", "--ctstate", "ESTABLISHED", "-j", "ACCEPT"},
  660. )
  661. }
  662. ns, err := netns.GetFromPath(os.Args[1])
  663. if err != nil {
  664. logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
  665. os.Exit(4)
  666. }
  667. defer ns.Close()
  668. if err := netns.Set(ns); err != nil {
  669. logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
  670. os.Exit(5)
  671. }
  672. for _, rule := range rules {
  673. if err := iptable.RawCombinedOutputNative(rule...); err != nil {
  674. logrus.Errorf("set up rule failed, %v: %v", rule, err)
  675. os.Exit(6)
  676. }
  677. }
  678. if len(ingressPorts) == 0 {
  679. return
  680. }
  681. // Ensure blocking rules for anything else in/to ingress network
  682. for _, rule := range [][]string{
  683. {"-d", ipAddr, "-p", "sctp", "-j", "DROP"},
  684. {"-d", ipAddr, "-p", "udp", "-j", "DROP"},
  685. {"-d", ipAddr, "-p", "tcp", "-j", "DROP"},
  686. } {
  687. if !iptable.ExistsNative(iptables.Filter, "INPUT", rule...) {
  688. if err := iptable.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil {
  689. logrus.Errorf("set up rule failed, %v: %v", rule, err)
  690. os.Exit(7)
  691. }
  692. }
  693. rule[0] = "-s"
  694. if !iptable.ExistsNative(iptables.Filter, "OUTPUT", rule...) {
  695. if err := iptable.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil {
  696. logrus.Errorf("set up rule failed, %v: %v", rule, err)
  697. os.Exit(8)
  698. }
  699. }
  700. }
  701. }